Ensure shell command skills trigger approval (#12697)

Summary - detect skill-invoking shell commands based on the original command string, request approvals when needed, and cache positive decisions per session - keep implicit skill invocation emitted after approval and keep skill approval decline messaging centralized to the shell handler - expand and adjust skill approval tests to cover shell-based skill scripts while matching the new detection expectations Testing - Not run (not requested)
2026-04-27 16:15:09 +00:00 · 2026-02-24 12:13:20 -08:00
parent 061d1d3b5e
commit daf0f03ac8
10 changed files with 540 additions and 120 deletions
--- a/codex-rs/app-server/tests/suite/v2/skill_approval.rs
+++ b/codex-rs/app-server/tests/suite/v2/skill_approval.rs
@@ -3,6 +3,7 @@ use app_test_support::McpProcess;
 use app_test_support::create_final_assistant_message_sse_response;
 use app_test_support::create_mock_responses_server_sequence;
 use app_test_support::to_response;
+use app_test_support::write_mock_responses_config_toml;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::RequestId;
 use codex_app_server_protocol::ServerRequest;
@@ -11,19 +12,72 @@ use codex_app_server_protocol::ThreadStartResponse;
 use codex_app_server_protocol::TurnStartParams;
 use codex_app_server_protocol::TurnStartResponse;
 use codex_app_server_protocol::UserInput as V2UserInput;
+use codex_core::features::Feature;
+use core_test_support::responses;
+use pretty_assertions::assert_eq;
+use serde_json::json;
+use std::collections::BTreeMap;
+use std::fs;
+use std::path::Path;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

+fn write_skill_with_script(
+    home: &Path,
+    name: &str,
+    script_body: &str,
+) -> Result<std::path::PathBuf> {
+    let skill_dir = home.join("skills").join(name);
+    let scripts_dir = skill_dir.join("scripts");
+    fs::create_dir_all(&scripts_dir)?;
+    fs::write(
+        skill_dir.join("SKILL.md"),
+        format!("---\nname: {name}\ndescription: {name} skill\n---\n"),
+    )?;
+    let script_path = scripts_dir.join("run.py");
+    fs::write(&script_path, script_body)?;
+    Ok(script_path)
+}
+
+fn shell_command_response(tool_call_id: &str, command: &str) -> Result<String> {
+    let arguments = serde_json::to_string(&json!({
+        "command": command,
+        "timeout_ms": 500,
+    }))?;
+    Ok(responses::sse(vec![
+        responses::ev_response_created("resp-1"),
+        responses::ev_function_call(tool_call_id, "shell_command", &arguments),
+        responses::ev_completed("resp-1"),
+    ]))
+}
+
+fn command_for_script(script_path: &Path) -> Result<String> {
+    let runner = if cfg!(windows) { "python" } else { "python3" };
+    let script_path = script_path.to_string_lossy().into_owned();
+    Ok(shlex::try_join([runner, script_path.as_str()])?)
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
-async fn skill_request_approval_round_trip() -> Result<()> {
+async fn skill_request_approval_round_trip_on_shell_command_skill_script_exec() -> Result<()> {
    let codex_home = tempfile::TempDir::new()?;
-    let server =
-        create_mock_responses_server_sequence(vec![create_final_assistant_message_sse_response(
-            "done",
-        )?])
-        .await;
-    create_config_toml(codex_home.path(), &server.uri())?;
+    let script_path = write_skill_with_script(codex_home.path(), "demo", "print('hello')")?;
+    let tool_call_id = "skill-call";
+    let command = command_for_script(&script_path)?;
+    let server = create_mock_responses_server_sequence(vec![
+        shell_command_response(tool_call_id, &command)?,
+        create_final_assistant_message_sse_response("done")?,
+    ])
+    .await;
+    write_mock_responses_config_toml(
+        codex_home.path(),
+        &server.uri(),
+        &BTreeMap::from([(Feature::SkillApproval, true)]),
+        8192,
+        Some(false),
+        "mock_provider",
+        "compact",
+    )?;

    let mut mcp = McpProcess::new(codex_home.path()).await?;
    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
@@ -57,7 +111,7 @@ async fn skill_request_approval_round_trip() -> Result<()> {
        mcp.read_stream_until_response_message(RequestId::Integer(turn_start_id)),
    )
    .await??;
-    let TurnStartResponse { turn, .. } = to_response(turn_start_resp)?;
+    let TurnStartResponse { .. } = to_response::<TurnStartResponse>(turn_start_resp)?;

    let server_req = timeout(
        DEFAULT_READ_TIMEOUT,
@@ -68,8 +122,8 @@ async fn skill_request_approval_round_trip() -> Result<()> {
        panic!("expected SkillRequestApproval request, got: {server_req:?}");
    };

-    assert_eq!(params.item_id, turn.id);
-    assert_eq!(params.skill_name, "test-skill");
+    assert_eq!(params.item_id, tool_call_id);
+    assert_eq!(params.skill_name, "demo");

    mcp.send_response(request_id, serde_json::json!({ "decision": "approve" }))
        .await?;
@@ -82,28 +136,3 @@ async fn skill_request_approval_round_trip() -> Result<()> {

    Ok(())
 }
-
-fn create_config_toml(codex_home: &std::path::Path, server_uri: &str) -> std::io::Result<()> {
-    let config_toml = codex_home.join("config.toml");
-    std::fs::write(
-        config_toml,
-        format!(
-            r#"
-model = "mock-model"
-approval_policy = "never"
-sandbox_mode = "read-only"
-
-model_provider = "mock_provider"
-
-[features]
-skill_approval = true
-
-[model_providers.mock_provider]
-name = "Mock provider for test"
-base_url = "{server_uri}/v1"
-request_max_retries = 0
-stream_max_retries = 0
-"#
-        ),
-    )
-}