feat: run zsh fork shell tool via shell-escalation (#12649)

## Why This PR switches the `shell_command` zsh-fork path over to `codex-shell-escalation` so the new shell tool can use the shared exec-wrapper/escalation protocol instead of the `zsh_exec_bridge` implementation that was introduced in https://github.com/openai/codex/pull/12052. `zsh_exec_bridge` relied on UNIX domain sockets, which is not as tamper-proof as the FD-based approach in `codex-shell-escalation`. ## What Changed - Added a Unix zsh-fork runtime adapter in `core` (`core/src/tools/runtimes/shell/unix_escalation.rs`) that: - runs zsh-fork commands through `codex_shell_escalation::run_escalate_server` - bridges exec-policy / approval decisions into `ShellActionProvider` - executes escalated commands via a `ShellCommandExecutor` that calls `process_exec_tool_call` - Updated `ShellRuntime` / `ShellCommandHandler` / tool spec wiring to select a `shell_command` backend (`classic` vs `zsh-fork`) while leaving the generic `shell` tool path unchanged. - Removed the `zsh_exec_bridge`-based session service and deleted `core/src/zsh_exec_bridge/mod.rs`. - Moved exec-wrapper entrypoint dispatch to `arg0` by handling the `codex-execve-wrapper` arg0 alias there, and removed the old `codex_core::maybe_run_zsh_exec_wrapper_mode()` hooks from `cli` and `app-server` mains. - Added the needed `codex-shell-escalation` dependencies for `core` and `arg0`. ## Tests - `cargo test -p codex-core shell_zsh_fork_prefers_shell_command_over_unified_exec` - `cargo test -p codex-app-server turn_start_shell_zsh_fork -- --nocapture` - verifies zsh-fork command execution and approval flows through the new backend - includes subcommand approve/decline coverage using the shared zsh DotSlash fixture in `app-server/tests/suite/zsh` - To test manually, I added the following to `~/.codex/config.toml`: ```toml zsh_path = "/Users/mbolin/code/codex3/codex-rs/app-server/tests/suite/zsh" [features] shell_zsh_fork = true ``` Then I ran `just c` to run the dev build of Codex with these changes and sent it the message: ``` run `echo $0` ``` And it replied with: ``` echo $0 printed: /Users/mbolin/code/codex3/codex-rs/app-server/tests/suite/zsh In this tool context, $0 reflects the script path used to invoke the shell, not just zsh. ``` so the tool appears to be wired up correctly. ## Notes - The zsh subcommand-decline integration test now uses `rm` under a `WorkspaceWrite` sandbox. The previous `/usr/bin/true` scenario is auto-allowed by the new `shell-escalation` policy path, which no longer produces subcommand approval prompts.
2026-04-27 16:15:09 +00:00 · 2026-02-24 10:31:08 -08:00
parent 8d3d58f992
commit 3ca0e7673b
24 changed files with 706 additions and 723 deletions
--- a/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs
+++ b/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs
@@ -35,7 +35,6 @@ use core_test_support::responses;
 use core_test_support::skip_if_no_network;
 use pretty_assertions::assert_eq;
 use std::collections::BTreeMap;
-use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;
@@ -444,32 +443,17 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
        return Ok(());
    }
    eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());
-    let zsh_path_for_config = {
-        // App-server config accepts only a zsh path, not extra argv. Use a
-        // wrapper so this test can force `-df` and downgrade `-lc` to `-c`
-        // to avoid rc/login-shell startup noise.
-        let path = workspace.join("zsh-no-rc");
-        std::fs::write(
-            &path,
-            format!(
-                r#"#!/bin/sh
-if [ "$1" = "-lc" ]; then
-  shift
-  set -- -c "$@"
-fi
-exec "{}" -df "$@"
-"#,
-                zsh_path.display()
-            ),
-        )?;
-        let mut permissions = std::fs::metadata(&path)?.permissions();
-        permissions.set_mode(0o755);
-        std::fs::set_permissions(&path, permissions)?;
-        path
-    };
-
+    let first_file = workspace.join("first.txt");
+    let second_file = workspace.join("second.txt");
+    std::fs::write(&first_file, "one")?;
+    std::fs::write(&second_file, "two")?;
+    let shell_command = format!(
+        "/bin/rm {} && /bin/rm {}",
+        first_file.display(),
+        second_file.display()
+    );
    let tool_call_arguments = serde_json::to_string(&serde_json::json!({
-        "command": "/usr/bin/true && /usr/bin/true",
+        "command": shell_command,
        "workdir": serde_json::Value::Null,
        "timeout_ms": 5000
    }))?;
@@ -495,13 +479,13 @@ exec "{}" -df "$@"
    create_config_toml(
        &codex_home,
        &server.uri(),
-        "on-request",
+        "untrusted",
        &BTreeMap::from([
            (Feature::ShellZshFork, true),
            (Feature::UnifiedExec, false),
            (Feature::ShellSnapshot, false),
        ]),
-        &zsh_path_for_config,
+        &zsh_path,
    )?;

    let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
@@ -525,21 +509,17 @@ exec "{}" -df "$@"
        .send_turn_start_request(TurnStartParams {
            thread_id: thread.id.clone(),
            input: vec![V2UserInput::Text {
-                text: "run true true".to_string(),
+                text: "remove both files".to_string(),
                text_elements: Vec::new(),
            }],
            cwd: Some(workspace.clone()),
-            approval_policy: Some(codex_app_server_protocol::AskForApproval::OnRequest),
-            sandbox_policy: Some(if cfg!(target_os = "linux") {
-                // The zsh exec-bridge wrapper uses a Unix socket back to the parent
-                // process. Linux restricted sandbox seccomp denies connect(2), so use
-                // full access here; this test is validating zsh approval/decline
-                // behavior, not Linux sandboxing.
-                codex_app_server_protocol::SandboxPolicy::DangerFullAccess
-            } else {
-                codex_app_server_protocol::SandboxPolicy::ReadOnly {
-                    access: codex_app_server_protocol::ReadOnlyAccess::FullAccess,
-                }
+            approval_policy: Some(codex_app_server_protocol::AskForApproval::UnlessTrusted),
+            sandbox_policy: Some(codex_app_server_protocol::SandboxPolicy::WorkspaceWrite {
+                writable_roots: vec![workspace.clone().try_into()?],
+                read_only_access: codex_app_server_protocol::ReadOnlyAccess::FullAccess,
+                network_access: false,
+                exclude_tmpdir_env_var: false,
+                exclude_slash_tmp: false,
            }),
            model: Some("mock-model".to_string()),
            effort: Some(codex_protocol::openai_models::ReasoningEffort::Medium),
@@ -554,7 +534,8 @@ exec "{}" -df "$@"
    .await??;
    let TurnStartResponse { turn } = to_response::<TurnStartResponse>(turn_resp)?;

-    let mut approval_ids = Vec::new();
+    let mut approved_subcommand_strings = Vec::new();
+    let mut approved_subcommand_ids = Vec::new();
    let mut saw_parent_approval = false;
    let target_decisions = [
        CommandExecutionApprovalDecision::Accept,
@@ -573,38 +554,45 @@ exec "{}" -df "$@"
        };
        assert_eq!(params.item_id, "call-zsh-fork-subcommand-decline");
        assert_eq!(params.thread_id, thread.id);
-        let is_target_subcommand = params.command.as_deref() == Some("/usr/bin/true");
+        let approval_command = params
+            .command
+            .as_deref()
+            .expect("approval command should be present");
+        let is_target_subcommand = (approval_command.starts_with("/bin/rm ")
+            || approval_command.starts_with("/usr/bin/rm "))
+            && (approval_command.contains(&first_file.display().to_string())
+                || approval_command.contains(&second_file.display().to_string()));
        if is_target_subcommand {
-            approval_ids.push(
+            assert!(
+                approval_command.contains(&first_file.display().to_string())
+                    || approval_command.contains(&second_file.display().to_string()),
+                "expected zsh subcommand approval for one of the rm commands, got: {approval_command}"
+            );
+            approved_subcommand_ids.push(
                params
                    .approval_id
                    .clone()
                    .expect("approval_id must be present for zsh subcommand approvals"),
            );
+            approved_subcommand_strings.push(approval_command.to_string());
        }
+        let is_parent_approval = approval_command.contains(&zsh_path.display().to_string())
+            && approval_command.contains(&shell_command);
        let decision = if is_target_subcommand {
            let decision = target_decisions[target_decision_index].clone();
            target_decision_index += 1;
            decision
-        } else {
-            let command = params
-                .command
-                .as_deref()
-                .expect("approval command should be present");
+        } else if is_parent_approval {
            assert!(
                !saw_parent_approval,
-                "unexpected extra non-target approval: {command}"
-            );
-            assert!(
-                command.contains("zsh-no-rc"),
-                "expected parent zsh wrapper approval, got: {command}"
-            );
-            assert!(
-                command.contains("/usr/bin/true && /usr/bin/true"),
-                "expected tool command in parent approval, got: {command}"
+                "unexpected extra non-target approval: {approval_command}"
            );
            saw_parent_approval = true;
            CommandExecutionApprovalDecision::Accept
+        } else {
+            // Login shells may run startup helpers (for example path_helper on macOS)
+            // before the parent shell command or target subcommands are reached.
+            CommandExecutionApprovalDecision::Accept
        };
        mcp.send_response(
            request_id,
@@ -613,8 +601,15 @@ exec "{}" -df "$@"
        .await?;
    }

-    assert_eq!(approval_ids.len(), 2);
-    assert_ne!(approval_ids[0], approval_ids[1]);
+    assert!(
+        saw_parent_approval,
+        "expected parent shell approval request"
+    );
+    assert_eq!(approved_subcommand_ids.len(), 2);
+    assert_ne!(approved_subcommand_ids[0], approved_subcommand_ids[1]);
+    assert_eq!(approved_subcommand_strings.len(), 2);
+    assert!(approved_subcommand_strings[0].contains(&first_file.display().to_string()));
+    assert!(approved_subcommand_strings[1].contains(&second_file.display().to_string()));
    let parent_completed_command_execution = timeout(DEFAULT_READ_TIMEOUT, async {
        loop {
            let completed_notif = mcp