Require approval for fake shell escalation

2026-05-21 19:45:26 +00:00 · 2026-05-14 21:55:24 +02:00
12 changed files with 239 additions and 75 deletions
--- a/codex-rs/core/src/command_canonicalization.rs
+++ b/codex-rs/core/src/command_canonicalization.rs
@@ -7,29 +7,32 @@ const CANONICAL_POWERSHELL_SCRIPT_PREFIX: &str = "__codex_powershell_script__";

 /// Canonicalize command argv for approval-cache matching.
 ///
-/// This keeps approval decisions stable across wrapper-path differences (for
-/// example `/bin/bash -lc` vs `bash -lc`) and across shell wrapper tools while
-/// preserving exact script text for complex scripts where we cannot safely
-/// recover a tokenized command sequence.
+/// This keeps approval decisions stable across shell script whitespace changes
+/// while preserving the shell executable identity. The executable is part of the
+/// key because it starts before the shell script and can be attacker controlled.
 pub(crate) fn canonicalize_command_for_approval(command: &[String]) -> Vec<String> {
-    if let Some(commands) = parse_shell_lc_plain_commands(command)
-        && let [single_command] = commands.as_slice()
-    {
-        return single_command.clone();
-    }
-
-    if let Some((_shell, script)) = extract_bash_command(command) {
+    if let Some((shell, script)) = extract_bash_command(command) {
        let shell_mode = command.get(1).cloned().unwrap_or_default();
+        if let Some(commands) = parse_shell_lc_plain_commands(command)
+            && let [single_command] = commands.as_slice()
+        {
+            let mut canonical_command = vec![shell.to_string(), shell_mode];
+            canonical_command.extend(single_command.clone());
+            return canonical_command;
+        }
+
        return vec![
            CANONICAL_BASH_SCRIPT_PREFIX.to_string(),
+            shell.to_string(),
            shell_mode,
            script.to_string(),
        ];
    }

-    if let Some((_shell, script)) = extract_powershell_command(command) {
+    if let Some((shell, script)) = extract_powershell_command(command) {
        return vec![
            CANONICAL_POWERSHELL_SCRIPT_PREFIX.to_string(),
+            shell.to_string(),
            script.to_string(),
        ];
    }
--- a/codex-rs/core/src/command_canonicalization_tests.rs
+++ b/codex-rs/core/src/command_canonicalization_tests.rs
@@ -2,7 +2,7 @@ use super::canonicalize_command_for_approval;
 use pretty_assertions::assert_eq;

 #[test]
-fn canonicalizes_word_only_shell_scripts_to_inner_command() {
+fn canonicalizes_word_only_shell_scripts_with_shell_identity() {
    let command_a = vec![
        "/bin/bash".to_string(),
        "-lc".to_string(),
@@ -17,13 +17,15 @@ fn canonicalizes_word_only_shell_scripts_to_inner_command() {
    assert_eq!(
        canonicalize_command_for_approval(&command_a),
        vec![
+            "/bin/bash".to_string(),
+            "-lc".to_string(),
            "cargo".to_string(),
            "test".to_string(),
            "-p".to_string(),
            "codex-core".to_string(),
        ]
    );
-    assert_eq!(
+    assert_ne!(
        canonicalize_command_for_approval(&command_a),
        canonicalize_command_for_approval(&command_b)
    );
@@ -43,11 +45,12 @@ fn canonicalizes_heredoc_scripts_to_stable_script_key() {
        canonicalize_command_for_approval(&command_a),
        vec![
            "__codex_shell_script__".to_string(),
+            "/bin/zsh".to_string(),
            "-lc".to_string(),
            script.to_string(),
        ]
    );
-    assert_eq!(
+    assert_ne!(
        canonicalize_command_for_approval(&command_a),
        canonicalize_command_for_approval(&command_b)
    );
@@ -72,10 +75,11 @@ fn canonicalizes_powershell_wrappers_to_stable_script_key() {
        canonicalize_command_for_approval(&command_a),
        vec![
            "__codex_powershell_script__".to_string(),
+            "powershell.exe".to_string(),
            script.to_string(),
        ]
    );
-    assert_eq!(
+    assert_ne!(
        canonicalize_command_for_approval(&command_a),
        canonicalize_command_for_approval(&command_b)
    );
--- a/codex-rs/core/src/exec_policy.rs
+++ b/codex-rs/core/src/exec_policy.rs
@@ -246,6 +246,7 @@ pub(crate) struct ExecApprovalRequest<'a> {
    pub(crate) sandbox_cwd: &'a Path,
    pub(crate) sandbox_permissions: SandboxPermissions,
    pub(crate) prefix_rule: Option<Vec<String>>,
+    pub(crate) allow_execpolicy_amendment: bool,
 }

 impl ExecPolicyManager {
@@ -281,6 +282,7 @@ impl ExecPolicyManager {
            sandbox_cwd,
            sandbox_permissions,
            prefix_rule,
+            allow_execpolicy_amendment,
        } = req;
        let exec_policy = self.current();
        let ExecPolicyCommands {
@@ -291,7 +293,9 @@ impl ExecPolicyManager {
        // Keep heredoc prefix parsing for rule evaluation so existing
        // allow/prompt/forbidden rules still apply, but avoid auto-derived
        // amendments when only the heredoc fallback parser matched.
-        let auto_amendment_allowed = !used_complex_parsing;
+        let auto_amendment_allowed = !used_complex_parsing
+            && allow_execpolicy_amendment
+            && !sandbox_permissions.requests_sandbox_override();
        let exec_policy_fallback = |cmd: &[String]| {
            render_decision_for_unmatched_command(
                cmd,
@@ -649,7 +653,6 @@ pub(crate) fn render_decision_for_unmatched_command(
            codex_shell_command::is_safe_command::is_safe_powershell_words(command)
        }
    };
-
    // On Windows, ReadOnly sandbox is not a real sandbox, so special-case it
    // here.
    let environment_lacks_sandbox_protections = cfg!(windows)
@@ -701,6 +704,15 @@ pub(crate) fn render_decision_for_unmatched_command(
        };
    }

+    if sandbox_permissions.requests_sandbox_override()
+        && matches!(
+            file_system_sandbox_policy.kind,
+            FileSystemSandboxKind::Restricted
+        )
+    {
+        return Decision::Prompt;
+    }
+
    match approval_policy {
        AskForApproval::Never | AskForApproval::OnFailure => {
            // We allow the command to run, relying on the sandbox for
--- a/codex-rs/core/src/exec_policy_tests.rs
+++ b/codex-rs/core/src/exec_policy_tests.rs
@@ -938,14 +938,55 @@ EOF"#
        },
        ExecApprovalRequirement::NeedsApproval {
            reason: None,
-            proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
-                "zsh".to_string(),
+            proposed_execpolicy_amendment: None,
+        },
+    )
+    .await;
+}
+
+#[tokio::test]
+async fn known_safe_shell_wrapper_with_escalation_requires_approval() {
+    assert_exec_approval_requirement_for_command(
+        ExecApprovalRequirementScenario {
+            policy_src: None,
+            command: vec![
+                "/tmp/cc01_fake/bash".to_string(),
                "-lc".to_string(),
-                r#"cat <<'EOF' > /some/important/folder/test.txt
-hello world
-EOF"#
-                    .to_string(),
-            ])),
+                "ls".to_string(),
+            ],
+            approval_policy: AskForApproval::OnRequest,
+            sandbox_policy: SandboxPolicy::new_workspace_write_policy(),
+            file_system_sandbox_policy: workspace_write_file_system_sandbox_policy(),
+            sandbox_permissions: SandboxPermissions::RequireEscalated,
+            prefix_rule: None,
+        },
+        ExecApprovalRequirement::NeedsApproval {
+            reason: None,
+            proposed_execpolicy_amendment: None,
+        },
+    )
+    .await;
+}
+
+#[tokio::test]
+async fn known_safe_shell_wrapper_with_on_failure_escalation_requires_approval() {
+    assert_exec_approval_requirement_for_command(
+        ExecApprovalRequirementScenario {
+            policy_src: None,
+            command: vec![
+                "/tmp/cc01_fake/bash".to_string(),
+                "-lc".to_string(),
+                "ls".to_string(),
+            ],
+            approval_policy: AskForApproval::OnFailure,
+            sandbox_policy: SandboxPolicy::new_workspace_write_policy(),
+            file_system_sandbox_policy: workspace_write_file_system_sandbox_policy(),
+            sandbox_permissions: SandboxPermissions::RequireEscalated,
+            prefix_rule: None,
+        },
+        ExecApprovalRequirement::NeedsApproval {
+            reason: None,
+            proposed_execpolicy_amendment: None,
        },
    )
    .await;
@@ -1265,10 +1306,7 @@ async fn exec_approval_requirement_prompts_for_inline_additional_permissions_und
        },
        ExecApprovalRequirement::NeedsApproval {
            reason: None,
-            proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
-                "touch".to_string(),
-                "requested-dir/requested-but-unused.txt".to_string(),
-            ])),
+            proposed_execpolicy_amendment: None,
        },
    )
    .await;
@@ -1288,10 +1326,7 @@ async fn exec_approval_requirement_prompts_for_known_safe_escalation_under_on_re
        },
        ExecApprovalRequirement::NeedsApproval {
            reason: None,
-            proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
-                "echo".to_string(),
-                "hello".to_string(),
-            ])),
+            proposed_execpolicy_amendment: None,
        },
    )
    .await;
@@ -1380,6 +1415,7 @@ async fn mixed_rule_and_sandbox_prompt_prioritizes_rule_for_rejection_decision()
            sandbox_cwd: Path::new("/tmp"),
            sandbox_permissions: SandboxPermissions::RequireEscalated,
            prefix_rule: None,
+            allow_execpolicy_amendment: true,
        })
        .await;

@@ -1420,6 +1456,7 @@ async fn mixed_rule_and_sandbox_prompt_rejects_when_granular_rules_are_disabled(
            sandbox_cwd: Path::new("/tmp"),
            sandbox_permissions: SandboxPermissions::RequireEscalated,
            prefix_rule: None,
+            allow_execpolicy_amendment: true,
        })
        .await;

@@ -1447,6 +1484,7 @@ async fn exec_approval_requirement_falls_back_to_heuristics() {
            sandbox_cwd: Path::new("/tmp"),
            sandbox_permissions: SandboxPermissions::UseDefault,
            prefix_rule: None,
+            allow_execpolicy_amendment: true,
        })
        .await;

@@ -1475,6 +1513,7 @@ async fn empty_bash_lc_script_falls_back_to_original_command() {
            sandbox_cwd: Path::new("/tmp"),
            sandbox_permissions: SandboxPermissions::UseDefault,
            prefix_rule: None,
+            allow_execpolicy_amendment: true,
        })
        .await;

@@ -1507,6 +1546,7 @@ async fn whitespace_bash_lc_script_falls_back_to_original_command() {
            sandbox_cwd: Path::new("/tmp"),
            sandbox_permissions: SandboxPermissions::UseDefault,
            prefix_rule: None,
+            allow_execpolicy_amendment: true,
        })
        .await;

@@ -1539,6 +1579,7 @@ async fn request_rule_uses_prefix_rule() {
            sandbox_cwd: Path::new("/tmp"),
            sandbox_permissions: SandboxPermissions::RequireEscalated,
            prefix_rule: Some(vec!["cargo".to_string(), "install".to_string()]),
+            allow_execpolicy_amendment: true,
        })
        .await;

@@ -1546,10 +1587,7 @@ async fn request_rule_uses_prefix_rule() {
        requirement,
        ExecApprovalRequirement::NeedsApproval {
            reason: None,
-            proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
-                "cargo".to_string(),
-                "install".to_string(),
-            ])),
+            proposed_execpolicy_amendment: None,
        }
    );
 }
@@ -1572,6 +1610,7 @@ async fn request_rule_falls_back_when_prefix_rule_does_not_approve_all_commands(
            sandbox_cwd: Path::new("/tmp"),
            sandbox_permissions: SandboxPermissions::RequireEscalated,
            prefix_rule: Some(vec!["cargo".to_string(), "install".to_string()]),
+            allow_execpolicy_amendment: true,
        })
        .await;

@@ -1579,11 +1618,7 @@ async fn request_rule_falls_back_when_prefix_rule_does_not_approve_all_commands(
        requirement,
        ExecApprovalRequirement::NeedsApproval {
            reason: None,
-            proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
-                "rm".to_string(),
-                "-rf".to_string(),
-                "/tmp/codex".to_string(),
-            ])),
+            proposed_execpolicy_amendment: None,
        }
    );
 }
@@ -1612,6 +1647,7 @@ async fn heuristics_apply_when_other_commands_match_policy() {
                sandbox_cwd: Path::new("/tmp"),
                sandbox_permissions: SandboxPermissions::UseDefault,
                prefix_rule: None,
+                allow_execpolicy_amendment: true,
            })
            .await,
        ExecApprovalRequirement::NeedsApproval {
@@ -2105,6 +2141,7 @@ async fn verify_approval_requirement_for_unsafe_powershell_command() {
                sandbox_cwd: Path::new("/tmp"),
                sandbox_permissions: permissions,
                prefix_rule: None,
+                allow_execpolicy_amendment: true,
            })
            .await,
        "{pwsh_approval_reason}"
@@ -2132,6 +2169,7 @@ async fn verify_approval_requirement_for_unsafe_powershell_command() {
                sandbox_cwd: Path::new("/tmp"),
                sandbox_permissions: permissions,
                prefix_rule: None,
+                allow_execpolicy_amendment: true,
            })
            .await,
        r#"On all platforms, a forbidden command should require approval
@@ -2155,6 +2193,7 @@ async fn verify_approval_requirement_for_unsafe_powershell_command() {
                sandbox_cwd: Path::new("/tmp"),
                sandbox_permissions: permissions,
                prefix_rule: None,
+                allow_execpolicy_amendment: true,
            })
            .await,
        r#"On all platforms, a forbidden command should require approval
@@ -2256,6 +2295,7 @@ async fn exec_approval_requirement_for_command(
            sandbox_cwd: Path::new("/tmp"),
            sandbox_permissions,
            prefix_rule,
+            allow_execpolicy_amendment: true,
        })
        .await
 }
--- a/codex-rs/core/src/session/tests.rs
+++ b/codex-rs/core/src/session/tests.rs
@@ -9533,6 +9533,7 @@ async fn rejects_escalated_permissions_when_policy_not_on_request() {
            sandbox_cwd: turn_context.cwd.as_path(),
            sandbox_permissions: SandboxPermissions::UseDefault,
            prefix_rule: None,
+            allow_execpolicy_amendment: true,
        })
        .await;
    assert!(matches!(
--- a/codex-rs/core/src/tools/handlers/shell.rs
+++ b/codex-rs/core/src/tools/handlers/shell.rs
@@ -193,6 +193,7 @@ async fn run_exec_like(args: RunExecLikeArgs) -> Result<FunctionToolOutput, Func
                effective_additional_permissions.sandbox_permissions
            },
            prefix_rule,
+            allow_execpolicy_amendment: true,
        })
        .await;

--- a/codex-rs/core/src/tools/handlers/unified_exec/exec_command.rs
+++ b/codex-rs/core/src/tools/handlers/unified_exec/exec_command.rs
@@ -130,6 +130,7 @@ impl ToolExecutor<ToolInvocation> for ExecCommandHandler {
        let environment = Arc::clone(&turn_environment.environment);
        let fs = environment.get_filesystem();
        let args: ExecCommandArgs = parse_arguments_with_base_path(&arguments, &cwd)?;
+        let model_provided_shell = args.shell.is_some();
        let hook_command = args.cmd.clone();
        maybe_emit_implicit_skill_invocation(
            session.as_ref(),
@@ -268,6 +269,7 @@ impl ToolExecutor<ToolInvocation> for ExecCommandHandler {
                        .permissions_preapproved,
                    justification,
                    prefix_rule,
+                    allow_execpolicy_amendment: !model_provided_shell,
                },
                &context,
            )
--- a/codex-rs/core/src/unified_exec/mod.rs
+++ b/codex-rs/core/src/unified_exec/mod.rs
@@ -103,6 +103,7 @@ pub(crate) struct ExecCommandRequest {
    pub additional_permissions_preapproved: bool,
    pub justification: Option<String>,
    pub prefix_rule: Option<Vec<String>>,
+    pub allow_execpolicy_amendment: bool,
 }

 #[derive(Debug)]
--- a/codex-rs/core/src/unified_exec/process_manager.rs
+++ b/codex-rs/core/src/unified_exec/process_manager.rs
@@ -1037,6 +1037,7 @@ impl UnifiedExecProcessManager {
                    request.sandbox_permissions
                },
                prefix_rule: request.prefix_rule.clone(),
+                allow_execpolicy_amendment: request.allow_execpolicy_amendment,
            })
            .await;
        let req = UnifiedExecToolRequest {
--- a/codex-rs/core/src/unified_exec/process_manager_tests.rs
+++ b/codex-rs/core/src/unified_exec/process_manager_tests.rs
@@ -191,6 +191,7 @@ async fn failed_initial_end_for_unstored_process_uses_fallback_output() {
        additional_permissions_preapproved: false,
        justification: None,
        prefix_rule: None,
+        allow_execpolicy_amendment: true,
    };

    let transcript = Arc::new(tokio::sync::Mutex::new(HeadTailBuffer::default()));
--- a/codex-rs/core/tests/suite/approvals.rs
+++ b/codex-rs/core/tests/suite/approvals.rs
@@ -2657,17 +2657,14 @@ async fn invalid_requested_prefix_rule_falls_back_for_compound_command() -> Resu
    .await?;

    let approval = expect_exec_approval(&test, command).await;
-    let amendment = approval
-        .proposed_execpolicy_amendment
-        .expect("should have a proposed execpolicy amendment");
-    assert!(amendment.command.contains(&command.to_string()));
+    assert_eq!(approval.proposed_execpolicy_amendment, None);

    Ok(())
 }

 #[tokio::test(flavor = "current_thread")]
 #[cfg(unix)]
-async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {
+async fn approving_fallback_rule_for_sandbox_override_is_not_persisted() -> Result<()> {
    let server = start_mock_server().await;
    let approval_policy = AskForApproval::OnRequest;
    let sandbox_policy = SandboxPolicy::new_read_only_policy();
@@ -2711,18 +2708,13 @@ async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {

    let approval = expect_exec_approval(&test, command).await;
    let approval_id = approval.effective_approval_id();
-    let amendment = approval
-        .proposed_execpolicy_amendment
-        .expect("should have a proposed execpolicy amendment");
-    assert!(amendment.command.contains(&command.to_string()));
+    assert_eq!(approval.proposed_execpolicy_amendment, None);

    test.codex
        .submit(Op::ExecApproval {
            id: approval_id,
            turn_id: None,
-            decision: ReviewDecision::ApprovedExecpolicyAmendment {
-                proposed_execpolicy_amendment: amendment.clone(),
-            },
+            decision: ReviewDecision::Approved,
        })
        .await?;
    wait_for_completion(&test).await;
@@ -2747,14 +2739,6 @@ async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {
        ]),
    )
    .await;
-    let second_results = mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-invalid-prefix-1", "done"),
-            ev_completed("resp-invalid-prefix-2"),
-        ]),
-    )
-    .await;

    submit_turn(
        &test,
@@ -2764,19 +2748,8 @@ async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {
    )
    .await?;

-    wait_for_completion_without_approval(&test).await;
-
-    let second_output = parse_result(
-        &second_results
-            .single_request()
-            .function_call_output(call_id),
-    );
-    assert_eq!(second_output.exit_code.unwrap_or(0), 0);
-    assert!(
-        second_output.stdout.is_empty(),
-        "unexpected stdout: {}",
-        second_output.stdout
-    );
+    let second_approval = expect_exec_approval(&test, command).await;
+    assert_eq!(second_approval.proposed_execpolicy_amendment, None);

    Ok(())
 }
--- a/codex-rs/core/tests/suite/unified_exec.rs
+++ b/codex-rs/core/tests/suite/unified_exec.rs
@@ -1,10 +1,14 @@
 use std::collections::HashMap;
+use std::env;
 use std::ffi::OsStr;
 use std::fs;
+#[cfg(unix)]
+use std::os::unix::fs::PermissionsExt;
 use std::sync::OnceLock;

 use anyhow::Context;
 use anyhow::Result;
+use codex_core::sandboxing::SandboxPermissions;
 use codex_exec_server::CreateDirectoryOptions;
 use codex_features::Feature;
 use codex_protocol::models::PermissionProfile;
@@ -423,6 +427,127 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
    Ok(())
 }

+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[cfg(unix)]
+async fn unified_exec_fake_shell_escalation_requires_approval() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let mut builder = test_codex().with_model("gpt-5.2").with_config(|config| {
+        config.use_experimental_unified_exec_tool = true;
+        config
+            .features
+            .enable(Feature::UnifiedExec)
+            .expect("test config should allow feature update");
+    });
+    let test = builder.build_remote_aware(&server).await?;
+
+    let fake_shell_dir = test.config.cwd.join("cc01_fake");
+    fs::create_dir_all(&fake_shell_dir)?;
+    let fake_shell = fake_shell_dir.join("bash");
+    let proof_file = env::current_dir()?.join("cli_8725_fake_shell_proof.txt");
+    let _ = fs::remove_file(&proof_file);
+
+    fs::write(
+        &fake_shell,
+        format!(
+            "#!/bin/sh\nprintf 'fake shell escaped' > {}\nexit 0\n",
+            codex_shell_command::parse_command::shlex_join(&[proof_file
+                .to_string_lossy()
+                .to_string()]),
+        ),
+    )?;
+    let mut permissions = fs::metadata(&fake_shell)?.permissions();
+    permissions.set_mode(0o755);
+    fs::set_permissions(&fake_shell, permissions)?;
+
+    let call_id = "uexec-fake-shell-escalation";
+    let args = json!({
+        "cmd": "ls",
+        "shell": fake_shell.to_string_lossy().to_string(),
+        "yield_time_ms": 1_000,
+        "sandbox_permissions": SandboxPermissions::RequireEscalated,
+        "justification": "validate fake shell sandbox escape guard",
+    });
+
+    mount_sse_sequence(
+        &server,
+        vec![sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ])],
+    )
+    .await;
+
+    let session_model = test.session_configured.model.clone();
+    test.codex
+        .submit(Op::UserTurn {
+            environments: None,
+            items: vec![UserInput::Text {
+                text: "run fake shell escalation regression".into(),
+                text_elements: Vec::new(),
+            }],
+            final_output_json_schema: None,
+            cwd: test.config.cwd.to_path_buf(),
+            approval_policy: AskForApproval::OnRequest,
+            approvals_reviewer: None,
+            sandbox_policy: SandboxPolicy::new_workspace_write_policy(),
+            permission_profile: None,
+            model: session_model,
+            effort: None,
+            summary: None,
+            service_tier: None,
+            collaboration_mode: None,
+            personality: None,
+        })
+        .await?;
+
+    let event = wait_for_event_with_timeout(
+        &test.codex,
+        |event| {
+            matches!(
+                event,
+                EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
+            )
+        },
+        Duration::from_secs(5),
+    )
+    .await;
+
+    match event {
+        EventMsg::ExecApprovalRequest(approval) => {
+            assert_eq!(
+                approval.command,
+                vec![
+                    fake_shell.to_string_lossy().to_string(),
+                    "-lc".to_string(),
+                    "ls".to_string(),
+                ],
+            );
+            assert_eq!(approval.proposed_execpolicy_amendment, None);
+        }
+        EventMsg::TurnComplete(_) => {
+            assert!(
+                !proof_file.exists(),
+                "fake shell ran without approval and created outside proof file at {}",
+                proof_file.display(),
+            );
+            panic!("expected exec approval before turn completion");
+        }
+        other => panic!("unexpected event: {other:?}"),
+    }
+
+    assert!(
+        !proof_file.exists(),
+        "fake shell should not run before approval: {}",
+        proof_file.display(),
+    );
+
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn unified_exec_resolves_relative_workdir() -> Result<()> {
    skip_if_no_network!(Ok(()));