#![allow(clippy::unwrap_used, clippy::expect_used)] use anyhow::Result; use codex_core::CodexThread; use codex_core::config::Constrained; use codex_core::config_loader::ConfigLayerStack; use codex_core::config_loader::ConfigLayerStackOrdering; use codex_core::config_loader::NetworkConstraints; use codex_core::config_loader::NetworkRequirementsToml; use codex_core::config_loader::RequirementSource; use codex_core::config_loader::Sourced; use codex_core::sandboxing::SandboxPermissions; use codex_features::Feature; use codex_protocol::approvals::NetworkApprovalProtocol; use codex_protocol::approvals::NetworkPolicyAmendment; use codex_protocol::approvals::NetworkPolicyRuleAction; use codex_protocol::protocol::ApplyPatchApprovalRequestEvent; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::ExecApprovalRequestEvent; use codex_protocol::protocol::ExecPolicyAmendment; use codex_protocol::protocol::Op; use codex_protocol::protocol::ReviewDecision; use codex_protocol::protocol::SandboxPolicy; use codex_protocol::user_input::UserInput; use core_test_support::responses::ev_apply_patch_function_call; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_function_call; use core_test_support::responses::ev_response_created; use core_test_support::responses::mount_sse_once; use core_test_support::responses::mount_sse_once_match; use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use core_test_support::wait_for_event_with_timeout; use core_test_support::zsh_fork::build_zsh_fork_test; use core_test_support::zsh_fork::restrictive_workspace_write_policy; use core_test_support::zsh_fork::zsh_fork_runtime; use pretty_assertions::assert_eq; use regex_lite::Regex; use serde_json::Value; use serde_json::json; use std::env; use std::fs; use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; use tempfile::TempDir; use wiremock::Mock; use wiremock::MockServer; use wiremock::Request; use wiremock::ResponseTemplate; use wiremock::matchers::method; use wiremock::matchers::path; #[derive(Clone, Copy)] enum TargetPath { Workspace(&'static str), OutsideWorkspace(&'static str), } impl TargetPath { fn resolve_for_patch(self, test: &TestCodex) -> (PathBuf, String) { match self { TargetPath::Workspace(name) => { let path = test.cwd.path().join(name); (path, name.to_string()) } TargetPath::OutsideWorkspace(name) => { let path = env::current_dir() .expect("current dir should be available") .join(name); (path.clone(), path.display().to_string()) } } } } #[derive(Clone)] enum ActionKind { WriteFile { target: TargetPath, content: &'static str, }, FetchUrlNoProxy { endpoint: &'static str, response_body: &'static str, }, FetchUrl { endpoint: &'static str, response_body: &'static str, }, RunCommand { command: &'static str, }, RunUnifiedExecCommand { command: &'static str, justification: Option<&'static str>, }, ApplyPatchFunction { target: TargetPath, content: &'static str, }, ApplyPatchShell { target: TargetPath, content: &'static str, }, } const DEFAULT_UNIFIED_EXEC_JUSTIFICATION: &str = "Requires escalated permissions to bypass the sandbox in tests."; impl ActionKind { async fn prepare( &self, test: &TestCodex, server: &MockServer, call_id: &str, sandbox_permissions: SandboxPermissions, ) -> Result<(Value, Option)> { match self { ActionKind::WriteFile { target, content } => { let (path, _) = target.resolve_for_patch(test); let _ = fs::remove_file(&path); let path_str = path.display().to_string(); let script = format!( "from pathlib import Path; path = Path({path_str:?}); content = {content:?}; path.write_text(content, encoding='utf-8'); print(path.read_text(encoding='utf-8'), end='')", ); let command = format!("python3 -c {script:?}"); let event = shell_event( call_id, &command, /*timeout_ms*/ 5_000, sandbox_permissions, )?; Ok((event, Some(command))) } ActionKind::FetchUrl { endpoint, response_body, } => { Mock::given(method("GET")) .and(path(*endpoint)) .respond_with( ResponseTemplate::new(200).set_body_string(response_body.to_string()), ) .mount(server) .await; let url = format!("{}{}", server.uri(), endpoint); let escaped_url = url.replace('\'', "\\'"); let script = format!( "import sys\nimport urllib.request\nurl = '{escaped_url}'\ntry:\n data = urllib.request.urlopen(url, timeout=2).read().decode()\n print('OK:' + data.strip())\nexcept Exception as exc:\n print('ERR:' + exc.__class__.__name__)\n sys.exit(1)", ); let command = format!("python3 -c \"{script}\""); let event = shell_event( call_id, &command, /*timeout_ms*/ 5_000, sandbox_permissions, )?; Ok((event, Some(command))) } ActionKind::FetchUrlNoProxy { endpoint, response_body, } => { Mock::given(method("GET")) .and(path(*endpoint)) .respond_with( ResponseTemplate::new(200).set_body_string(response_body.to_string()), ) .mount(server) .await; let url = format!("{}{}", server.uri(), endpoint); let escaped_url = url.replace('\'', "\\'"); let script = format!( "import sys\nimport urllib.request\nurl = '{escaped_url}'\nopener = urllib.request.build_opener(urllib.request.ProxyHandler({{}}))\ntry:\n data = opener.open(url, timeout=2).read().decode()\n print('OK:' + data.strip())\nexcept Exception as exc:\n print('ERR:' + exc.__class__.__name__)\n sys.exit(1)", ); let command = format!("python3 -c \"{script}\""); let event = shell_event( call_id, &command, /*timeout_ms*/ 5_000, sandbox_permissions, )?; Ok((event, Some(command))) } ActionKind::RunCommand { command } => { let event = shell_event( call_id, command, /*timeout_ms*/ 2_000, sandbox_permissions, )?; Ok((event, Some(command.to_string()))) } ActionKind::RunUnifiedExecCommand { command, justification, } => { let event = exec_command_event( call_id, command, Some(1000), sandbox_permissions, *justification, )?; Ok((event, Some(command.to_string()))) } ActionKind::ApplyPatchFunction { target, content } => { let (path, patch_path) = target.resolve_for_patch(test); let _ = fs::remove_file(&path); let patch = build_add_file_patch(&patch_path, content); Ok((ev_apply_patch_function_call(call_id, &patch), None)) } ActionKind::ApplyPatchShell { target, content } => { let (path, patch_path) = target.resolve_for_patch(test); let _ = fs::remove_file(&path); let patch = build_add_file_patch(&patch_path, content); let command = shell_apply_patch_command(&patch); // Bazel may need to launch the configured Codex helper binary // to apply the verified patch, which can exceed the normal // short command timeout on slower CI runners. let timeout_ms = 30_000; let event = shell_event(call_id, &command, timeout_ms, sandbox_permissions)?; Ok((event, Some(command))) } } } } fn build_add_file_patch(patch_path: &str, content: &str) -> String { format!("*** Begin Patch\n*** Add File: {patch_path}\n+{content}\n*** End Patch\n") } fn shell_apply_patch_command(patch: &str) -> String { let mut script = String::from("apply_patch <<'PATCH'\n"); script.push_str(patch); if !patch.ends_with('\n') { script.push('\n'); } script.push_str("PATCH\n"); script } fn shell_event( call_id: &str, command: &str, timeout_ms: u64, sandbox_permissions: SandboxPermissions, ) -> Result { shell_event_with_prefix_rule( call_id, command, timeout_ms, sandbox_permissions, /*prefix_rule*/ None, ) } fn shell_event_with_prefix_rule( call_id: &str, command: &str, timeout_ms: u64, sandbox_permissions: SandboxPermissions, prefix_rule: Option>, ) -> Result { let mut args = json!({ "command": command, "timeout_ms": timeout_ms, }); if sandbox_permissions.requests_sandbox_override() { args["sandbox_permissions"] = json!(sandbox_permissions); } if let Some(prefix_rule) = prefix_rule { args["prefix_rule"] = json!(prefix_rule); } let args_str = serde_json::to_string(&args)?; Ok(ev_function_call(call_id, "shell_command", &args_str)) } fn exec_command_event( call_id: &str, cmd: &str, yield_time_ms: Option, sandbox_permissions: SandboxPermissions, justification: Option<&str>, ) -> Result { let mut args = json!({ "cmd": cmd.to_string(), }); if let Some(yield_time_ms) = yield_time_ms { args["yield_time_ms"] = json!(yield_time_ms); } if sandbox_permissions.requests_sandbox_override() { args["sandbox_permissions"] = json!(sandbox_permissions); let reason = justification.unwrap_or(DEFAULT_UNIFIED_EXEC_JUSTIFICATION); args["justification"] = json!(reason); } let args_str = serde_json::to_string(&args)?; Ok(ev_function_call(call_id, "exec_command", &args_str)) } #[derive(Clone)] enum Expectation { FileCreated { target: TargetPath, content: &'static str, }, FileCreatedNoExitCode { target: TargetPath, content: &'static str, }, PatchApplied { target: TargetPath, content: &'static str, }, FileNotCreated { target: TargetPath, message_contains: &'static [&'static str], }, NetworkSuccess { body_contains: &'static str, }, NetworkSuccessNoExitCode { body_contains: &'static str, }, NetworkFailure { expect_tag: &'static str, }, CommandSuccess { stdout_contains: &'static str, }, CommandSuccessNoExitCode { stdout_contains: &'static str, }, CommandFailure { output_contains: &'static str, }, } impl Expectation { fn verify(&self, test: &TestCodex, result: &CommandResult) -> Result<()> { match self { Expectation::FileCreated { target, content } => { let (path, _) = target.resolve_for_patch(test); assert_eq!( result.exit_code, Some(0), "expected successful exit for {path:?}" ); assert!( result.stdout.contains(content), "stdout missing {content:?}: {}", result.stdout ); let file_contents = fs::read_to_string(&path)?; assert!( file_contents.contains(content), "file contents missing {content:?}: {file_contents}" ); let _ = fs::remove_file(path); } Expectation::FileCreatedNoExitCode { target, content } => { let (path, _) = target.resolve_for_patch(test); assert!( result.exit_code.is_none() || result.exit_code == Some(0), "expected no exit code for {path:?}", ); assert!( result.stdout.contains(content), "stdout missing {content:?}: {}", result.stdout ); let file_contents = fs::read_to_string(&path)?; assert!( file_contents.contains(content), "file contents missing {content:?}: {file_contents}" ); let _ = fs::remove_file(path); } Expectation::PatchApplied { target, content } => { let (path, _) = target.resolve_for_patch(test); match result.exit_code { Some(0) | None => { if result.exit_code.is_none() { assert!( result.stdout.contains("Success."), "patch output missing success indicator: {}", result.stdout ); } } Some(code) => panic!( "expected successful patch exit for {:?}, got {code} with stdout {}", path, result.stdout ), } let file_contents = fs::read_to_string(&path)?; assert!( file_contents.contains(content), "patched file missing {content:?}: {file_contents}" ); let _ = fs::remove_file(path); } Expectation::FileNotCreated { target, message_contains, } => { let (path, _) = target.resolve_for_patch(test); assert_ne!( result.exit_code, Some(0), "expected non-zero exit for {path:?}" ); for needle in *message_contains { if needle.contains('|') { let options: Vec<&str> = needle.split('|').collect(); let matches_any = options.iter().any(|option| result.stdout.contains(option)); assert!( matches_any, "stdout missing one of {options:?}: {}", result.stdout ); } else { assert!( result.stdout.contains(needle), "stdout missing {needle:?}: {}", result.stdout ); } } assert!( !path.exists(), "command should not create {path:?}, but file exists" ); } Expectation::NetworkSuccess { body_contains } => { assert_eq!( result.exit_code, Some(0), "expected successful network exit: {}", result.stdout ); assert!( result.stdout.contains("OK:"), "stdout missing OK prefix: {}", result.stdout ); assert!( result.stdout.contains(body_contains), "stdout missing body text {body_contains:?}: {}", result.stdout ); } Expectation::NetworkSuccessNoExitCode { body_contains } => { assert!( result.exit_code.is_none() || result.exit_code == Some(0), "expected no exit code for successful network call: {}", result.stdout ); assert!( result.stdout.contains("OK:"), "stdout missing OK prefix: {}", result.stdout ); assert!( result.stdout.contains(body_contains), "stdout missing body text {body_contains:?}: {}", result.stdout ); } Expectation::NetworkFailure { expect_tag } => { assert_ne!( result.exit_code, Some(0), "expected non-zero exit for network failure: {}", result.stdout ); assert!( result.stdout.contains("ERR:"), "stdout missing ERR prefix: {}", result.stdout ); assert!( result.stdout.contains(expect_tag), "stdout missing expected tag {expect_tag:?}: {}", result.stdout ); } Expectation::CommandSuccess { stdout_contains } => { assert_eq!( result.exit_code, Some(0), "expected successful trusted command exit: {}", result.stdout ); assert!( result.stdout.contains(stdout_contains), "trusted command stdout missing {stdout_contains:?}: {}", result.stdout ); } Expectation::CommandSuccessNoExitCode { stdout_contains } => { assert!( result.exit_code.is_none() || result.exit_code == Some(0), "expected no exit code for trusted command: {}", result.stdout ); assert!( result.stdout.contains(stdout_contains), "trusted command stdout missing {stdout_contains:?}: {}", result.stdout ); } Expectation::CommandFailure { output_contains } => { assert_ne!( result.exit_code, Some(0), "expected non-zero exit for command failure: {}", result.stdout ); assert!( result.stdout.contains(output_contains), "command failure stderr missing {output_contains:?}: {}", result.stdout ); } } Ok(()) } } #[derive(Clone)] enum Outcome { Auto, ExecApproval { decision: ReviewDecision, expected_reason: Option<&'static str>, }, PatchApproval { decision: ReviewDecision, expected_reason: Option<&'static str>, }, } #[derive(Clone)] struct ScenarioSpec { name: &'static str, approval_policy: AskForApproval, sandbox_policy: SandboxPolicy, action: ActionKind, sandbox_permissions: SandboxPermissions, features: Vec, model_override: Option<&'static str>, outcome: Outcome, expectation: Expectation, } struct CommandResult { exit_code: Option, stdout: String, } async fn submit_turn( test: &TestCodex, prompt: &str, approval_policy: AskForApproval, sandbox_policy: SandboxPolicy, ) -> Result<()> { let session_model = test.session_configured.model.clone(); test.codex .submit(Op::UserTurn { items: vec![UserInput::Text { text: prompt.into(), text_elements: Vec::new(), }], final_output_json_schema: None, cwd: test.cwd.path().to_path_buf(), approval_policy, approvals_reviewer: None, sandbox_policy, model: session_model, effort: None, summary: None, service_tier: None, collaboration_mode: None, personality: None, }) .await?; Ok(()) } fn parse_result(item: &Value) -> CommandResult { let output_str = item .get("output") .and_then(Value::as_str) .expect("shell output payload"); match serde_json::from_str::(output_str) { Ok(parsed) => { let exit_code = parsed["metadata"]["exit_code"].as_i64(); let stdout = parsed["output"].as_str().unwrap_or_default().to_string(); CommandResult { exit_code, stdout } } Err(_) => { let structured = Regex::new(r"(?s)^Exit code:\s*(-?\d+).*?Output:\n(.*)$").unwrap(); let regex = Regex::new(r"(?s)^.*?Process exited with code (\d+)\n.*?Output:\n(.*)$").unwrap(); // parse freeform output if let Some(captures) = structured.captures(output_str) { let exit_code = captures.get(1).unwrap().as_str().parse::().unwrap(); let output = captures.get(2).unwrap().as_str(); CommandResult { exit_code: Some(exit_code), stdout: output.to_string(), } } else if let Some(captures) = regex.captures(output_str) { let exit_code = captures.get(1).unwrap().as_str().parse::().unwrap(); let output = captures.get(2).unwrap().as_str(); CommandResult { exit_code: Some(exit_code), stdout: output.to_string(), } } else { CommandResult { exit_code: None, stdout: output_str.to_string(), } } } } } async fn expect_exec_approval( test: &TestCodex, expected_command: &str, ) -> ExecApprovalRequestEvent { let event = wait_for_event(&test.codex, |event| { matches!( event, EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_) ) }) .await; match event { EventMsg::ExecApprovalRequest(approval) => { let last_arg = approval .command .last() .map(std::string::String::as_str) .unwrap_or_default(); assert_eq!(last_arg, expected_command); approval } EventMsg::TurnComplete(_) => panic!("expected approval request before completion"), other => panic!("unexpected event: {other:?}"), } } async fn expect_patch_approval( test: &TestCodex, expected_call_id: &str, ) -> ApplyPatchApprovalRequestEvent { let event = wait_for_event(&test.codex, |event| { matches!( event, EventMsg::ApplyPatchApprovalRequest(_) | EventMsg::TurnComplete(_) ) }) .await; match event { EventMsg::ApplyPatchApprovalRequest(approval) => { assert_eq!(approval.call_id, expected_call_id); approval } EventMsg::TurnComplete(_) => panic!("expected patch approval request before completion"), other => panic!("unexpected event: {other:?}"), } } async fn wait_for_completion_without_approval(test: &TestCodex) { let event = wait_for_event(&test.codex, |event| { matches!( event, EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_) ) }) .await; match event { EventMsg::TurnComplete(_) => {} EventMsg::ExecApprovalRequest(event) => { panic!("unexpected approval request: {:?}", event.command) } other => panic!("unexpected event: {other:?}"), } } async fn wait_for_completion(test: &TestCodex) { wait_for_event(&test.codex, |event| { matches!(event, EventMsg::TurnComplete(_)) }) .await; } fn body_contains(req: &Request, text: &str) -> bool { let is_zstd = req .headers .get("content-encoding") .and_then(|value| value.to_str().ok()) .is_some_and(|value| { value .split(',') .any(|entry| entry.trim().eq_ignore_ascii_case("zstd")) }); let bytes = if is_zstd { zstd::stream::decode_all(std::io::Cursor::new(&req.body)).ok() } else { Some(req.body.clone()) }; bytes .and_then(|body| String::from_utf8(body).ok()) .is_some_and(|body| body.contains(text)) } async fn wait_for_spawned_thread(test: &TestCodex) -> Result> { let deadline = tokio::time::Instant::now() + Duration::from_secs(2); loop { let ids = test.thread_manager.list_thread_ids().await; if let Some(thread_id) = ids .iter() .find(|id| **id != test.session_configured.session_id) { return test .thread_manager .get_thread(*thread_id) .await .map_err(anyhow::Error::from); } if tokio::time::Instant::now() >= deadline { anyhow::bail!("timed out waiting for spawned thread"); } tokio::time::sleep(Duration::from_millis(10)).await; } } fn scenarios() -> Vec { use AskForApproval::*; let workspace_write = |network_access| SandboxPolicy::WorkspaceWrite { writable_roots: vec![], read_only_access: Default::default(), network_access, exclude_tmpdir_env_var: false, exclude_slash_tmp: false, }; vec![ ScenarioSpec { name: "danger_full_access_on_request_allows_outside_write", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::WriteFile { target: TargetPath::OutsideWorkspace("dfa_on_request.txt"), content: "danger-on-request", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::Auto, expectation: Expectation::FileCreated { target: TargetPath::OutsideWorkspace("dfa_on_request.txt"), content: "danger-on-request", }, }, ScenarioSpec { name: "danger_full_access_on_request_allows_outside_write_gpt_5_1_no_exit", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::WriteFile { target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"), content: "danger-on-request", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1"), outcome: Outcome::Auto, expectation: Expectation::FileCreated { target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"), content: "danger-on-request", }, }, ScenarioSpec { name: "danger_full_access_on_request_allows_network", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::FetchUrlNoProxy { endpoint: "/dfa/network", response_body: "danger-network-ok", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::Auto, expectation: Expectation::NetworkSuccess { body_contains: "danger-network-ok", }, }, ScenarioSpec { name: "danger_full_access_on_request_allows_network_gpt_5_1_no_exit", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::FetchUrlNoProxy { endpoint: "/dfa/network", response_body: "danger-network-ok", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1"), outcome: Outcome::Auto, expectation: Expectation::NetworkSuccessNoExitCode { body_contains: "danger-network-ok", }, }, ScenarioSpec { name: "trusted_command_unless_trusted_runs_without_prompt", approval_policy: UnlessTrusted, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::RunCommand { command: "echo trusted-unless", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::Auto, expectation: Expectation::CommandSuccess { stdout_contains: "trusted-unless", }, }, ScenarioSpec { name: "trusted_command_unless_trusted_runs_without_prompt_gpt_5_1_no_exit", approval_policy: UnlessTrusted, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::RunCommand { command: "echo trusted-unless", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1"), outcome: Outcome::Auto, expectation: Expectation::CommandSuccessNoExitCode { stdout_contains: "trusted-unless", }, }, ScenarioSpec { name: "cat_redirect_unless_trusted_requires_approval", approval_policy: UnlessTrusted, sandbox_policy: workspace_write(false), action: ActionKind::RunCommand { command: r#"cat < "hello" > /var/test.txt"#, }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Denied, expected_reason: None, }, expectation: Expectation::CommandFailure { output_contains: "rejected by user", }, }, ScenarioSpec { name: "cat_redirect_on_request_requires_approval", approval_policy: OnRequest, sandbox_policy: workspace_write(false), action: ActionKind::RunCommand { command: r#"cat < "hello" > /var/test.txt"#, }, sandbox_permissions: SandboxPermissions::RequireEscalated, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Denied, expected_reason: None, }, expectation: Expectation::CommandFailure { output_contains: "rejected by user", }, }, ScenarioSpec { name: "danger_full_access_on_failure_allows_outside_write", approval_policy: OnFailure, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::WriteFile { target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"), content: "danger-on-failure", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::Auto, expectation: Expectation::FileCreated { target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"), content: "danger-on-failure", }, }, ScenarioSpec { name: "danger_full_access_on_failure_allows_outside_write_gpt_5_1_no_exit", approval_policy: OnFailure, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::WriteFile { target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"), content: "danger-on-failure", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1"), outcome: Outcome::Auto, expectation: Expectation::FileCreatedNoExitCode { target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"), content: "danger-on-failure", }, }, ScenarioSpec { name: "danger_full_access_unless_trusted_requests_approval", approval_policy: UnlessTrusted, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::WriteFile { target: TargetPath::OutsideWorkspace("dfa_unless_trusted.txt"), content: "danger-unless-trusted", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::FileCreated { target: TargetPath::OutsideWorkspace("dfa_unless_trusted.txt"), content: "danger-unless-trusted", }, }, ScenarioSpec { name: "danger_full_access_unless_trusted_requests_approval_gpt_5_1_no_exit", approval_policy: UnlessTrusted, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::WriteFile { target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"), content: "danger-unless-trusted", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::FileCreatedNoExitCode { target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"), content: "danger-unless-trusted", }, }, ScenarioSpec { name: "danger_full_access_never_allows_outside_write", approval_policy: Never, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::WriteFile { target: TargetPath::OutsideWorkspace("dfa_never.txt"), content: "danger-never", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::Auto, expectation: Expectation::FileCreated { target: TargetPath::OutsideWorkspace("dfa_never.txt"), content: "danger-never", }, }, ScenarioSpec { name: "danger_full_access_never_allows_outside_write_gpt_5_1_no_exit", approval_policy: Never, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::WriteFile { target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"), content: "danger-never", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1"), outcome: Outcome::Auto, expectation: Expectation::FileCreatedNoExitCode { target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"), content: "danger-never", }, }, ScenarioSpec { name: "read_only_on_request_requires_approval", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::WriteFile { target: TargetPath::Workspace("ro_on_request.txt"), content: "read-only-approval", }, sandbox_permissions: SandboxPermissions::RequireEscalated, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::FileCreated { target: TargetPath::Workspace("ro_on_request.txt"), content: "read-only-approval", }, }, ScenarioSpec { name: "read_only_on_request_requires_approval_gpt_5_1_no_exit", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::WriteFile { target: TargetPath::Workspace("ro_on_request_5_1.txt"), content: "read-only-approval", }, sandbox_permissions: SandboxPermissions::RequireEscalated, features: vec![], model_override: Some("gpt-5.1"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::FileCreatedNoExitCode { target: TargetPath::Workspace("ro_on_request_5_1.txt"), content: "read-only-approval", }, }, ScenarioSpec { name: "trusted_command_on_request_read_only_runs_without_prompt", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::RunCommand { command: "echo trusted-read-only", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::Auto, expectation: Expectation::CommandSuccess { stdout_contains: "trusted-read-only", }, }, ScenarioSpec { name: "trusted_command_on_request_read_only_runs_without_prompt_gpt_5_1_no_exit", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::RunCommand { command: "echo trusted-read-only", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1"), outcome: Outcome::Auto, expectation: Expectation::CommandSuccessNoExitCode { stdout_contains: "trusted-read-only", }, }, ScenarioSpec { name: "read_only_on_request_blocks_network", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::FetchUrl { endpoint: "/ro/network-blocked", response_body: "should-not-see", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: None, outcome: Outcome::Auto, expectation: Expectation::NetworkFailure { expect_tag: "ERR:" }, }, ScenarioSpec { name: "read_only_on_request_denied_blocks_execution", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::WriteFile { target: TargetPath::Workspace("ro_on_request_denied.txt"), content: "should-not-write", }, sandbox_permissions: SandboxPermissions::RequireEscalated, features: vec![], model_override: None, outcome: Outcome::ExecApproval { decision: ReviewDecision::Denied, expected_reason: None, }, expectation: Expectation::FileNotCreated { target: TargetPath::Workspace("ro_on_request_denied.txt"), message_contains: &["exec command rejected by user"], }, }, #[cfg(not(target_os = "linux"))] // TODO (pakrym): figure out why linux behaves differently ScenarioSpec { name: "read_only_on_failure_escalates_after_sandbox_error", approval_policy: OnFailure, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::WriteFile { target: TargetPath::Workspace("ro_on_failure.txt"), content: "read-only-on-failure", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: Some("command failed; retry without sandbox?"), }, expectation: Expectation::FileCreated { target: TargetPath::Workspace("ro_on_failure.txt"), content: "read-only-on-failure", }, }, #[cfg(not(target_os = "linux"))] ScenarioSpec { name: "read_only_on_failure_escalates_after_sandbox_error_gpt_5_1_no_exit", approval_policy: OnFailure, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::WriteFile { target: TargetPath::Workspace("ro_on_failure_5_1.txt"), content: "read-only-on-failure", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: Some("command failed; retry without sandbox?"), }, expectation: Expectation::FileCreatedNoExitCode { target: TargetPath::Workspace("ro_on_failure_5_1.txt"), content: "read-only-on-failure", }, }, ScenarioSpec { name: "read_only_on_request_network_escalates_when_approved", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::FetchUrl { endpoint: "/ro/network-approved", response_body: "read-only-network-ok", }, sandbox_permissions: SandboxPermissions::RequireEscalated, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::NetworkSuccess { body_contains: "read-only-network-ok", }, }, ScenarioSpec { name: "read_only_on_request_network_escalates_when_approved_gpt_5_1_no_exit", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::FetchUrl { endpoint: "/ro/network-approved", response_body: "read-only-network-ok", }, sandbox_permissions: SandboxPermissions::RequireEscalated, features: vec![], model_override: Some("gpt-5.1"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::NetworkSuccessNoExitCode { body_contains: "read-only-network-ok", }, }, ScenarioSpec { name: "apply_patch_shell_command_requires_patch_approval", approval_policy: UnlessTrusted, sandbox_policy: workspace_write(false), action: ActionKind::ApplyPatchShell { target: TargetPath::Workspace("apply_patch_shell.txt"), content: "shell-apply-patch", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: None, outcome: Outcome::PatchApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::PatchApplied { target: TargetPath::Workspace("apply_patch_shell.txt"), content: "shell-apply-patch", }, }, ScenarioSpec { name: "apply_patch_function_auto_inside_workspace", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::ApplyPatchFunction { target: TargetPath::Workspace("apply_patch_function.txt"), content: "function-apply-patch", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1-codex"), outcome: Outcome::Auto, expectation: Expectation::PatchApplied { target: TargetPath::Workspace("apply_patch_function.txt"), content: "function-apply-patch", }, }, ScenarioSpec { name: "apply_patch_function_danger_allows_outside_workspace", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::ApplyPatchFunction { target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"), content: "function-patch-danger", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![Feature::ApplyPatchFreeform], model_override: Some("gpt-5.1-codex"), outcome: Outcome::Auto, expectation: Expectation::PatchApplied { target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"), content: "function-patch-danger", }, }, ScenarioSpec { name: "apply_patch_function_outside_requires_patch_approval", approval_policy: OnRequest, sandbox_policy: workspace_write(false), action: ActionKind::ApplyPatchFunction { target: TargetPath::OutsideWorkspace("apply_patch_function_outside.txt"), content: "function-patch-outside", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1-codex"), outcome: Outcome::PatchApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::PatchApplied { target: TargetPath::OutsideWorkspace("apply_patch_function_outside.txt"), content: "function-patch-outside", }, }, ScenarioSpec { name: "apply_patch_function_outside_denied_blocks_patch", approval_policy: OnRequest, sandbox_policy: workspace_write(false), action: ActionKind::ApplyPatchFunction { target: TargetPath::OutsideWorkspace("apply_patch_function_outside_denied.txt"), content: "function-patch-outside-denied", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1-codex"), outcome: Outcome::PatchApproval { decision: ReviewDecision::Denied, expected_reason: None, }, expectation: Expectation::FileNotCreated { target: TargetPath::OutsideWorkspace("apply_patch_function_outside_denied.txt"), message_contains: &["patch rejected by user"], }, }, ScenarioSpec { name: "apply_patch_shell_command_outside_requires_patch_approval", approval_policy: OnRequest, sandbox_policy: workspace_write(false), action: ActionKind::ApplyPatchShell { target: TargetPath::OutsideWorkspace("apply_patch_shell_outside.txt"), content: "shell-patch-outside", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: None, outcome: Outcome::PatchApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::PatchApplied { target: TargetPath::OutsideWorkspace("apply_patch_shell_outside.txt"), content: "shell-patch-outside", }, }, ScenarioSpec { name: "apply_patch_function_unless_trusted_requires_patch_approval", approval_policy: UnlessTrusted, sandbox_policy: workspace_write(false), action: ActionKind::ApplyPatchFunction { target: TargetPath::Workspace("apply_patch_function_unless_trusted.txt"), content: "function-patch-unless-trusted", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1-codex"), outcome: Outcome::PatchApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::PatchApplied { target: TargetPath::Workspace("apply_patch_function_unless_trusted.txt"), content: "function-patch-unless-trusted", }, }, ScenarioSpec { name: "apply_patch_function_never_rejects_outside_workspace", approval_policy: Never, sandbox_policy: workspace_write(false), action: ActionKind::ApplyPatchFunction { target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"), content: "function-patch-never", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1-codex"), outcome: Outcome::Auto, expectation: Expectation::FileNotCreated { target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"), message_contains: &[ "patch rejected: writing outside of the project; rejected by user approval settings", ], }, }, ScenarioSpec { name: "read_only_unless_trusted_requires_approval", approval_policy: UnlessTrusted, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::WriteFile { target: TargetPath::Workspace("ro_unless_trusted.txt"), content: "read-only-unless-trusted", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::FileCreated { target: TargetPath::Workspace("ro_unless_trusted.txt"), content: "read-only-unless-trusted", }, }, ScenarioSpec { name: "read_only_unless_trusted_requires_approval_gpt_5_1_no_exit", approval_policy: UnlessTrusted, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::WriteFile { target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"), content: "read-only-unless-trusted", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5.1"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::FileCreatedNoExitCode { target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"), content: "read-only-unless-trusted", }, }, ScenarioSpec { name: "read_only_never_reports_sandbox_failure", approval_policy: Never, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::WriteFile { target: TargetPath::Workspace("ro_never.txt"), content: "read-only-never", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: None, outcome: Outcome::Auto, expectation: Expectation::FileNotCreated { target: TargetPath::Workspace("ro_never.txt"), message_contains: if cfg!(target_os = "linux") { &["Permission denied|Read-only file system"] } else { &[ "Permission denied|Operation not permitted|operation not permitted|\ Read-only file system", ] }, }, }, ScenarioSpec { name: "trusted_command_never_runs_without_prompt", approval_policy: Never, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::RunCommand { command: "echo trusted-never", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::Auto, expectation: Expectation::CommandSuccess { stdout_contains: "trusted-never", }, }, ScenarioSpec { name: "workspace_write_on_request_allows_workspace_write", approval_policy: OnRequest, sandbox_policy: workspace_write(false), action: ActionKind::WriteFile { target: TargetPath::Workspace("ww_on_request.txt"), content: "workspace-on-request", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::Auto, expectation: Expectation::FileCreated { target: TargetPath::Workspace("ww_on_request.txt"), content: "workspace-on-request", }, }, ScenarioSpec { name: "workspace_write_network_disabled_blocks_network", approval_policy: OnRequest, sandbox_policy: workspace_write(false), action: ActionKind::FetchUrl { endpoint: "/ww/network-blocked", response_body: "workspace-network-blocked", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: None, outcome: Outcome::Auto, expectation: Expectation::NetworkFailure { expect_tag: "ERR:" }, }, ScenarioSpec { name: "workspace_write_on_request_requires_approval_outside_workspace", approval_policy: OnRequest, sandbox_policy: workspace_write(false), action: ActionKind::WriteFile { target: TargetPath::OutsideWorkspace("ww_on_request_outside.txt"), content: "workspace-on-request-outside", }, sandbox_permissions: SandboxPermissions::RequireEscalated, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::FileCreated { target: TargetPath::OutsideWorkspace("ww_on_request_outside.txt"), content: "workspace-on-request-outside", }, }, ScenarioSpec { name: "workspace_write_network_enabled_allows_network", approval_policy: OnRequest, sandbox_policy: workspace_write(true), action: ActionKind::FetchUrl { endpoint: "/ww/network-ok", response_body: "workspace-network-ok", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::Auto, expectation: Expectation::NetworkSuccess { body_contains: "workspace-network-ok", }, }, #[cfg(not(target_os = "linux"))] // TODO (pakrym): figure out why linux behaves differently ScenarioSpec { name: "workspace_write_on_failure_escalates_outside_workspace", approval_policy: OnFailure, sandbox_policy: workspace_write(false), action: ActionKind::WriteFile { target: TargetPath::OutsideWorkspace("ww_on_failure.txt"), content: "workspace-on-failure", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: Some("command failed; retry without sandbox?"), }, expectation: Expectation::FileCreated { target: TargetPath::OutsideWorkspace("ww_on_failure.txt"), content: "workspace-on-failure", }, }, ScenarioSpec { name: "workspace_write_unless_trusted_requires_approval_outside_workspace", approval_policy: UnlessTrusted, sandbox_policy: workspace_write(false), action: ActionKind::WriteFile { target: TargetPath::OutsideWorkspace("ww_unless_trusted.txt"), content: "workspace-unless-trusted", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: Some("gpt-5"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: None, }, expectation: Expectation::FileCreated { target: TargetPath::OutsideWorkspace("ww_unless_trusted.txt"), content: "workspace-unless-trusted", }, }, ScenarioSpec { name: "workspace_write_never_blocks_outside_workspace", approval_policy: Never, sandbox_policy: workspace_write(false), action: ActionKind::WriteFile { target: TargetPath::OutsideWorkspace("ww_never.txt"), content: "workspace-never", }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![], model_override: None, outcome: Outcome::Auto, expectation: Expectation::FileNotCreated { target: TargetPath::OutsideWorkspace("ww_never.txt"), message_contains: if cfg!(target_os = "linux") { &["Permission denied|Read-only file system"] } else { &[ "Permission denied|Operation not permitted|operation not permitted|\ Read-only file system", ] }, }, }, ScenarioSpec { name: "unified exec on request no approval for safe command", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::RunUnifiedExecCommand { command: "echo \"hello unified exec\"", justification: None, }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![Feature::UnifiedExec], model_override: Some("gpt-5"), outcome: Outcome::Auto, expectation: Expectation::CommandSuccess { stdout_contains: "hello unified exec", }, }, #[cfg(not(all(target_os = "linux", target_arch = "aarch64")))] // Linux sandbox arg0 test workaround doesn't work on ARM ScenarioSpec { name: "unified exec on request escalated requires approval", approval_policy: OnRequest, sandbox_policy: SandboxPolicy::new_read_only_policy(), action: ActionKind::RunUnifiedExecCommand { command: "python3 -c 'print('\"'\"'escalated unified exec'\"'\"')'", justification: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION), }, sandbox_permissions: SandboxPermissions::RequireEscalated, features: vec![Feature::UnifiedExec], model_override: Some("gpt-5"), outcome: Outcome::ExecApproval { decision: ReviewDecision::Approved, expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION), }, expectation: Expectation::CommandSuccess { stdout_contains: "escalated unified exec", }, }, ScenarioSpec { name: "unified exec on request requires approval unless trusted", approval_policy: AskForApproval::UnlessTrusted, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::RunUnifiedExecCommand { command: "git reset --hard", justification: None, }, sandbox_permissions: SandboxPermissions::UseDefault, features: vec![Feature::UnifiedExec], model_override: None, outcome: Outcome::ExecApproval { decision: ReviewDecision::Denied, expected_reason: None, }, expectation: Expectation::CommandFailure { output_contains: "rejected by user", }, }, ScenarioSpec { name: "safe command with heredoc and redirect still requires approval", approval_policy: AskForApproval::OnRequest, sandbox_policy: workspace_write(false), action: ActionKind::RunUnifiedExecCommand { command: "cat <<'EOF' > /tmp/out.txt \nhello\nEOF", justification: None, }, sandbox_permissions: SandboxPermissions::RequireEscalated, features: vec![Feature::UnifiedExec], model_override: None, outcome: Outcome::ExecApproval { decision: ReviewDecision::Denied, expected_reason: None, }, expectation: Expectation::CommandFailure { output_contains: "rejected by user", }, }, ScenarioSpec { name: "compound command with one safe command still requires approval", approval_policy: AskForApproval::OnRequest, sandbox_policy: workspace_write(false), action: ActionKind::RunUnifiedExecCommand { command: "cat ./one.txt && touch ./two.txt", justification: None, }, sandbox_permissions: SandboxPermissions::RequireEscalated, features: vec![Feature::UnifiedExec], model_override: None, outcome: Outcome::ExecApproval { decision: ReviewDecision::Denied, expected_reason: None, }, expectation: Expectation::CommandFailure { output_contains: "rejected by user", }, }, ] } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn approval_matrix_covers_all_modes() -> Result<()> { skip_if_no_network!(Ok(())); for scenario in scenarios() { run_scenario(&scenario).await?; } Ok(()) } async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> { eprintln!("running approval scenario: {}", scenario.name); let server = start_mock_server().await; let approval_policy = scenario.approval_policy; let sandbox_policy = scenario.sandbox_policy.clone(); let features = scenario.features.clone(); let model_override = scenario.model_override; let model = model_override.unwrap_or("gpt-5.1"); let mut builder = test_codex().with_model(model).with_config(move |config| { config.permissions.approval_policy = Constrained::allow_any(approval_policy); config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy.clone()); for feature in features { config .features .enable(feature) .expect("test config should allow feature update"); } }); let test = builder.build(&server).await?; let call_id = scenario.name; let (event, expected_command) = scenario .action .prepare(&test, &server, call_id, scenario.sandbox_permissions) .await?; if let Some(command) = expected_command.as_deref() { eprintln!("approval scenario {} command: {command}", scenario.name); } let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-1"), event, ev_completed("resp-1"), ]), ) .await; let results_mock = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-1", "done"), ev_completed("resp-2"), ]), ) .await; submit_turn( &test, scenario.name, scenario.approval_policy, scenario.sandbox_policy.clone(), ) .await?; match &scenario.outcome { Outcome::Auto => { wait_for_completion_without_approval(&test).await; } Outcome::ExecApproval { decision, expected_reason, } => { let command = expected_command .as_deref() .expect("exec approval requires shell command"); let approval = expect_exec_approval(&test, command).await; if let Some(expected_reason) = expected_reason { assert_eq!( approval.reason.as_deref(), Some(*expected_reason), "unexpected approval reason for {}", scenario.name ); } test.codex .submit(Op::ExecApproval { id: approval.effective_approval_id(), turn_id: None, decision: decision.clone(), }) .await?; wait_for_completion(&test).await; } Outcome::PatchApproval { decision, expected_reason, } => { let approval = expect_patch_approval(&test, call_id).await; if let Some(expected_reason) = expected_reason { assert_eq!( approval.reason.as_deref(), Some(*expected_reason), "unexpected patch approval reason for {}", scenario.name ); } test.codex .submit(Op::PatchApproval { id: approval.call_id, decision: decision.clone(), }) .await?; wait_for_completion(&test).await; } } let output_item = results_mock.single_request().function_call_output(call_id); let result = parse_result(&output_item); eprintln!( "approval scenario {} result: exit_code={:?} stdout={:?}", scenario.name, result.exit_code, result.stdout ); scenario.expectation.verify(&test, &result)?; Ok(()) } #[tokio::test(flavor = "current_thread")] #[cfg(unix)] async fn approving_apply_patch_for_session_skips_future_prompts_for_same_file() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let approval_policy = AskForApproval::OnRequest; let sandbox_policy = SandboxPolicy::WorkspaceWrite { writable_roots: vec![], read_only_access: Default::default(), network_access: false, exclude_tmpdir_env_var: false, exclude_slash_tmp: false, }; let sandbox_policy_for_config = sandbox_policy.clone(); let mut builder = test_codex() .with_model("gpt-5.1-codex") .with_config(move |config| { config.permissions.approval_policy = Constrained::allow_any(approval_policy); config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config); }); let test = builder.build(&server).await?; let target = TargetPath::OutsideWorkspace("apply_patch_allow_session.txt"); let (path, patch_path) = target.resolve_for_patch(&test); let _ = fs::remove_file(&path); let patch_add = build_add_file_patch(&patch_path, "before"); let patch_update = format!( "*** Begin Patch\n*** Update File: {patch_path}\n@@\n-before\n+after\n*** End Patch\n" ); let call_id_1 = "apply_patch_allow_session_1"; let call_id_2 = "apply_patch_allow_session_2"; let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-1"), ev_apply_patch_function_call(call_id_1, &patch_add), ev_completed("resp-1"), ]), ) .await; let _ = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-1", "done"), ev_completed("resp-2"), ]), ) .await; submit_turn( &test, "apply_patch allow session", approval_policy, sandbox_policy.clone(), ) .await?; let approval = expect_patch_approval(&test, call_id_1).await; test.codex .submit(Op::PatchApproval { id: approval.call_id, decision: ReviewDecision::ApprovedForSession, }) .await?; wait_for_completion(&test).await; assert!(fs::read_to_string(&path)?.contains("before")); let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-3"), ev_apply_patch_function_call(call_id_2, &patch_update), ev_completed("resp-3"), ]), ) .await; let _ = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-2", "done"), ev_completed("resp-4"), ]), ) .await; submit_turn( &test, "apply_patch allow session followup", approval_policy, sandbox_policy.clone(), ) .await?; let event = wait_for_event(&test.codex, |event| { matches!( event, EventMsg::ApplyPatchApprovalRequest(_) | EventMsg::TurnComplete(_) ) }) .await; match event { EventMsg::TurnComplete(_) => {} EventMsg::ApplyPatchApprovalRequest(event) => { panic!("unexpected patch approval request: {:?}", event.call_id) } other => panic!("unexpected event: {other:?}"), } assert!(fs::read_to_string(&path)?.contains("after")); let _ = fs::remove_file(path); Ok(()) } #[tokio::test(flavor = "current_thread")] #[cfg(unix)] async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts() -> Result<()> { let server = start_mock_server().await; let approval_policy = AskForApproval::UnlessTrusted; let sandbox_policy = SandboxPolicy::new_read_only_policy(); let sandbox_policy_for_config = sandbox_policy.clone(); let mut builder = test_codex().with_config(move |config| { config.permissions.approval_policy = Constrained::allow_any(approval_policy); config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config); }); let test = builder.build(&server).await?; let allow_prefix_path = test.cwd.path().join("allow-prefix.txt"); let _ = fs::remove_file(&allow_prefix_path); let call_id_first = "allow-prefix-first"; let (first_event, expected_command) = ActionKind::RunCommand { command: "touch allow-prefix.txt", } .prepare( &test, &server, call_id_first, SandboxPermissions::UseDefault, ) .await?; let expected_command = expected_command.expect("execpolicy amendment scenario should produce a shell command"); let expected_execpolicy_amendment = ExecPolicyAmendment::new(vec!["touch".to_string(), "allow-prefix.txt".to_string()]); let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-allow-prefix-1"), first_event, ev_completed("resp-allow-prefix-1"), ]), ) .await; let first_results = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-allow-prefix-1", "done"), ev_completed("resp-allow-prefix-2"), ]), ) .await; submit_turn( &test, "allow-prefix-first", approval_policy, sandbox_policy.clone(), ) .await?; let approval = expect_exec_approval(&test, expected_command.as_str()).await; assert_eq!( approval.proposed_execpolicy_amendment, Some(expected_execpolicy_amendment.clone()) ); test.codex .submit(Op::ExecApproval { id: approval.effective_approval_id(), turn_id: None, decision: ReviewDecision::ApprovedExecpolicyAmendment { proposed_execpolicy_amendment: expected_execpolicy_amendment.clone(), }, }) .await?; wait_for_completion(&test).await; let developer_messages = first_results .single_request() .message_input_texts("developer"); assert!( developer_messages .iter() .any(|message| message.contains(r#"["touch", "allow-prefix.txt"]"#)), "expected developer message documenting saved rule, got: {developer_messages:?}" ); let policy_path = test.home.path().join("rules").join("default.rules"); let policy_contents = fs::read_to_string(&policy_path)?; assert!( policy_contents .contains(r#"prefix_rule(pattern=["touch", "allow-prefix.txt"], decision="allow")"#), "unexpected policy contents: {policy_contents}" ); let first_output = parse_result( &first_results .single_request() .function_call_output(call_id_first), ); assert_eq!(first_output.exit_code.unwrap_or(0), 0); assert!( first_output.stdout.is_empty(), "unexpected stdout: {}", first_output.stdout ); assert_eq!( fs::read_to_string(&allow_prefix_path)?, "", "unexpected file contents after first run" ); let call_id_second = "allow-prefix-second"; let (second_event, second_command) = ActionKind::RunCommand { command: "touch allow-prefix.txt", } .prepare( &test, &server, call_id_second, SandboxPermissions::UseDefault, ) .await?; assert_eq!(second_command.as_deref(), Some(expected_command.as_str())); let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-allow-prefix-3"), second_event, ev_completed("resp-allow-prefix-3"), ]), ) .await; let second_results = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-allow-prefix-2", "done"), ev_completed("resp-allow-prefix-4"), ]), ) .await; submit_turn( &test, "allow-prefix-second", approval_policy, sandbox_policy.clone(), ) .await?; wait_for_completion_without_approval(&test).await; let second_output = parse_result( &second_results .single_request() .function_call_output(call_id_second), ); assert_eq!(second_output.exit_code.unwrap_or(0), 0); assert!( second_output.stdout.is_empty(), "unexpected stdout: {}", second_output.stdout ); assert_eq!( fs::read_to_string(&allow_prefix_path)?, "", "unexpected file contents after second run" ); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn spawned_subagent_execpolicy_amendment_propagates_to_parent_session() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let approval_policy = AskForApproval::UnlessTrusted; let sandbox_policy = SandboxPolicy::new_read_only_policy(); let sandbox_policy_for_config = sandbox_policy.clone(); let mut builder = test_codex().with_config(move |config| { config.permissions.approval_policy = Constrained::allow_any(approval_policy); config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config); config .features .enable(Feature::Collab) .expect("test config should allow feature update"); }); let test = builder.build(&server).await?; const PARENT_PROMPT: &str = "spawn a child that repeats a command"; const CHILD_PROMPT: &str = "run the same command twice"; const SPAWN_CALL_ID: &str = "spawn-child-1"; const CHILD_CALL_ID_1: &str = "child-touch-1"; const PARENT_CALL_ID_2: &str = "parent-touch-2"; let child_file = test.cwd.path().join("subagent-allow-prefix.txt"); let _ = fs::remove_file(&child_file); let spawn_args = serde_json::to_string(&json!({ "message": CHILD_PROMPT, }))?; mount_sse_once_match( &server, |req: &Request| body_contains(req, PARENT_PROMPT), sse(vec![ ev_response_created("resp-parent-1"), ev_function_call(SPAWN_CALL_ID, "spawn_agent", &spawn_args), ev_completed("resp-parent-1"), ]), ) .await; let child_cmd_args = serde_json::to_string(&json!({ "command": "touch subagent-allow-prefix.txt", "timeout_ms": 1_000, "prefix_rule": ["touch", "subagent-allow-prefix.txt"], }))?; mount_sse_once_match( &server, |req: &Request| body_contains(req, CHILD_PROMPT) && !body_contains(req, SPAWN_CALL_ID), sse(vec![ ev_response_created("resp-child-1"), ev_function_call(CHILD_CALL_ID_1, "shell_command", &child_cmd_args), ev_completed("resp-child-1"), ]), ) .await; mount_sse_once_match( &server, |req: &Request| body_contains(req, CHILD_CALL_ID_1), sse(vec![ ev_response_created("resp-child-2"), ev_assistant_message("msg-child-2", "child done"), ev_completed("resp-child-2"), ]), ) .await; mount_sse_once_match( &server, |req: &Request| body_contains(req, SPAWN_CALL_ID), sse(vec![ ev_response_created("resp-parent-2"), ev_assistant_message("msg-parent-2", "parent done"), ev_completed("resp-parent-2"), ]), ) .await; let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-parent-3"), ev_function_call(PARENT_CALL_ID_2, "shell_command", &child_cmd_args), ev_completed("resp-parent-3"), ]), ) .await; let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-parent-4"), ev_assistant_message("msg-parent-4", "parent rerun done"), ev_completed("resp-parent-4"), ]), ) .await; submit_turn( &test, PARENT_PROMPT, approval_policy, sandbox_policy.clone(), ) .await?; let child = wait_for_spawned_thread(&test).await?; let approval_event = wait_for_event_with_timeout( &child, |event| { matches!( event, EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_) ) }, Duration::from_secs(2), ) .await; let EventMsg::ExecApprovalRequest(approval) = approval_event else { panic!("expected child approval before completion"); }; let expected_execpolicy_amendment = ExecPolicyAmendment::new(vec![ "touch".to_string(), "subagent-allow-prefix.txt".to_string(), ]); assert_eq!( approval.proposed_execpolicy_amendment, Some(expected_execpolicy_amendment.clone()) ); child .submit(Op::ExecApproval { id: approval.effective_approval_id(), turn_id: None, decision: ReviewDecision::ApprovedExecpolicyAmendment { proposed_execpolicy_amendment: expected_execpolicy_amendment, }, }) .await?; let child_event = wait_for_event_with_timeout( &child, |event| { matches!( event, EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_) ) }, Duration::from_secs(2), ) .await; match child_event { EventMsg::TurnComplete(_) => {} EventMsg::ExecApprovalRequest(ev) => { panic!("unexpected second child approval request: {:?}", ev.command) } other => panic!("unexpected event: {other:?}"), } assert!( child_file.exists(), "expected subagent command to create file" ); fs::remove_file(&child_file)?; assert!( !child_file.exists(), "expected child file to be removed before parent rerun" ); submit_turn( &test, "parent reruns child command", approval_policy, sandbox_policy, ) .await?; wait_for_completion_without_approval(&test).await; Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] #[cfg(unix)] async fn matched_prefix_rule_runs_unsandboxed_under_zsh_fork() -> Result<()> { skip_if_no_network!(Ok(())); let Some(runtime) = zsh_fork_runtime("zsh-fork prefix rule unsandboxed test")? else { return Ok(()); }; let approval_policy = AskForApproval::Never; let sandbox_policy = restrictive_workspace_write_policy(); let outside_dir = tempfile::tempdir_in(std::env::current_dir()?)?; let outside_path = outside_dir .path() .join("zsh-fork-prefix-rule-unsandboxed.txt"); let command = format!("touch {outside_path:?}"); let rules = r#"prefix_rule(pattern=["touch"], decision="allow")"#.to_string(); let server = start_mock_server().await; let outside_path_for_hook = outside_path.clone(); let test = build_zsh_fork_test( &server, runtime, approval_policy, sandbox_policy.clone(), move |home| { let _ = fs::remove_file(&outside_path_for_hook); let rules_dir = home.join("rules"); fs::create_dir_all(&rules_dir).unwrap(); fs::write(rules_dir.join("default.rules"), &rules).unwrap(); }, ) .await?; let call_id = "zsh-fork-prefix-rule-unsandboxed"; let event = shell_event( call_id, &command, /*timeout_ms*/ 1_000, SandboxPermissions::UseDefault, )?; let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-zsh-fork-prefix-1"), event, ev_completed("resp-zsh-fork-prefix-1"), ]), ) .await; let results = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-zsh-fork-prefix-1", "done"), ev_completed("resp-zsh-fork-prefix-2"), ]), ) .await; submit_turn( &test, "run allowed touch under zsh fork", approval_policy, sandbox_policy, ) .await?; wait_for_completion_without_approval(&test).await; let result = parse_result(&results.single_request().function_call_output(call_id)); assert_eq!(result.exit_code.unwrap_or(0), 0); assert!( outside_path.exists(), "expected matched prefix_rule to rerun touch unsandboxed; output: {}", result.stdout ); Ok(()) } #[tokio::test(flavor = "current_thread")] #[cfg(unix)] async fn invalid_requested_prefix_rule_falls_back_for_compound_command() -> Result<()> { let server = start_mock_server().await; let approval_policy = AskForApproval::OnRequest; let sandbox_policy = SandboxPolicy::new_read_only_policy(); let sandbox_policy_for_config = sandbox_policy.clone(); let mut builder = test_codex().with_config(move |config| { config.permissions.approval_policy = Constrained::allow_any(approval_policy); config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config); }); let test = builder.build(&server).await?; let call_id = "invalid-prefix-rule"; let command = "touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt"; let event = shell_event_with_prefix_rule( call_id, command, /*timeout_ms*/ 1_000, SandboxPermissions::RequireEscalated, Some(vec!["touch".to_string()]), )?; let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-invalid-prefix-1"), event, ev_completed("resp-invalid-prefix-1"), ]), ) .await; submit_turn( &test, "invalid-prefix-rule", approval_policy, sandbox_policy.clone(), ) .await?; let approval = expect_exec_approval(&test, command).await; let amendment = approval .proposed_execpolicy_amendment .expect("should have a proposed execpolicy amendment"); assert!(amendment.command.contains(&command.to_string())); Ok(()) } #[tokio::test(flavor = "current_thread")] #[cfg(unix)] async fn approving_fallback_rule_for_compound_command_works() -> Result<()> { let server = start_mock_server().await; let approval_policy = AskForApproval::OnRequest; let sandbox_policy = SandboxPolicy::new_read_only_policy(); let sandbox_policy_for_config = sandbox_policy.clone(); let mut builder = test_codex().with_config(move |config| { config.permissions.approval_policy = Constrained::allow_any(approval_policy); config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config); }); let test = builder.build(&server).await?; let call_id = "invalid-prefix-rule"; let command = "touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt"; let event = shell_event_with_prefix_rule( call_id, command, /*timeout_ms*/ 1_000, SandboxPermissions::RequireEscalated, Some(vec!["touch".to_string()]), )?; let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-invalid-prefix-1"), event, ev_completed("resp-invalid-prefix-1"), ]), ) .await; submit_turn( &test, "invalid-prefix-rule", approval_policy, sandbox_policy.clone(), ) .await?; let approval = expect_exec_approval(&test, command).await; let approval_id = approval.effective_approval_id(); let amendment = approval .proposed_execpolicy_amendment .expect("should have a proposed execpolicy amendment"); assert!(amendment.command.contains(&command.to_string())); test.codex .submit(Op::ExecApproval { id: approval_id, turn_id: None, decision: ReviewDecision::ApprovedExecpolicyAmendment { proposed_execpolicy_amendment: amendment.clone(), }, }) .await?; wait_for_completion(&test).await; let call_id = "invalid-prefix-rule-again"; let command = "touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt"; let event = shell_event_with_prefix_rule( call_id, command, /*timeout_ms*/ 1_000, SandboxPermissions::RequireEscalated, Some(vec!["touch".to_string()]), )?; let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-invalid-prefix-1"), event, ev_completed("resp-invalid-prefix-1"), ]), ) .await; let second_results = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-invalid-prefix-1", "done"), ev_completed("resp-invalid-prefix-2"), ]), ) .await; submit_turn( &test, "invalid-prefix-rule", approval_policy, sandbox_policy.clone(), ) .await?; wait_for_completion_without_approval(&test).await; let second_output = parse_result( &second_results .single_request() .function_call_output(call_id), ); assert_eq!(second_output.exit_code.unwrap_or(0), 0); assert!( second_output.stdout.is_empty(), "unexpected stdout: {}", second_output.stdout ); Ok(()) } #[tokio::test(flavor = "current_thread")] async fn denying_network_policy_amendment_persists_policy_and_skips_future_network_prompt() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let home = Arc::new(TempDir::new()?); fs::write( home.path().join("config.toml"), r#"default_permissions = "workspace" [permissions.workspace.filesystem] ":minimal" = "read" [permissions.workspace.network] enabled = true mode = "limited" allow_local_binding = true "#, )?; let approval_policy = AskForApproval::OnFailure; let sandbox_policy = SandboxPolicy::WorkspaceWrite { writable_roots: vec![], read_only_access: Default::default(), network_access: true, exclude_tmpdir_env_var: false, exclude_slash_tmp: false, }; let sandbox_policy_for_config = sandbox_policy.clone(); let mut builder = test_codex().with_home(home).with_config(move |config| { config.permissions.approval_policy = Constrained::allow_any(approval_policy); config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config); let layers = config .config_layer_stack .get_layers( ConfigLayerStackOrdering::LowestPrecedenceFirst, /*include_disabled*/ true, ) .into_iter() .cloned() .collect(); let mut requirements = config.config_layer_stack.requirements().clone(); requirements.network = Some(Sourced::new( NetworkConstraints { enabled: Some(true), allow_local_binding: Some(true), ..Default::default() }, RequirementSource::CloudRequirements, )); let mut requirements_toml = config.config_layer_stack.requirements_toml().clone(); requirements_toml.network = Some(NetworkRequirementsToml { enabled: Some(true), allow_local_binding: Some(true), ..Default::default() }); config.config_layer_stack = ConfigLayerStack::new(layers, requirements, requirements_toml) .expect("rebuild config layer stack with network requirements"); }); let test = builder.build(&server).await?; assert!( test.config.managed_network_requirements_enabled(), "expected managed network requirements to be enabled" ); assert!( test.config.permissions.network.is_some(), "expected managed network proxy config to be present" ); test.session_configured .network_proxy .as_ref() .expect("expected runtime managed network proxy addresses"); let call_id_first = "allow-network-first"; // Use urllib without overriding proxy settings so managed-network sessions // continue to exercise the env-based proxy routing path under bubblewrap. let fetch_command = r#"python3 -c "import urllib.request; opener = urllib.request.build_opener(urllib.request.ProxyHandler()); print('OK:' + opener.open('http://codex-network-test.invalid', timeout=30).read().decode(errors='replace'))""# .to_string(); let first_event = shell_event( call_id_first, &fetch_command, /*timeout_ms*/ 30_000, SandboxPermissions::UseDefault, )?; let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-allow-network-1"), first_event, ev_completed("resp-allow-network-1"), ]), ) .await; let first_results = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-allow-network-1", "done"), ev_completed("resp-allow-network-2"), ]), ) .await; submit_turn( &test, "allow-network-first", approval_policy, sandbox_policy.clone(), ) .await?; let deadline = std::time::Instant::now() + std::time::Duration::from_secs(30); let approval = loop { let remaining = deadline .checked_duration_since(std::time::Instant::now()) .expect("timed out waiting for network approval request"); let event = wait_for_event_with_timeout( &test.codex, |event| { matches!( event, EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_) ) }, remaining, ) .await; match event { EventMsg::ExecApprovalRequest(approval) => { if approval.command.first().map(std::string::String::as_str) == Some("network-access") { break approval; } test.codex .submit(Op::ExecApproval { id: approval.effective_approval_id(), turn_id: None, decision: ReviewDecision::Approved, }) .await?; } EventMsg::TurnComplete(_) => { panic!("expected network approval request before completion"); } other => panic!("unexpected event: {other:?}"), } }; let network_context = approval .network_approval_context .clone() .expect("expected network approval context"); assert_eq!(network_context.protocol, NetworkApprovalProtocol::Http); let expected_network_amendments = vec![ NetworkPolicyAmendment { host: network_context.host.clone(), action: NetworkPolicyRuleAction::Allow, }, NetworkPolicyAmendment { host: network_context.host.clone(), action: NetworkPolicyRuleAction::Deny, }, ]; assert_eq!( approval.proposed_network_policy_amendments, Some(expected_network_amendments.clone()) ); let deny_network_amendment = expected_network_amendments .into_iter() .find(|amendment| amendment.action == NetworkPolicyRuleAction::Deny) .expect("expected deny network policy amendment"); test.codex .submit(Op::ExecApproval { id: approval.effective_approval_id(), turn_id: None, decision: ReviewDecision::NetworkPolicyAmendment { network_policy_amendment: deny_network_amendment.clone(), }, }) .await?; wait_for_completion(&test).await; let policy_path = test.home.path().join("rules").join("default.rules"); let policy_contents = fs::read_to_string(&policy_path)?; let expected_rule = format!( r#"network_rule(host="{}", protocol="{}", decision="deny", justification="Deny {} access to {}")"#, deny_network_amendment.host, match network_context.protocol { NetworkApprovalProtocol::Http => "http", NetworkApprovalProtocol::Https => "https_connect", NetworkApprovalProtocol::Socks5Tcp => "socks5_tcp", NetworkApprovalProtocol::Socks5Udp => "socks5_udp", }, match network_context.protocol { NetworkApprovalProtocol::Http => "http", NetworkApprovalProtocol::Https => "https_connect", NetworkApprovalProtocol::Socks5Tcp => "socks5_tcp", NetworkApprovalProtocol::Socks5Udp => "socks5_udp", }, deny_network_amendment.host ); assert!( policy_contents.contains(&expected_rule), "unexpected policy contents: {policy_contents}" ); let first_output = parse_result( &first_results .single_request() .function_call_output(call_id_first), ); Expectation::CommandFailure { output_contains: "", } .verify(&test, &first_output)?; let call_id_second = "allow-network-second"; let second_event = shell_event( call_id_second, &fetch_command, /*timeout_ms*/ 30_000, SandboxPermissions::UseDefault, )?; let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-allow-network-3"), second_event, ev_completed("resp-allow-network-3"), ]), ) .await; let second_results = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-allow-network-2", "done"), ev_completed("resp-allow-network-4"), ]), ) .await; submit_turn( &test, "allow-network-second", approval_policy, sandbox_policy.clone(), ) .await?; let deadline = std::time::Instant::now() + std::time::Duration::from_secs(30); loop { let remaining = deadline .checked_duration_since(std::time::Instant::now()) .expect("timed out waiting for second turn completion"); let event = wait_for_event_with_timeout( &test.codex, |event| { matches!( event, EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_) ) }, remaining, ) .await; match event { EventMsg::ExecApprovalRequest(approval) => { if approval.command.first().map(std::string::String::as_str) == Some("network-access") { panic!( "unexpected network approval request: {:?}", approval.command ); } test.codex .submit(Op::ExecApproval { id: approval.effective_approval_id(), turn_id: None, decision: ReviewDecision::Approved, }) .await?; } EventMsg::TurnComplete(_) => break, other => panic!("unexpected event: {other:?}"), } } let second_output = parse_result( &second_results .single_request() .function_call_output(call_id_second), ); Expectation::CommandFailure { output_contains: "", } .verify(&test, &second_output)?; Ok(()) } #[tokio::test(flavor = "current_thread")] async fn network_approval_flow_survives_danger_full_access_session_start() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let home = Arc::new(TempDir::new()?); fs::write( home.path().join("config.toml"), r#"default_permissions = "workspace" [permissions.workspace.filesystem] ":minimal" = "read" [permissions.workspace.network] enabled = true mode = "limited" allow_local_binding = true "#, )?; let approval_policy = AskForApproval::OnFailure; let turn_sandbox_policy = SandboxPolicy::WorkspaceWrite { writable_roots: vec![], read_only_access: Default::default(), network_access: true, exclude_tmpdir_env_var: false, exclude_slash_tmp: false, }; let mut builder = test_codex().with_home(home).with_config(move |config| { config.permissions.approval_policy = Constrained::allow_any(approval_policy); config.permissions.sandbox_policy = Constrained::allow_any(SandboxPolicy::DangerFullAccess); let layers = config .config_layer_stack .get_layers( ConfigLayerStackOrdering::LowestPrecedenceFirst, /*include_disabled*/ true, ) .into_iter() .cloned() .collect(); let mut requirements = config.config_layer_stack.requirements().clone(); requirements.network = Some(Sourced::new( NetworkConstraints { enabled: Some(true), allow_local_binding: Some(true), ..Default::default() }, RequirementSource::CloudRequirements, )); let mut requirements_toml = config.config_layer_stack.requirements_toml().clone(); requirements_toml.network = Some(NetworkRequirementsToml { enabled: Some(true), allow_local_binding: Some(true), ..Default::default() }); config.config_layer_stack = ConfigLayerStack::new(layers, requirements, requirements_toml) .expect("rebuild config layer stack with network requirements"); }); let test = builder.build(&server).await?; assert!( !test.config.managed_network_requirements_enabled(), "expected managed network requirements to stay inactive in danger-full-access" ); assert!( test.config.permissions.network.is_some(), "expected managed network proxy config to be present" ); assert!( test.session_configured.network_proxy.is_none(), "expected session configured event to hide managed network proxy in danger-full-access" ); let call_id = "allow-network-after-yolo"; let fetch_command = r#"python3 -c "import urllib.request; opener = urllib.request.build_opener(urllib.request.ProxyHandler()); print('OK:' + opener.open('http://codex-network-test.invalid', timeout=30).read().decode(errors='replace'))""# .to_string(); let event = shell_event( call_id, &fetch_command, /*timeout_ms*/ 30_000, SandboxPermissions::UseDefault, )?; let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-network-after-yolo-1"), event, ev_completed("resp-network-after-yolo-1"), ]), ) .await; let _ = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-network-after-yolo-1", "done"), ev_completed("resp-network-after-yolo-2"), ]), ) .await; submit_turn( &test, "allow-network-after-yolo", approval_policy, turn_sandbox_policy, ) .await?; let deadline = std::time::Instant::now() + std::time::Duration::from_secs(30); let approval = loop { let remaining = deadline .checked_duration_since(std::time::Instant::now()) .expect("timed out waiting for network approval request"); let event = wait_for_event_with_timeout( &test.codex, |event| { matches!( event, EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_) ) }, remaining, ) .await; match event { EventMsg::ExecApprovalRequest(approval) => { if approval.command.first().map(std::string::String::as_str) == Some("network-access") { break approval; } test.codex .submit(Op::ExecApproval { id: approval.effective_approval_id(), turn_id: None, decision: ReviewDecision::Approved, }) .await?; } EventMsg::TurnComplete(_) => { panic!("expected network approval request before completion"); } other => panic!("unexpected event: {other:?}"), } }; let network_context = approval .network_approval_context .clone() .expect("expected network approval context"); assert_eq!(network_context.protocol, NetworkApprovalProtocol::Http); test.codex .submit(Op::ExecApproval { id: approval.effective_approval_id(), turn_id: None, decision: ReviewDecision::Denied, }) .await?; wait_for_completion(&test).await; Ok(()) } // todo(dylan) add ScenarioSpec support for rules #[tokio::test(flavor = "current_thread")] #[cfg(unix)] async fn compound_command_with_one_safe_command_still_requires_approval() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let approval_policy = AskForApproval::UnlessTrusted; let sandbox_policy = SandboxPolicy::new_workspace_write_policy(); let sandbox_policy_for_config = sandbox_policy.clone(); let mut builder = test_codex().with_config(move |config| { config.permissions.approval_policy = Constrained::allow_any(approval_policy); config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config); }); let test = builder.build(&server).await?; let rules_dir = test.home.path().join("rules"); fs::create_dir_all(&rules_dir)?; fs::write( rules_dir.join("default.rules"), r#"prefix_rule(pattern=["touch", "allow-prefix.txt"], decision="allow")"#, )?; let call_id = "heredoc-with-chained-prefix"; let command = "touch ./test.txt && rm ./test.txt"; let (event, expected_command) = ActionKind::RunCommand { command } .prepare(&test, &server, call_id, SandboxPermissions::UseDefault) .await?; let expected_command = expected_command.expect("compound command should produce a shell command"); let _ = mount_sse_once( &server, sse(vec![ ev_response_created("resp-heredoc-prefix-1"), event, ev_completed("resp-heredoc-prefix-1"), ]), ) .await; let _ = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-heredoc-prefix-1", "done"), ev_completed("resp-heredoc-prefix-2"), ]), ) .await; submit_turn( &test, "compound command", approval_policy, sandbox_policy.clone(), ) .await?; let approval = expect_exec_approval(&test, expected_command.as_str()).await; test.codex .submit(Op::ExecApproval { id: approval.effective_approval_id(), turn_id: None, decision: ReviewDecision::Denied, }) .await?; wait_for_completion(&test).await; Ok(()) }