Compare commits

...

1 Commits

Author SHA1 Message Date
Eva Wong
0f921c5d3e Require approval for fake shell escalation 2026-05-14 21:55:24 +02:00
12 changed files with 239 additions and 75 deletions

View File

@@ -7,29 +7,32 @@ const CANONICAL_POWERSHELL_SCRIPT_PREFIX: &str = "__codex_powershell_script__";
/// Canonicalize command argv for approval-cache matching.
///
/// This keeps approval decisions stable across wrapper-path differences (for
/// example `/bin/bash -lc` vs `bash -lc`) and across shell wrapper tools while
/// preserving exact script text for complex scripts where we cannot safely
/// recover a tokenized command sequence.
/// This keeps approval decisions stable across shell script whitespace changes
/// while preserving the shell executable identity. The executable is part of the
/// key because it starts before the shell script and can be attacker controlled.
pub(crate) fn canonicalize_command_for_approval(command: &[String]) -> Vec<String> {
if let Some(commands) = parse_shell_lc_plain_commands(command)
&& let [single_command] = commands.as_slice()
{
return single_command.clone();
}
if let Some((_shell, script)) = extract_bash_command(command) {
if let Some((shell, script)) = extract_bash_command(command) {
let shell_mode = command.get(1).cloned().unwrap_or_default();
if let Some(commands) = parse_shell_lc_plain_commands(command)
&& let [single_command] = commands.as_slice()
{
let mut canonical_command = vec![shell.to_string(), shell_mode];
canonical_command.extend(single_command.clone());
return canonical_command;
}
return vec![
CANONICAL_BASH_SCRIPT_PREFIX.to_string(),
shell.to_string(),
shell_mode,
script.to_string(),
];
}
if let Some((_shell, script)) = extract_powershell_command(command) {
if let Some((shell, script)) = extract_powershell_command(command) {
return vec![
CANONICAL_POWERSHELL_SCRIPT_PREFIX.to_string(),
shell.to_string(),
script.to_string(),
];
}

View File

@@ -2,7 +2,7 @@ use super::canonicalize_command_for_approval;
use pretty_assertions::assert_eq;
#[test]
fn canonicalizes_word_only_shell_scripts_to_inner_command() {
fn canonicalizes_word_only_shell_scripts_with_shell_identity() {
let command_a = vec![
"/bin/bash".to_string(),
"-lc".to_string(),
@@ -17,13 +17,15 @@ fn canonicalizes_word_only_shell_scripts_to_inner_command() {
assert_eq!(
canonicalize_command_for_approval(&command_a),
vec![
"/bin/bash".to_string(),
"-lc".to_string(),
"cargo".to_string(),
"test".to_string(),
"-p".to_string(),
"codex-core".to_string(),
]
);
assert_eq!(
assert_ne!(
canonicalize_command_for_approval(&command_a),
canonicalize_command_for_approval(&command_b)
);
@@ -43,11 +45,12 @@ fn canonicalizes_heredoc_scripts_to_stable_script_key() {
canonicalize_command_for_approval(&command_a),
vec![
"__codex_shell_script__".to_string(),
"/bin/zsh".to_string(),
"-lc".to_string(),
script.to_string(),
]
);
assert_eq!(
assert_ne!(
canonicalize_command_for_approval(&command_a),
canonicalize_command_for_approval(&command_b)
);
@@ -72,10 +75,11 @@ fn canonicalizes_powershell_wrappers_to_stable_script_key() {
canonicalize_command_for_approval(&command_a),
vec![
"__codex_powershell_script__".to_string(),
"powershell.exe".to_string(),
script.to_string(),
]
);
assert_eq!(
assert_ne!(
canonicalize_command_for_approval(&command_a),
canonicalize_command_for_approval(&command_b)
);

View File

@@ -246,6 +246,7 @@ pub(crate) struct ExecApprovalRequest<'a> {
pub(crate) sandbox_cwd: &'a Path,
pub(crate) sandbox_permissions: SandboxPermissions,
pub(crate) prefix_rule: Option<Vec<String>>,
pub(crate) allow_execpolicy_amendment: bool,
}
impl ExecPolicyManager {
@@ -281,6 +282,7 @@ impl ExecPolicyManager {
sandbox_cwd,
sandbox_permissions,
prefix_rule,
allow_execpolicy_amendment,
} = req;
let exec_policy = self.current();
let ExecPolicyCommands {
@@ -291,7 +293,9 @@ impl ExecPolicyManager {
// Keep heredoc prefix parsing for rule evaluation so existing
// allow/prompt/forbidden rules still apply, but avoid auto-derived
// amendments when only the heredoc fallback parser matched.
let auto_amendment_allowed = !used_complex_parsing;
let auto_amendment_allowed = !used_complex_parsing
&& allow_execpolicy_amendment
&& !sandbox_permissions.requests_sandbox_override();
let exec_policy_fallback = |cmd: &[String]| {
render_decision_for_unmatched_command(
cmd,
@@ -649,7 +653,6 @@ pub(crate) fn render_decision_for_unmatched_command(
codex_shell_command::is_safe_command::is_safe_powershell_words(command)
}
};
// On Windows, ReadOnly sandbox is not a real sandbox, so special-case it
// here.
let environment_lacks_sandbox_protections = cfg!(windows)
@@ -701,6 +704,15 @@ pub(crate) fn render_decision_for_unmatched_command(
};
}
if sandbox_permissions.requests_sandbox_override()
&& matches!(
file_system_sandbox_policy.kind,
FileSystemSandboxKind::Restricted
)
{
return Decision::Prompt;
}
match approval_policy {
AskForApproval::Never | AskForApproval::OnFailure => {
// We allow the command to run, relying on the sandbox for

View File

@@ -938,14 +938,55 @@ EOF"#
},
ExecApprovalRequirement::NeedsApproval {
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
"zsh".to_string(),
proposed_execpolicy_amendment: None,
},
)
.await;
}
#[tokio::test]
async fn known_safe_shell_wrapper_with_escalation_requires_approval() {
assert_exec_approval_requirement_for_command(
ExecApprovalRequirementScenario {
policy_src: None,
command: vec![
"/tmp/cc01_fake/bash".to_string(),
"-lc".to_string(),
r#"cat <<'EOF' > /some/important/folder/test.txt
hello world
EOF"#
.to_string(),
])),
"ls".to_string(),
],
approval_policy: AskForApproval::OnRequest,
sandbox_policy: SandboxPolicy::new_workspace_write_policy(),
file_system_sandbox_policy: workspace_write_file_system_sandbox_policy(),
sandbox_permissions: SandboxPermissions::RequireEscalated,
prefix_rule: None,
},
ExecApprovalRequirement::NeedsApproval {
reason: None,
proposed_execpolicy_amendment: None,
},
)
.await;
}
#[tokio::test]
async fn known_safe_shell_wrapper_with_on_failure_escalation_requires_approval() {
assert_exec_approval_requirement_for_command(
ExecApprovalRequirementScenario {
policy_src: None,
command: vec![
"/tmp/cc01_fake/bash".to_string(),
"-lc".to_string(),
"ls".to_string(),
],
approval_policy: AskForApproval::OnFailure,
sandbox_policy: SandboxPolicy::new_workspace_write_policy(),
file_system_sandbox_policy: workspace_write_file_system_sandbox_policy(),
sandbox_permissions: SandboxPermissions::RequireEscalated,
prefix_rule: None,
},
ExecApprovalRequirement::NeedsApproval {
reason: None,
proposed_execpolicy_amendment: None,
},
)
.await;
@@ -1265,10 +1306,7 @@ async fn exec_approval_requirement_prompts_for_inline_additional_permissions_und
},
ExecApprovalRequirement::NeedsApproval {
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
"touch".to_string(),
"requested-dir/requested-but-unused.txt".to_string(),
])),
proposed_execpolicy_amendment: None,
},
)
.await;
@@ -1288,10 +1326,7 @@ async fn exec_approval_requirement_prompts_for_known_safe_escalation_under_on_re
},
ExecApprovalRequirement::NeedsApproval {
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
"echo".to_string(),
"hello".to_string(),
])),
proposed_execpolicy_amendment: None,
},
)
.await;
@@ -1380,6 +1415,7 @@ async fn mixed_rule_and_sandbox_prompt_prioritizes_rule_for_rejection_decision()
sandbox_cwd: Path::new("/tmp"),
sandbox_permissions: SandboxPermissions::RequireEscalated,
prefix_rule: None,
allow_execpolicy_amendment: true,
})
.await;
@@ -1420,6 +1456,7 @@ async fn mixed_rule_and_sandbox_prompt_rejects_when_granular_rules_are_disabled(
sandbox_cwd: Path::new("/tmp"),
sandbox_permissions: SandboxPermissions::RequireEscalated,
prefix_rule: None,
allow_execpolicy_amendment: true,
})
.await;
@@ -1447,6 +1484,7 @@ async fn exec_approval_requirement_falls_back_to_heuristics() {
sandbox_cwd: Path::new("/tmp"),
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
allow_execpolicy_amendment: true,
})
.await;
@@ -1475,6 +1513,7 @@ async fn empty_bash_lc_script_falls_back_to_original_command() {
sandbox_cwd: Path::new("/tmp"),
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
allow_execpolicy_amendment: true,
})
.await;
@@ -1507,6 +1546,7 @@ async fn whitespace_bash_lc_script_falls_back_to_original_command() {
sandbox_cwd: Path::new("/tmp"),
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
allow_execpolicy_amendment: true,
})
.await;
@@ -1539,6 +1579,7 @@ async fn request_rule_uses_prefix_rule() {
sandbox_cwd: Path::new("/tmp"),
sandbox_permissions: SandboxPermissions::RequireEscalated,
prefix_rule: Some(vec!["cargo".to_string(), "install".to_string()]),
allow_execpolicy_amendment: true,
})
.await;
@@ -1546,10 +1587,7 @@ async fn request_rule_uses_prefix_rule() {
requirement,
ExecApprovalRequirement::NeedsApproval {
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
"cargo".to_string(),
"install".to_string(),
])),
proposed_execpolicy_amendment: None,
}
);
}
@@ -1572,6 +1610,7 @@ async fn request_rule_falls_back_when_prefix_rule_does_not_approve_all_commands(
sandbox_cwd: Path::new("/tmp"),
sandbox_permissions: SandboxPermissions::RequireEscalated,
prefix_rule: Some(vec!["cargo".to_string(), "install".to_string()]),
allow_execpolicy_amendment: true,
})
.await;
@@ -1579,11 +1618,7 @@ async fn request_rule_falls_back_when_prefix_rule_does_not_approve_all_commands(
requirement,
ExecApprovalRequirement::NeedsApproval {
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
"rm".to_string(),
"-rf".to_string(),
"/tmp/codex".to_string(),
])),
proposed_execpolicy_amendment: None,
}
);
}
@@ -1612,6 +1647,7 @@ async fn heuristics_apply_when_other_commands_match_policy() {
sandbox_cwd: Path::new("/tmp"),
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
allow_execpolicy_amendment: true,
})
.await,
ExecApprovalRequirement::NeedsApproval {
@@ -2105,6 +2141,7 @@ async fn verify_approval_requirement_for_unsafe_powershell_command() {
sandbox_cwd: Path::new("/tmp"),
sandbox_permissions: permissions,
prefix_rule: None,
allow_execpolicy_amendment: true,
})
.await,
"{pwsh_approval_reason}"
@@ -2132,6 +2169,7 @@ async fn verify_approval_requirement_for_unsafe_powershell_command() {
sandbox_cwd: Path::new("/tmp"),
sandbox_permissions: permissions,
prefix_rule: None,
allow_execpolicy_amendment: true,
})
.await,
r#"On all platforms, a forbidden command should require approval
@@ -2155,6 +2193,7 @@ async fn verify_approval_requirement_for_unsafe_powershell_command() {
sandbox_cwd: Path::new("/tmp"),
sandbox_permissions: permissions,
prefix_rule: None,
allow_execpolicy_amendment: true,
})
.await,
r#"On all platforms, a forbidden command should require approval
@@ -2256,6 +2295,7 @@ async fn exec_approval_requirement_for_command(
sandbox_cwd: Path::new("/tmp"),
sandbox_permissions,
prefix_rule,
allow_execpolicy_amendment: true,
})
.await
}

View File

@@ -9533,6 +9533,7 @@ async fn rejects_escalated_permissions_when_policy_not_on_request() {
sandbox_cwd: turn_context.cwd.as_path(),
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
allow_execpolicy_amendment: true,
})
.await;
assert!(matches!(

View File

@@ -193,6 +193,7 @@ async fn run_exec_like(args: RunExecLikeArgs) -> Result<FunctionToolOutput, Func
effective_additional_permissions.sandbox_permissions
},
prefix_rule,
allow_execpolicy_amendment: true,
})
.await;

View File

@@ -130,6 +130,7 @@ impl ToolExecutor<ToolInvocation> for ExecCommandHandler {
let environment = Arc::clone(&turn_environment.environment);
let fs = environment.get_filesystem();
let args: ExecCommandArgs = parse_arguments_with_base_path(&arguments, &cwd)?;
let model_provided_shell = args.shell.is_some();
let hook_command = args.cmd.clone();
maybe_emit_implicit_skill_invocation(
session.as_ref(),
@@ -268,6 +269,7 @@ impl ToolExecutor<ToolInvocation> for ExecCommandHandler {
.permissions_preapproved,
justification,
prefix_rule,
allow_execpolicy_amendment: !model_provided_shell,
},
&context,
)

View File

@@ -103,6 +103,7 @@ pub(crate) struct ExecCommandRequest {
pub additional_permissions_preapproved: bool,
pub justification: Option<String>,
pub prefix_rule: Option<Vec<String>>,
pub allow_execpolicy_amendment: bool,
}
#[derive(Debug)]

View File

@@ -1037,6 +1037,7 @@ impl UnifiedExecProcessManager {
request.sandbox_permissions
},
prefix_rule: request.prefix_rule.clone(),
allow_execpolicy_amendment: request.allow_execpolicy_amendment,
})
.await;
let req = UnifiedExecToolRequest {

View File

@@ -191,6 +191,7 @@ async fn failed_initial_end_for_unstored_process_uses_fallback_output() {
additional_permissions_preapproved: false,
justification: None,
prefix_rule: None,
allow_execpolicy_amendment: true,
};
let transcript = Arc::new(tokio::sync::Mutex::new(HeadTailBuffer::default()));

View File

@@ -2657,17 +2657,14 @@ async fn invalid_requested_prefix_rule_falls_back_for_compound_command() -> Resu
.await?;
let approval = expect_exec_approval(&test, command).await;
let amendment = approval
.proposed_execpolicy_amendment
.expect("should have a proposed execpolicy amendment");
assert!(amendment.command.contains(&command.to_string()));
assert_eq!(approval.proposed_execpolicy_amendment, None);
Ok(())
}
#[tokio::test(flavor = "current_thread")]
#[cfg(unix)]
async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {
async fn approving_fallback_rule_for_sandbox_override_is_not_persisted() -> Result<()> {
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = SandboxPolicy::new_read_only_policy();
@@ -2711,18 +2708,13 @@ async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {
let approval = expect_exec_approval(&test, command).await;
let approval_id = approval.effective_approval_id();
let amendment = approval
.proposed_execpolicy_amendment
.expect("should have a proposed execpolicy amendment");
assert!(amendment.command.contains(&command.to_string()));
assert_eq!(approval.proposed_execpolicy_amendment, None);
test.codex
.submit(Op::ExecApproval {
id: approval_id,
turn_id: None,
decision: ReviewDecision::ApprovedExecpolicyAmendment {
proposed_execpolicy_amendment: amendment.clone(),
},
decision: ReviewDecision::Approved,
})
.await?;
wait_for_completion(&test).await;
@@ -2747,14 +2739,6 @@ async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {
]),
)
.await;
let second_results = mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-invalid-prefix-1", "done"),
ev_completed("resp-invalid-prefix-2"),
]),
)
.await;
submit_turn(
&test,
@@ -2764,19 +2748,8 @@ async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {
)
.await?;
wait_for_completion_without_approval(&test).await;
let second_output = parse_result(
&second_results
.single_request()
.function_call_output(call_id),
);
assert_eq!(second_output.exit_code.unwrap_or(0), 0);
assert!(
second_output.stdout.is_empty(),
"unexpected stdout: {}",
second_output.stdout
);
let second_approval = expect_exec_approval(&test, command).await;
assert_eq!(second_approval.proposed_execpolicy_amendment, None);
Ok(())
}

View File

@@ -1,10 +1,14 @@
use std::collections::HashMap;
use std::env;
use std::ffi::OsStr;
use std::fs;
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
use std::sync::OnceLock;
use anyhow::Context;
use anyhow::Result;
use codex_core::sandboxing::SandboxPermissions;
use codex_exec_server::CreateDirectoryOptions;
use codex_features::Feature;
use codex_protocol::models::PermissionProfile;
@@ -423,6 +427,127 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[cfg(unix)]
async fn unified_exec_fake_shell_escalation_requires_approval() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_model("gpt-5.2").with_config(|config| {
config.use_experimental_unified_exec_tool = true;
config
.features
.enable(Feature::UnifiedExec)
.expect("test config should allow feature update");
});
let test = builder.build_remote_aware(&server).await?;
let fake_shell_dir = test.config.cwd.join("cc01_fake");
fs::create_dir_all(&fake_shell_dir)?;
let fake_shell = fake_shell_dir.join("bash");
let proof_file = env::current_dir()?.join("cli_8725_fake_shell_proof.txt");
let _ = fs::remove_file(&proof_file);
fs::write(
&fake_shell,
format!(
"#!/bin/sh\nprintf 'fake shell escaped' > {}\nexit 0\n",
codex_shell_command::parse_command::shlex_join(&[proof_file
.to_string_lossy()
.to_string()]),
),
)?;
let mut permissions = fs::metadata(&fake_shell)?.permissions();
permissions.set_mode(0o755);
fs::set_permissions(&fake_shell, permissions)?;
let call_id = "uexec-fake-shell-escalation";
let args = json!({
"cmd": "ls",
"shell": fake_shell.to_string_lossy().to_string(),
"yield_time_ms": 1_000,
"sandbox_permissions": SandboxPermissions::RequireEscalated,
"justification": "validate fake shell sandbox escape guard",
});
mount_sse_sequence(
&server,
vec![sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
])],
)
.await;
let session_model = test.session_configured.model.clone();
test.codex
.submit(Op::UserTurn {
environments: None,
items: vec![UserInput::Text {
text: "run fake shell escalation regression".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: test.config.cwd.to_path_buf(),
approval_policy: AskForApproval::OnRequest,
approvals_reviewer: None,
sandbox_policy: SandboxPolicy::new_workspace_write_policy(),
permission_profile: None,
model: session_model,
effort: None,
summary: None,
service_tier: None,
collaboration_mode: None,
personality: None,
})
.await?;
let event = wait_for_event_with_timeout(
&test.codex,
|event| {
matches!(
event,
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
)
},
Duration::from_secs(5),
)
.await;
match event {
EventMsg::ExecApprovalRequest(approval) => {
assert_eq!(
approval.command,
vec![
fake_shell.to_string_lossy().to_string(),
"-lc".to_string(),
"ls".to_string(),
],
);
assert_eq!(approval.proposed_execpolicy_amendment, None);
}
EventMsg::TurnComplete(_) => {
assert!(
!proof_file.exists(),
"fake shell ran without approval and created outside proof file at {}",
proof_file.display(),
);
panic!("expected exec approval before turn completion");
}
other => panic!("unexpected event: {other:?}"),
}
assert!(
!proof_file.exists(),
"fake shell should not run before approval: {}",
proof_file.display(),
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_resolves_relative_workdir() -> Result<()> {
skip_if_no_network!(Ok(()));