feat: include sandbox config with escalation request (#12839)

## Why

Before this change, an escalation approval could say that a command
should be rerun, but it could not carry the sandbox configuration that
should still apply when the escalated command is actually spawned.

That left an unsafe gap in the `zsh-fork` skill path: skill scripts
under `scripts/` that did not declare permissions could be escalated
without a sandbox, and scripts that did declare permissions could lose
their bounded sandbox on rerun or cached session approval.

This PR extends the escalation protocol so approvals can optionally
carry sandbox configuration all the way through execution. That lets the
shell runtime preserve the intended sandbox instead of silently widening
access.

We likely want a single permissions type for this codepath eventually,
probably centered on `Permissions`. For now, the protocol needs to
represent both the existing `PermissionProfile` form and the fuller
`Permissions` form, so this introduces a temporary disjoint union,
`EscalationPermissions`, to carry either one.

Further, this means that today, a skill either:

- does not declare any permissions, in which case it is run using the
default sandbox for the turn
- specifies permissions, in which case the skill is run using that exact
sandbox, which might be more restrictive than the default sandbox for
the turn

In the future, we will likely make a skill's declared permissions additive
to the turn's existing permissions, rather than a replacement for them.

## What Changed

- Added `EscalationPermissions` to `codex-protocol` so escalation
requests can carry either a `PermissionProfile` or a full `Permissions`
payload.
- Added an explicit `EscalationExecution` mode to the shell escalation
protocol so reruns distinguish between `Unsandboxed`, `TurnDefault`, and
`Permissions(...)` instead of overloading `None`.
- Updated `zsh-fork` shell reruns to resolve `TurnDefault` at execution
time, which keeps ordinary `UseDefault` commands on the turn sandbox and
preserves turn-level macOS seatbelt profile extensions.
- Updated the `zsh-fork` skill path so a skill with no declared
permissions inherits the conversation's effective sandbox instead of
escalating unsandboxed.
- Updated the `zsh-fork` skill path so a skill with declared permissions
reruns with exactly those permissions, including when a cached session
approval is reused.

## Testing

- Added unit coverage in
`core/src/tools/runtimes/shell/unix_escalation.rs` for the explicit
`UseDefault` / `RequireEscalated` / `WithAdditionalPermissions`
execution mapping.
- Added unit coverage in
`core/src/tools/runtimes/shell/unix_escalation.rs` for macOS seatbelt
extension preservation in both the `TurnDefault` and
explicit-permissions rerun paths.
- Added integration coverage in `core/tests/suite/skill_approval.rs` for
permissionless skills inheriting the turn sandbox and explicit skill
permissions remaining bounded across cached approval reuse.
This commit is contained in:
Michael Bolin
2026-02-26 12:00:18 -08:00
committed by GitHub
parent 6b879fe248
commit 7fa9d9ae35
19 changed files with 1229 additions and 202 deletions

View File

@@ -2,13 +2,16 @@
#![cfg(unix)]
use anyhow::Result;
use codex_core::config::Config;
use codex_core::features::Feature;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::models::FileSystemPermissions;
use codex_protocol::models::PermissionProfile;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::ExecApprovalRequestEvent;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::mount_function_call_agent_response;
@@ -65,6 +68,24 @@ async fn submit_turn_with_policies(
}
/// Writes a skill whose script is a fixed shell script that echoes one marker
/// line to stdout and one to stderr.
///
/// All arguments are forwarded unchanged to
/// `write_skill_with_shell_script_contents`, and its result is returned as-is.
fn write_skill_with_shell_script(home: &Path, name: &str, script_name: &str) -> Result<PathBuf> {
    // Stock script body shared by tests that only care about captured output.
    const DEFAULT_SCRIPT: &str = r#"#!/bin/sh
echo 'zsh-fork-stdout'
echo 'zsh-fork-stderr' >&2
"#;

    write_skill_with_shell_script_contents(home, name, script_name, DEFAULT_SCRIPT)
}
#[cfg(unix)]
fn write_skill_with_shell_script_contents(
home: &Path,
name: &str,
script_name: &str,
script_contents: &str,
) -> Result<PathBuf> {
use std::os::unix::fs::PermissionsExt;
let skill_dir = home.join("skills").join(name);
@@ -82,13 +103,7 @@ description: {name} skill
)?;
let script_path = scripts_dir.join(script_name);
fs::write(
&script_path,
r#"#!/bin/sh
echo 'zsh-fork-stdout'
echo 'zsh-fork-stderr' >&2
"#,
)?;
fs::write(&script_path, script_contents)?;
let mut permissions = fs::metadata(&script_path)?.permissions();
permissions.set_mode(0o755);
fs::set_permissions(&script_path, permissions)?;
@@ -129,34 +144,134 @@ fn supports_exec_wrapper_intercept(zsh_path: &Path) -> bool {
}
}
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_zsh_fork_prompts_for_skill_script_execution() -> Result<()> {
use codex_config::Constrained;
use codex_protocol::protocol::ReviewDecision;
/// Filesystem locations a test needs in order to run the shell tool through
/// the zsh-fork runtime. Both paths are copied into the test `Config` by
/// `apply_to_config`.
#[derive(Clone)]
struct ZshForkRuntime {
/// Path to the zsh binary; stored into `config.zsh_path`.
zsh_path: PathBuf,
/// Path to the execve wrapper executable; stored into
/// `config.main_execve_wrapper_exe`.
main_execve_wrapper_exe: PathBuf,
}
skip_if_no_network!(Ok(()));
impl ZshForkRuntime {
    /// Mutates `config` so the shell tool runs through zsh-fork.
    ///
    /// Enables the `ShellTool` and `ShellZshFork` features, points the config
    /// at this runtime's zsh binary and execve wrapper, turns off login
    /// shells, and installs `approval_policy` and `sandbox_policy` wrapped in
    /// `Constrained::allow_any`.
    fn apply_to_config(
        &self,
        config: &mut Config,
        approval_policy: AskForApproval,
        sandbox_policy: SandboxPolicy,
    ) {
        use codex_config::Constrained;

        // Feature flags, in the same order as before.
        for feature in [Feature::ShellTool, Feature::ShellZshFork] {
            config.features.enable(feature);
        }

        // Binary locations supplied by this runtime.
        let Self {
            zsh_path,
            main_execve_wrapper_exe,
        } = self;
        config.zsh_path = Some(zsh_path.clone());
        config.main_execve_wrapper_exe = Some(main_execve_wrapper_exe.clone());

        // Permission settings for the test session.
        let permissions = &mut config.permissions;
        permissions.allow_login_shell = false;
        permissions.approval_policy = Constrained::allow_any(approval_policy);
        permissions.sandbox_policy = Constrained::allow_any(sandbox_policy);
    }
}
/// Builds the `SandboxPolicy::WorkspaceWrite` value shared by the sandbox
/// tests: no extra writable roots, default read-only access, network access
/// off, and both tmp-directory exclusion flags set to `true`.
fn restrictive_workspace_write_policy() -> SandboxPolicy {
    let no_extra_roots = Vec::new();
    let exclude_tmp_locations = true;

    SandboxPolicy::WorkspaceWrite {
        exclude_slash_tmp: exclude_tmp_locations,
        exclude_tmpdir_env_var: exclude_tmp_locations,
        network_access: false,
        read_only_access: Default::default(),
        writable_roots: no_extra_roots,
    }
}
/// Resolves the binaries a zsh-fork test needs and bundles them into a
/// `ZshForkRuntime`.
///
/// Looks up the test zsh via `find_test_zsh_path`, checks it with
/// `supports_exec_wrapper_intercept`, and resolves the `codex-execve-wrapper`
/// binary via `codex_utils_cargo_bin::cargo_bin`. When a prerequisite is
/// missing the function returns early instead of failing; the two later checks
/// also print a "skipping ..." message to stderr. `test_name` is interpolated
/// into those messages.
///
/// NOTE(review): this span appears to show pre-change and post-change lines
/// side by side (e.g. both `return Ok(());` and `return Ok(None);`, and two
/// variants of each skip message) — confirm which lines are live before
/// relying on it.
fn zsh_fork_runtime(test_name: &str) -> Result<Option<ZshForkRuntime>> {
let Some(zsh_path) = find_test_zsh_path()? else {
return Ok(());
return Ok(None);
};
if !supports_exec_wrapper_intercept(&zsh_path) {
eprintln!(
"skipping zsh-fork skill test: zsh does not support EXEC_WRAPPER intercepts ({})",
"skipping {test_name}: zsh does not support EXEC_WRAPPER intercepts ({})",
zsh_path.display()
);
return Ok(());
return Ok(None);
}
let Ok(main_execve_wrapper_exe) = codex_utils_cargo_bin::cargo_bin("codex-execve-wrapper")
else {
eprintln!("skipping zsh-fork skill test: unable to resolve `codex-execve-wrapper` binary");
eprintln!("skipping {test_name}: unable to resolve `codex-execve-wrapper` binary");
return Ok(None);
};
Ok(Some(ZshForkRuntime {
zsh_path,
main_execve_wrapper_exe,
}))
}
/// Builds a `TestCodex` against `server` with the zsh-fork runtime applied.
///
/// `pre_build_hook` is registered through `with_pre_build_hook`, and the
/// config step calls `runtime.apply_to_config` with the supplied approval and
/// sandbox policies. The result of `build` is returned unchanged.
async fn build_zsh_fork_test<F>(
    server: &wiremock::MockServer,
    runtime: ZshForkRuntime,
    approval_policy: AskForApproval,
    sandbox_policy: SandboxPolicy,
    pre_build_hook: F,
) -> Result<TestCodex>
where
    F: FnOnce(&Path) + Send + 'static,
{
    // Step 1: register the caller's filesystem setup hook.
    let with_hook = test_codex().with_pre_build_hook(pre_build_hook);

    // Step 2: layer the zsh-fork configuration on top.
    let mut configured = with_hook.with_config(move |config| {
        runtime.apply_to_config(config, approval_policy, sandbox_policy);
    });

    configured.build(server).await
}
/// Resolves a script under `skills/mbolin-test-skill/scripts` in the test's
/// Codex home and returns `(canonical_path, shell_command)`.
///
/// The path is canonicalized with `fs::canonicalize` and converted lossily to
/// a `String`; the command is that single path joined via `shlex::try_join`.
/// Errors from either call are propagated.
fn skill_script_command(test: &TestCodex, script_name: &str) -> Result<(String, String)> {
    let scripts_dir = test
        .codex_home_path()
        .join("skills/mbolin-test-skill/scripts");
    let canonical_script = fs::canonicalize(scripts_dir.join(script_name))?;

    let script_path_str = canonical_script.to_string_lossy().into_owned();
    let words = [script_path_str.as_str()];
    let command = shlex::try_join(words)?;

    Ok((script_path_str, command))
}
/// Waits on the test's event stream for either an exec approval request or
/// the end of the turn.
///
/// Returns `Some(request)` (a clone of the event payload) when an
/// `ExecApprovalRequest` is matched, and `None` when `TurnComplete` is matched
/// instead. Any other event makes the matcher closure return `None`
/// (presumably so `wait_for_event_match` keeps waiting — confirm against that
/// helper).
async fn wait_for_exec_approval_request(test: &TestCodex) -> Option<ExecApprovalRequestEvent> {
    wait_for_event_match(test.codex.as_ref(), |event| {
        if let EventMsg::ExecApprovalRequest(request) = event {
            Some(Some(request.clone()))
        } else if matches!(event, EventMsg::TurnComplete(_)) {
            Some(None)
        } else {
            None
        }
    })
    .await
}
/// Waits until `wait_for_event` matches an `EventMsg::TurnComplete` on the
/// test's event stream; the matched event is discarded.
async fn wait_for_turn_complete(test: &TestCodex) {
    let codex = test.codex.as_ref();
    wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
}
/// Returns `true` when `output` contains any of the error messages these
/// tests treat as evidence of a sandbox denial. Matching is a case-sensitive
/// substring search.
fn output_shows_sandbox_denial(output: &str) -> bool {
    const DENIAL_MARKERS: [&str; 3] = [
        "Permission denied",
        "Operation not permitted",
        "Read-only file system",
    ];

    DENIAL_MARKERS
        .iter()
        .any(|&marker| output.contains(marker))
}
/// Focus on the approval payload: the skill should prompt before execution and
/// only advertise the permissions declared in its metadata.
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_zsh_fork_prompts_for_skill_script_execution() -> Result<()> {
skip_if_no_network!(Ok(()));
let Some(runtime) = zsh_fork_runtime("zsh-fork skill prompt test")? else {
return Ok(());
};
let server = start_mock_server().await;
let tool_call_id = "zsh-fork-skill-call";
let mut builder = test_codex()
.with_pre_build_hook(|home| {
let test = build_zsh_fork_test(
&server,
runtime,
AskForApproval::OnRequest,
SandboxPolicy::new_workspace_write_policy(),
|home| {
write_skill_with_shell_script(home, "mbolin-test-skill", "hello-mbolin.sh").unwrap();
write_skill_metadata(
home,
@@ -171,25 +286,11 @@ permissions:
"#,
)
.unwrap();
})
.with_config(move |config| {
config.features.enable(Feature::ShellTool);
config.features.enable(Feature::ShellZshFork);
config.zsh_path = Some(zsh_path.clone());
config.main_execve_wrapper_exe = Some(main_execve_wrapper_exe);
config.permissions.allow_login_shell = false;
config.permissions.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
config.permissions.sandbox_policy =
Constrained::allow_any(SandboxPolicy::new_workspace_write_policy());
});
let test = builder.build(&server).await?;
},
)
.await?;
let script_path = fs::canonicalize(
test.codex_home_path()
.join("skills/mbolin-test-skill/scripts/hello-mbolin.sh"),
)?;
let script_path_str = script_path.to_string_lossy().into_owned();
let command = shlex::try_join([script_path_str.as_str()])?;
let (script_path_str, command) = skill_script_command(&test, "hello-mbolin.sh")?;
let arguments = shell_command_arguments(&command)?;
let mocks =
mount_function_call_agent_response(&server, tool_call_id, &arguments, "shell_command")
@@ -203,12 +304,7 @@ permissions:
)
.await?;
let maybe_approval = wait_for_event_match(test.codex.as_ref(), |event| match event {
EventMsg::ExecApprovalRequest(request) => Some(Some(request.clone())),
EventMsg::TurnComplete(_) => Some(None),
_ => None,
})
.await;
let maybe_approval = wait_for_exec_approval_request(&test).await;
let approval = match maybe_approval {
Some(approval) => approval,
None => {
@@ -250,10 +346,7 @@ permissions:
})
.await?;
wait_for_event(test.codex.as_ref(), |event| {
matches!(event, EventMsg::TurnComplete(_))
})
.await;
wait_for_turn_complete(&test).await;
let call_output = mocks
.completion
@@ -268,58 +361,350 @@ permissions:
Ok(())
}
/// Look for `additional_permissions == None`, then verify that both the first
/// run and the cached session-approval rerun stay inside the turn sandbox.
///
/// Flow: install a skill script (no skill metadata is written) that tries to
/// write a file in a temp dir under the current directory, run it twice under
/// the restrictive workspace-write policy, approve the first run for the
/// session, and check after each run that the file was not created.
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_zsh_fork_skill_without_permissions_inherits_turn_sandbox() -> Result<()> {
skip_if_no_network!(Ok(()));
// Skip (pass trivially) when the zsh-fork prerequisites are unavailable.
let Some(runtime) = zsh_fork_runtime("zsh-fork inherited skill sandbox test")? else {
return Ok(());
};
// The script's write target lives in a fresh temp dir under the current
// working directory; the path is shell-quoted before being embedded.
let outside_dir = tempfile::tempdir_in(std::env::current_dir()?)?;
let outside_path = outside_dir
.path()
.join("zsh-fork-skill-inherited-sandbox.txt");
let outside_path_quoted = shlex::try_join([outside_path.to_string_lossy().as_ref()])?;
// Script: write "forbidden" to the target, then cat it back.
let script_contents = format!(
"#!/bin/sh\nprintf '%s' forbidden > {outside_path_quoted}\ncat {outside_path_quoted}\n"
);
// Clones moved into the `'static` pre-build hook below.
let outside_path_for_hook = outside_path.clone();
let script_contents_for_hook = script_contents.clone();
let workspace_write_policy = restrictive_workspace_write_policy();
let server = start_mock_server().await;
let test = build_zsh_fork_test(
&server,
runtime,
AskForApproval::OnRequest,
workspace_write_policy.clone(),
move |home| {
// Best-effort removal of any stale target file, then install the script.
let _ = fs::remove_file(&outside_path_for_hook);
write_skill_with_shell_script_contents(
home,
"mbolin-test-skill",
"sandboxed.sh",
&script_contents_for_hook,
)
.unwrap();
},
)
.await?;
let (script_path_str, command) = skill_script_command(&test, "sandboxed.sh")?;
// First turn: the mocked model response calls the shell tool with the script.
let first_call_id = "zsh-fork-skill-permissions-1";
let first_arguments = shell_command_arguments(&command)?;
let first_mocks = mount_function_call_agent_response(
&server,
first_call_id,
&first_arguments,
"shell_command",
)
.await;
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
AskForApproval::OnRequest,
workspace_write_policy.clone(),
)
.await?;
// An approval prompt must arrive before the turn completes.
let maybe_approval = wait_for_exec_approval_request(&test).await;
let approval = match maybe_approval {
Some(approval) => approval,
None => panic!("expected exec approval request before completion"),
};
assert_eq!(approval.call_id, first_call_id);
assert_eq!(approval.command, vec![script_path_str.clone()]);
// No permissions were declared, so none should be advertised.
assert_eq!(approval.additional_permissions, None);
// Approve for the whole session so the second run can reuse the decision.
test.codex
.submit(Op::ExecApproval {
id: approval.effective_approval_id(),
turn_id: None,
decision: ReviewDecision::ApprovedForSession,
})
.await?;
wait_for_turn_complete(&test).await;
let first_output = first_mocks
.completion
.single_request()
.function_call_output(first_call_id)["output"]
.as_str()
.unwrap_or_default()
.to_string();
// Accept either an explicit denial message or output lacking the payload;
// the file-existence check below is the stricter condition.
assert!(
output_shows_sandbox_denial(&first_output) || !first_output.contains("forbidden"),
"expected inherited turn sandbox denial on first run, got output: {first_output:?}"
);
assert!(
!outside_path.exists(),
"first run should not write outside the turn sandbox"
);
// Second turn: same command under a new call id.
let second_call_id = "zsh-fork-skill-permissions-2";
let second_arguments = shell_command_arguments(&command)?;
let second_mocks = mount_function_call_agent_response(
&server,
second_call_id,
&second_arguments,
"shell_command",
)
.await;
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
AskForApproval::OnRequest,
workspace_write_policy,
)
.await?;
// `None` here means the turn completed without a new approval prompt.
let cached_approval = wait_for_exec_approval_request(&test).await;
assert!(
cached_approval.is_none(),
"expected second run to reuse the cached session approval"
);
let second_output = second_mocks
.completion
.single_request()
.function_call_output(second_call_id)["output"]
.as_str()
.unwrap_or_default()
.to_string();
assert!(
output_shows_sandbox_denial(&second_output) || !second_output.contains("forbidden"),
"expected cached skill approval to retain inherited turn sandboxing, got output: {second_output:?}"
);
assert!(
!outside_path.exists(),
"cached session approval should not widen a permissionless skill to full access"
);
Ok(())
}
/// The validation to focus on is: writes to the skill-approved folder succeed,
/// and writes to an unrelated folder fail, both before and after cached approval.
///
/// Flow: install a skill whose metadata declares write access to
/// `allowed-output` only, run its script twice under the restrictive
/// workspace-write policy, approve the first run for the session, and check
/// after each run that `allowed.txt` was written and `blocked.txt` was not.
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_zsh_fork_skill_session_approval_enforces_skill_permissions() -> Result<()> {
skip_if_no_network!(Ok(()));
// Skip (pass trivially) when the zsh-fork prerequisites are unavailable.
let Some(runtime) = zsh_fork_runtime("zsh-fork explicit skill sandbox test")? else {
return Ok(());
};
// Two sibling directories in a temp dir under the current working
// directory: one the skill is allowed to write to, one it is not.
let outside_dir = tempfile::tempdir_in(std::env::current_dir()?)?;
let allowed_dir = outside_dir.path().join("allowed-output");
let blocked_dir = outside_dir.path().join("blocked-output");
fs::create_dir_all(&allowed_dir)?;
fs::create_dir_all(&blocked_dir)?;
let allowed_path = allowed_dir.join("allowed.txt");
let blocked_path = blocked_dir.join("blocked.txt");
let allowed_path_quoted = shlex::try_join([allowed_path.to_string_lossy().as_ref()])?;
let blocked_path_quoted = shlex::try_join([blocked_path.to_string_lossy().as_ref()])?;
// Script: write and cat the allowed file, attempt the blocked write, and
// print "blocked-created" if the blocked file exists afterwards.
let script_contents = format!(
"#!/bin/sh\nprintf '%s' allowed > {allowed_path_quoted}\ncat {allowed_path_quoted}\nprintf '%s' forbidden > {blocked_path_quoted}\nif [ -f {blocked_path_quoted} ]; then echo blocked-created; fi\n"
);
// Clones moved into the `'static` pre-build hook below.
let allowed_dir_for_hook = allowed_dir.clone();
let allowed_path_for_hook = allowed_path.clone();
let blocked_path_for_hook = blocked_path.clone();
let script_contents_for_hook = script_contents.clone();
// Skill metadata granting write access to the allowed directory only.
let permissions_yaml = format!(
"permissions:\n file_system:\n write:\n - \"{}\"\n",
allowed_dir.display()
);
let workspace_write_policy = restrictive_workspace_write_policy();
let server = start_mock_server().await;
let test = build_zsh_fork_test(
&server,
runtime,
AskForApproval::OnRequest,
workspace_write_policy.clone(),
move |home| {
// Start from a clean slate: no output files, both directories present.
let _ = fs::remove_file(&allowed_path_for_hook);
let _ = fs::remove_file(&blocked_path_for_hook);
fs::create_dir_all(&allowed_dir_for_hook).unwrap();
fs::create_dir_all(blocked_path_for_hook.parent().unwrap()).unwrap();
write_skill_with_shell_script_contents(
home,
"mbolin-test-skill",
"sandboxed.sh",
&script_contents_for_hook,
)
.unwrap();
write_skill_metadata(home, "mbolin-test-skill", &permissions_yaml).unwrap();
},
)
.await?;
let (script_path_str, command) = skill_script_command(&test, "sandboxed.sh")?;
// First turn: the mocked model response calls the shell tool with the script.
let first_call_id = "zsh-fork-skill-permissions-1";
let first_arguments = shell_command_arguments(&command)?;
let first_mocks = mount_function_call_agent_response(
&server,
first_call_id,
&first_arguments,
"shell_command",
)
.await;
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
AskForApproval::OnRequest,
workspace_write_policy.clone(),
)
.await?;
// An approval prompt must arrive before the turn completes.
let maybe_approval = wait_for_exec_approval_request(&test).await;
let approval = match maybe_approval {
Some(approval) => approval,
None => panic!("expected exec approval request before completion"),
};
assert_eq!(approval.call_id, first_call_id);
assert_eq!(approval.command, vec![script_path_str.clone()]);
// The prompt must advertise exactly the write permission from the metadata.
assert_eq!(
approval.additional_permissions,
Some(PermissionProfile {
file_system: Some(FileSystemPermissions {
read: None,
write: Some(vec![allowed_dir.clone()]),
}),
..Default::default()
})
);
// Approve for the whole session so the second run can reuse the decision.
test.codex
.submit(Op::ExecApproval {
id: approval.effective_approval_id(),
turn_id: None,
decision: ReviewDecision::ApprovedForSession,
})
.await?;
wait_for_turn_complete(&test).await;
let first_output = first_mocks
.completion
.single_request()
.function_call_output(first_call_id)["output"]
.as_str()
.unwrap_or_default()
.to_string();
// Allowed write succeeded: visible both in the output and on disk.
assert!(
first_output.contains("allowed"),
"expected skill sandbox to permit writes to the approved folder, got output: {first_output:?}"
);
assert_eq!(fs::read_to_string(&allowed_path)?, "allowed");
// Blocked write failed: no file on disk and no marker in the output.
assert!(
!blocked_path.exists(),
"first run should not write outside the explicit skill sandbox"
);
assert!(
!first_output.contains("blocked-created"),
"blocked path should not have been created: {first_output:?}"
);
// Second turn: same command under a new call id.
let second_call_id = "zsh-fork-skill-permissions-2";
let second_arguments = shell_command_arguments(&command)?;
let second_mocks = mount_function_call_agent_response(
&server,
second_call_id,
&second_arguments,
"shell_command",
)
.await;
// Remove first-run artifacts so the checks below reflect only the rerun.
let _ = fs::remove_file(&allowed_path);
let _ = fs::remove_file(&blocked_path);
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
AskForApproval::OnRequest,
workspace_write_policy,
)
.await?;
// `None` here means the turn completed without a new approval prompt.
let cached_approval = wait_for_exec_approval_request(&test).await;
assert!(
cached_approval.is_none(),
"expected second run to reuse the cached session approval"
);
let second_output = second_mocks
.completion
.single_request()
.function_call_output(second_call_id)["output"]
.as_str()
.unwrap_or_default()
.to_string();
assert!(
second_output.contains("allowed"),
"expected cached skill approval to retain the explicit skill sandbox, got output: {second_output:?}"
);
assert_eq!(fs::read_to_string(&allowed_path)?, "allowed");
assert!(
!blocked_path.exists(),
"cached session approval should not widen skill execution beyond the explicit skill sandbox"
);
assert!(
!second_output.contains("blocked-created"),
"blocked path should not have been created after cached approval: {second_output:?}"
);
Ok(())
}
/// This stays narrow on purpose: the important check is that `WorkspaceWrite`
/// continues to deny writes outside the workspace even under `zsh-fork`.
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_zsh_fork_still_enforces_workspace_write_sandbox() -> Result<()> {
use codex_config::Constrained;
use codex_protocol::protocol::AskForApproval;
skip_if_no_network!(Ok(()));
let Some(zsh_path) = find_test_zsh_path()? else {
return Ok(());
};
if !supports_exec_wrapper_intercept(&zsh_path) {
eprintln!(
"skipping zsh-fork sandbox test: zsh does not support EXEC_WRAPPER intercepts ({})",
zsh_path.display()
);
return Ok(());
}
let Ok(main_execve_wrapper_exe) = codex_utils_cargo_bin::cargo_bin("codex-execve-wrapper")
else {
eprintln!(
"skipping zsh-fork sandbox test: unable to resolve `codex-execve-wrapper` binary"
);
let Some(runtime) = zsh_fork_runtime("zsh-fork workspace sandbox test")? else {
return Ok(());
};
let server = start_mock_server().await;
let tool_call_id = "zsh-fork-workspace-write-deny";
let outside_path = "/tmp/codex-zsh-fork-workspace-write-deny.txt";
let workspace_write_policy = SandboxPolicy::WorkspaceWrite {
writable_roots: Vec::new(),
read_only_access: Default::default(),
network_access: false,
exclude_tmpdir_env_var: true,
exclude_slash_tmp: true,
};
let policy_for_config = workspace_write_policy.clone();
let workspace_write_policy = restrictive_workspace_write_policy();
let _ = fs::remove_file(outside_path);
let mut builder = test_codex()
.with_pre_build_hook(move |_| {
let test = build_zsh_fork_test(
&server,
runtime,
AskForApproval::Never,
workspace_write_policy.clone(),
move |_| {
let _ = fs::remove_file(outside_path);
})
.with_config(move |config| {
config.features.enable(Feature::ShellTool);
config.features.enable(Feature::ShellZshFork);
config.zsh_path = Some(zsh_path.clone());
config.main_execve_wrapper_exe = Some(main_execve_wrapper_exe);
config.permissions.allow_login_shell = false;
config.permissions.approval_policy = Constrained::allow_any(AskForApproval::Never);
config.permissions.sandbox_policy = Constrained::allow_any(policy_for_config);
});
let test = builder.build(&server).await?;
},
)
.await?;
let command = format!("touch {outside_path}");
let arguments = shell_command_arguments(&command)?;
@@ -335,7 +720,7 @@ async fn shell_zsh_fork_still_enforces_workspace_write_sandbox() -> Result<()> {
)
.await?;
wait_for_turn_complete_without_skill_approval(&test).await;
wait_for_turn_complete(&test).await;
let call_output = mocks
.completion
@@ -343,9 +728,7 @@ async fn shell_zsh_fork_still_enforces_workspace_write_sandbox() -> Result<()> {
.function_call_output(tool_call_id);
let output = call_output["output"].as_str().unwrap_or_default();
assert!(
output.contains("Permission denied")
|| output.contains("Operation not permitted")
|| output.contains("Read-only file system"),
output_shows_sandbox_denial(output),
"expected sandbox denial, got output: {output:?}"
);
assert!(