Files
codex/codex-rs/core/tests/suite/mod.rs
Michael Bolin 7fa9d9ae35 feat: include sandbox config with escalation request (#12839)
## Why

Before this change, an escalation approval could say that a command
should be rerun, but it could not carry the sandbox configuration that
should still apply when the escalated command is actually spawned.

That left an unsafe gap in the `zsh-fork` skill path: skill scripts
under `scripts/` that did not declare permissions could be escalated
without a sandbox, and scripts that did declare permissions could lose
their bounded sandbox on rerun or cached session approval.

This PR extends the escalation protocol so approvals can optionally
carry sandbox configuration all the way through execution. That lets the
shell runtime preserve the intended sandbox instead of silently widening
access.

We likely want a single permissions type for this codepath eventually,
probably centered on `Permissions`. For now, the protocol needs to
represent both the existing `PermissionProfile` form and the fuller
`Permissions` form, so this introduces a temporary disjoint union,
`EscalationPermissions`, to carry either one.

Further, this means that today, a skill either:

- does not declare any permissions, in which case it is run using the
default sandbox for the turn
- specifies permissions, in which case the skill is run using that exact
sandbox, which might be more restrictive than the default sandbox for
the turn

We will likely change the skill's permissions to be additive to the
existing permissions for the turn.

## What Changed

- Added `EscalationPermissions` to `codex-protocol` so escalation
requests can carry either a `PermissionProfile` or a full `Permissions`
payload.
- Added an explicit `EscalationExecution` mode to the shell escalation
protocol so reruns distinguish between `Unsandboxed`, `TurnDefault`, and
`Permissions(...)` instead of overloading `None`.
- Updated `zsh-fork` shell reruns to resolve `TurnDefault` at execution
time, which keeps ordinary `UseDefault` commands on the turn sandbox and
preserves turn-level macOS seatbelt profile extensions.
- Updated the `zsh-fork` skill path so a skill with no declared
permissions inherits the conversation's effective sandbox instead of
escalating unsandboxed.
- Updated the `zsh-fork` skill path so a skill with declared permissions
reruns with exactly those permissions, including when a cached session
approval is reused.

## Testing

- Added unit coverage in
`core/src/tools/runtimes/shell/unix_escalation.rs` for the explicit
`UseDefault` / `RequireEscalated` / `WithAdditionalPermissions`
execution mapping.
- Added unit coverage in
`core/src/tools/runtimes/shell/unix_escalation.rs` for macOS seatbelt
extension preservation in both the `TurnDefault` and
explicit-permissions rerun paths.
- Added integration coverage in `core/tests/suite/skill_approval.rs` for
permissionless skills inheriting the turn sandbox and explicit skill
permissions remaining bounded across cached approval reuse.
2026-02-26 12:00:18 -08:00

138 lines
3.4 KiB
Rust

// Aggregates all former standalone integration tests as modules.
use std::ffi::OsString;
use codex_arg0::Arg0PathEntryGuard;
use codex_arg0::arg0_dispatch;
use ctor::ctor;
use tempfile::TempDir;
/// Bundle of values produced by the `#[ctor]` initializer in this file.
///
/// Every field is underscore-prefixed: nothing in this file reads them after
/// construction; they are stored only so the values stay owned by the static.
struct TestCodexAliasesGuard {
/// Temporary directory (prefix `codex-core-tests`) that `CODEX_HOME` is
/// pointed at while `arg0_dispatch()` runs.
_codex_home: TempDir,
/// Guard returned by `arg0_dispatch()`.
// NOTE(review): presumably dropping this guard would undo the arg0 helper
// setup — confirm against `codex_arg0`.
_arg0: Arg0PathEntryGuard,
/// Value of `CODEX_HOME` observed before the initializer overrode it, or
/// `None` when the variable was unset.
_previous_codex_home: Option<OsString>,
}
/// Name of the environment variable that the test bootstrap reads, temporarily
/// overrides, and then restores around the call to `arg0_dispatch()`.
const CODEX_HOME_ENV_VAR: &str = "CODEX_HOME";
// Process-wide bootstrap that executes ahead of every test in this binary.
// It lets the test binary act like codex itself, dispatching on arg0 to
// apply_patch and codex-linux-sandbox.
// NOTE: this doesn't work on ARM
#[ctor]
pub static CODEX_ALIASES_TEMP_DIR: TestCodexAliasesGuard = unsafe {
    // Scratch directory that stands in for CODEX_HOME during setup.
    #[allow(clippy::unwrap_used)]
    let scratch_home = tempfile::Builder::new()
        .prefix("codex-core-tests")
        .tempdir()
        .unwrap();

    // Capture the caller's CODEX_HOME (if any) before overriding it.
    let saved_home = std::env::var_os(CODEX_HOME_ENV_VAR);

    // arg0_dispatch() places its helper links under CODEX_HOME/tmp, so aim it
    // at the scratch directory; the developer's real ~/.codex is never touched.
    //
    // SAFETY: a #[ctor] initializer runs before tests start, so there are no
    // test threads that could race on the process environment.
    unsafe {
        std::env::set_var(CODEX_HOME_ENV_VAR, scratch_home.path());
    }

    #[allow(clippy::unwrap_used)]
    let arg0_guard = arg0_dispatch().unwrap();

    // Put the environment back right away so that every test sees the same
    // CODEX_HOME state the process was launched with.
    if let Some(original) = &saved_home {
        // SAFETY: still inside the #[ctor] initializer; no test threads yet.
        unsafe {
            std::env::set_var(CODEX_HOME_ENV_VAR, original);
        }
    } else {
        // SAFETY: still inside the #[ctor] initializer; no test threads yet.
        unsafe {
            std::env::remove_var(CODEX_HOME_ENV_VAR);
        }
    }

    TestCodexAliasesGuard {
        _codex_home: scratch_home,
        _arg0: arg0_guard,
        _previous_codex_home: saved_home,
    }
};
#[cfg(not(target_os = "windows"))]
mod abort_tasks;
mod agent_jobs;
mod agent_websocket;
mod apply_patch_cli;
#[cfg(not(target_os = "windows"))]
mod approvals;
mod auth_refresh;
mod cli_stream;
mod client;
mod client_websockets;
mod codex_delegate;
mod collaboration_instructions;
mod compact;
mod compact_remote;
mod compact_resume_fork;
mod deprecation_notice;
mod exec;
mod exec_policy;
mod fork_thread;
mod grep_files;
mod hierarchical_agents;
mod image_rollout;
mod items;
mod js_repl;
mod json_result;
mod list_dir;
mod live_cli;
mod live_reload;
mod memories;
mod model_info_overrides;
mod model_overrides;
mod model_switching;
mod model_visible_layout;
mod models_cache_ttl;
mod models_etag_responses;
mod otel;
mod pending_input;
mod permissions_messages;
mod personality;
mod personality_migration;
mod prompt_caching;
mod quota_exceeded;
mod read_file;
mod realtime_conversation;
mod remote_models;
mod request_compression;
#[cfg(not(target_os = "windows"))]
mod request_permissions;
mod request_user_input;
mod resume;
mod resume_warning;
mod review;
mod rmcp_client;
mod rollout_list_find;
mod safety_check_downgrade;
mod search_tool;
mod seatbelt;
mod shell_command;
mod shell_serialization;
mod shell_snapshot;
mod skill_approval;
mod skills;
mod sqlite_state;
mod stream_error_allows_next_turn;
mod stream_no_completed;
mod subagent_notifications;
mod text_encoding_fix;
mod tool_harness;
mod tool_parallelism;
mod tools;
mod truncation;
mod turn_state;
mod undo;
mod unified_exec;
mod unstable_features_warning;
mod user_notification;
mod user_shell_cmd;
mod view_image;
mod web_search;
mod websocket_fallback;