Sandboxing iteration 2

This commit is contained in:
jimmyfraiture
2025-09-29 19:34:12 +01:00
parent 7b6d8b60c9
commit 5b74f10a7b
12 changed files with 718 additions and 634 deletions

View File

@@ -0,0 +1,95 @@
use std::sync::Arc;
use async_trait::async_trait;
use crate::apply_patch::ApplyPatchExec;
use crate::exec::ExecParams;
use crate::exec::ExecToolCallOutput;
use crate::executor::sandbox::build_exec_params_for_apply_patch;
use crate::function_tool::FunctionCallError;
pub(crate) enum ExecutionMode {
Shell,
ApplyPatch(ApplyPatchExec),
}
#[async_trait]
/// Backend-specific hooks that prepare and post-process execution requests for a
/// given [`ExecutionMode`].
pub(crate) trait ExecutionBackend: Send + Sync {
fn prepare(
&self,
params: ExecParams,
// Required for downcasting the apply_patch.
mode: &ExecutionMode,
) -> Result<ExecParams, FunctionCallError>;
async fn finalize(
&self,
output: ExecToolCallOutput,
_mode: &ExecutionMode,
) -> Result<ExecToolCallOutput, FunctionCallError> {
Ok(output)
}
}
pub(crate) struct BackendStore {
shell: Arc<dyn ExecutionBackend>,
apply_patch: Arc<dyn ExecutionBackend>,
}
impl BackendStore {
pub(crate) fn new() -> Self {
Self {
shell: Arc::new(ShellBackend),
apply_patch: Arc::new(ApplyPatchBackend),
}
}
pub(crate) fn for_mode(&self, mode: &ExecutionMode) -> Arc<dyn ExecutionBackend> {
match mode {
ExecutionMode::Shell => self.shell.clone(),
ExecutionMode::ApplyPatch(_) => self.apply_patch.clone(),
}
}
}
pub(crate) fn default_backends() -> BackendStore {
BackendStore::new()
}
struct ShellBackend;
#[async_trait]
impl ExecutionBackend for ShellBackend {
fn prepare(
&self,
params: ExecParams,
mode: &ExecutionMode,
) -> Result<ExecParams, FunctionCallError> {
match mode {
ExecutionMode::Shell => Ok(params),
_ => Err(FunctionCallError::RespondToModel(
"shell backend invoked with non-shell mode".to_string(),
)),
}
}
}
struct ApplyPatchBackend;
#[async_trait]
impl ExecutionBackend for ApplyPatchBackend {
fn prepare(
&self,
params: ExecParams,
mode: &ExecutionMode,
) -> Result<ExecParams, FunctionCallError> {
match mode {
ExecutionMode::ApplyPatch(exec) => build_exec_params_for_apply_patch(exec, &params),
ExecutionMode::Shell => Err(FunctionCallError::RespondToModel(
"apply_patch backend invoked without patch context".to_string(),
)),
}
}
}

View File

@@ -0,0 +1,25 @@
use std::collections::HashSet;
use std::sync::Arc;
use std::sync::Mutex;
#[derive(Clone, Debug, Default)]
/// Thread-safe store of user approvals so repeated commands can reuse
/// previously granted trust.
pub(crate) struct ApprovalCache {
inner: Arc<Mutex<HashSet<Vec<String>>>>,
}
impl ApprovalCache {
pub(crate) fn insert(&self, command: Vec<String>) {
if command.is_empty() {
return;
}
if let Ok(mut guard) = self.inner.lock() {
guard.insert(command);
}
}
pub(crate) fn snapshot(&self) -> HashSet<Vec<String>> {
self.inner.lock().map(|g| g.clone()).unwrap_or_default()
}
}

View File

@@ -0,0 +1,11 @@
mod backends;
mod cache;
mod runner;
mod sandbox;
pub(crate) use backends::ExecutionMode;
pub(crate) use runner::ExecError;
pub(crate) use runner::ExecutionRequest;
pub(crate) use runner::Executor;
pub(crate) use runner::ExecutorConfig;
pub(crate) use runner::normalize_exec_result;

View File

@@ -0,0 +1,306 @@
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::RwLock;
use std::time::Duration;
use thiserror::Error;
use super::backends::BackendStore;
use super::backends::ExecutionBackend;
use super::backends::ExecutionMode;
use super::backends::default_backends;
use super::cache::ApprovalCache;
use crate::codex::Session;
use crate::error::CodexErr;
use crate::error::SandboxErr;
use crate::error::get_error_message_ui;
use crate::exec::ExecParams;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StdoutStream;
use crate::exec::StreamOutput;
use crate::exec::process_exec_tool_call;
use crate::executor::sandbox::select_sandbox;
use crate::function_tool::FunctionCallError;
use crate::protocol::AskForApproval;
use crate::protocol::ReviewDecision;
use crate::protocol::SandboxPolicy;
#[derive(Clone, Debug)]
pub(crate) struct ExecutorConfig {
pub(crate) sandbox_policy: SandboxPolicy,
pub(crate) sandbox_cwd: PathBuf,
codex_linux_sandbox_exe: Option<PathBuf>,
}
impl ExecutorConfig {
pub(crate) fn new(
sandbox_policy: SandboxPolicy,
sandbox_cwd: PathBuf,
codex_linux_sandbox_exe: Option<PathBuf>,
) -> Self {
Self {
sandbox_policy,
sandbox_cwd,
codex_linux_sandbox_exe,
}
}
}
#[derive(Debug, Error)]
pub enum ExecError {
#[error(transparent)]
Function(#[from] FunctionCallError),
#[error(transparent)]
Codex(#[from] CodexErr),
}
impl ExecError {
pub(crate) fn rejection(msg: impl Into<String>) -> Self {
FunctionCallError::RespondToModel(msg.into()).into()
}
}
/// Coordinates sandbox selection, backend-specific preparation, and command
/// execution for tool calls requested by the model.
pub(crate) struct Executor {
backends: BackendStore,
approval_cache: ApprovalCache,
config: Arc<RwLock<ExecutorConfig>>,
}
impl Executor {
pub(crate) fn new(config: ExecutorConfig) -> Self {
Self {
backends: default_backends(),
approval_cache: ApprovalCache::default(),
config: Arc::new(RwLock::new(config)),
}
}
/// Updates the sandbox policy and working directory used for future
/// executions without recreating the executor.
pub(crate) fn update_environment(
&self,
sandbox_policy: SandboxPolicy,
sandbox_cwd: PathBuf,
codex_linux_sandbox_exe: Option<PathBuf>,
) {
if let Ok(mut cfg) = self.config.write() {
cfg.sandbox_policy = sandbox_policy;
cfg.sandbox_cwd = sandbox_cwd;
cfg.codex_linux_sandbox_exe = codex_linux_sandbox_exe;
}
}
/// Runs a prepared execution request end-to-end: prepares parameters, decides on
/// sandbox placement (prompting the user when necessary), launches the command,
/// and lets the backend post-process the final output.
pub(crate) async fn run(
&self,
mut request: ExecutionRequest,
session: &Session,
approval_policy: AskForApproval,
sub_id: &str,
call_id: &str,
) -> Result<ExecToolCallOutput, ExecError> {
// Step 1: Normalise parameters via the selected backend.
let backend = self.backends.for_mode(&request.mode);
request.params = backend
.prepare(request.params, &request.mode)
.map_err(ExecError::from)?;
// Step 2: Snapshot sandbox configuration so it stays stable for this run.
let config = self
.config
.read()
.map_err(|_| ExecError::rejection("executor config poisoned"))?
.clone();
// Step 3: Decide sandbox placement, prompting for approval when needed.
let sandbox_decision = select_sandbox(
&request,
approval_policy,
self.approval_cache.snapshot(),
&config,
session,
sub_id,
call_id,
)
.await?;
if sandbox_decision.record_session_approval {
self.approval_cache.insert(request.approval_command.clone());
}
// Step 4: Launch the command within the chosen sandbox.
let first_attempt = self
.spawn(
request.params.clone(),
sandbox_decision.initial_sandbox,
&config,
request.stdout_stream.clone(),
)
.await;
// Step 5: Handle sandbox outcomes, optionally escalating to an unsandboxed retry.
let raw_output = match first_attempt {
Ok(output) => output,
Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => {
return Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into());
}
Err(CodexErr::Sandbox(error @ SandboxErr::Denied { .. })) => {
return if sandbox_decision.escalate_on_failure {
self.retry_without_sandbox(
&*backend, &request, &config, session, sub_id, call_id, error,
)
.await
} else {
Err(ExecError::rejection(format!(
"failed in sandbox {:?} with execution error: {error:?}",
sandbox_decision.initial_sandbox
)))
};
}
Err(err) => return Err(err.into()),
};
// Step 6: Allow the backend to post-process the raw output.
backend
.finalize(raw_output, &request.mode)
.await
.map_err(ExecError::from)
}
/// Fallback path invoked when a sandboxed run is denied so the user can
/// approve rerunning without isolation.
#[allow(clippy::too_many_arguments)]
async fn retry_without_sandbox(
&self,
backend: &dyn ExecutionBackend,
request: &ExecutionRequest,
config: &ExecutorConfig,
session: &Session,
sub_id: &str,
call_id: &str,
sandbox_error: SandboxErr,
) -> Result<ExecToolCallOutput, ExecError> {
session
.notify_background_event(sub_id, format!("Execution failed: {sandbox_error}"))
.await;
let decision = session
.request_command_approval(
sub_id.to_string(),
call_id.to_string(),
request.approval_command.clone(),
request.params.cwd.clone(),
Some("command failed; retry without sandbox?".to_string()),
)
.await;
match decision {
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
if matches!(decision, ReviewDecision::ApprovedForSession) {
self.approval_cache.insert(request.approval_command.clone());
}
session
.notify_background_event(sub_id, "retrying command without sandbox")
.await;
let retry_output = self
.spawn(
request.params.clone(),
SandboxType::None,
config,
request.stdout_stream.clone(),
)
.await?;
backend
.finalize(retry_output, &request.mode)
.await
.map_err(ExecError::from)
}
ReviewDecision::Denied | ReviewDecision::Abort => {
Err(ExecError::rejection("exec command rejected by user"))
}
}
}
async fn spawn(
&self,
params: ExecParams,
sandbox: SandboxType,
config: &ExecutorConfig,
stdout_stream: Option<StdoutStream>,
) -> Result<ExecToolCallOutput, CodexErr> {
process_exec_tool_call(
params,
sandbox,
&config.sandbox_policy,
&config.sandbox_cwd,
&config.codex_linux_sandbox_exe,
stdout_stream,
)
.await
}
}
pub(crate) struct ExecutionRequest {
pub params: ExecParams,
pub approval_command: Vec<String>,
pub mode: ExecutionMode,
pub stdout_stream: Option<StdoutStream>,
}
pub(crate) struct NormalizedExecOutput<'a> {
borrowed: Option<&'a ExecToolCallOutput>,
synthetic: Option<ExecToolCallOutput>,
}
impl<'a> NormalizedExecOutput<'a> {
pub(crate) fn event_output(&'a self) -> &'a ExecToolCallOutput {
match (self.borrowed, self.synthetic.as_ref()) {
(Some(output), _) => output,
(None, Some(output)) => output,
(None, None) => unreachable!("normalized exec output missing data"),
}
}
}
/// Converts a raw execution result into a uniform view that always exposes an
/// [`ExecToolCallOutput`], synthesizing error output when the command fails
/// before producing a response.
pub(crate) fn normalize_exec_result(
result: &Result<ExecToolCallOutput, ExecError>,
) -> NormalizedExecOutput<'_> {
match result {
Ok(output) => NormalizedExecOutput {
borrowed: Some(output),
synthetic: None,
},
Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => {
NormalizedExecOutput {
borrowed: Some(output.as_ref()),
synthetic: None,
}
}
Err(err) => {
let message = match err {
ExecError::Function(FunctionCallError::RespondToModel(msg)) => msg.clone(),
ExecError::Codex(e) => get_error_message_ui(e),
};
let synthetic = ExecToolCallOutput {
exit_code: -1,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new(message.clone()),
aggregated_output: StreamOutput::new(message),
duration: Duration::default(),
timed_out: false,
};
NormalizedExecOutput {
borrowed: None,
synthetic: Some(synthetic),
}
}
}
}

View File

@@ -0,0 +1,193 @@
use crate::CODEX_APPLY_PATCH_ARG1;
use crate::apply_patch::ApplyPatchExec;
use crate::codex::Session;
use crate::exec::ExecParams;
use crate::exec::SandboxType;
use crate::executor::ExecError;
use crate::executor::ExecutionMode;
use crate::executor::ExecutionRequest;
use crate::executor::ExecutorConfig;
use crate::function_tool::FunctionCallError;
use crate::safety::SafetyCheck;
use crate::safety::assess_command_safety;
use crate::safety::assess_patch_safety;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use std::collections::HashMap;
use std::collections::HashSet;
use std::env;
/// Sandbox placement options selected for an execution run, including whether
/// to escalate after failures and whether approvals should persist.
pub(crate) struct SandboxDecision {
pub(crate) initial_sandbox: SandboxType,
pub(crate) escalate_on_failure: bool,
pub(crate) record_session_approval: bool,
}
impl SandboxDecision {
fn auto(sandbox: SandboxType, escalate_on_failure: bool) -> Self {
Self {
initial_sandbox: sandbox,
escalate_on_failure,
record_session_approval: false,
}
}
fn user_override(record_session_approval: bool) -> Self {
Self {
initial_sandbox: SandboxType::None,
escalate_on_failure: false,
record_session_approval,
}
}
}
fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool {
matches!(
(approval, sandbox),
(
AskForApproval::UnlessTrusted | AskForApproval::OnFailure,
SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp
)
)
}
/// Builds the command-line invocation that shells out to `codex apply_patch`
/// using the provided apply-patch request details.
pub(crate) fn build_exec_params_for_apply_patch(
exec: &ApplyPatchExec,
original: &ExecParams,
) -> Result<ExecParams, FunctionCallError> {
let path_to_codex = env::current_exe()
.ok()
.map(|p| p.to_string_lossy().to_string())
.ok_or_else(|| {
FunctionCallError::RespondToModel(
"failed to determine path to codex executable".to_string(),
)
})?;
let patch = exec.action.patch.clone();
Ok(ExecParams {
command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
cwd: exec.action.cwd.clone(),
timeout_ms: original.timeout_ms,
// Run apply_patch with a minimal environment for determinism and to
// avoid leaking host environment variables into the patch process.
env: HashMap::new(),
with_escalated_permissions: original.with_escalated_permissions,
justification: original.justification.clone(),
})
}
/// Determines how a command should be sandboxed, prompting the user when
/// policy requires explicit approval.
pub async fn select_sandbox(
request: &ExecutionRequest,
approval_policy: AskForApproval,
approval_cache: HashSet<Vec<String>>,
config: &ExecutorConfig,
session: &Session,
sub_id: &str,
call_id: &str,
) -> Result<SandboxDecision, ExecError> {
match &request.mode {
ExecutionMode::Shell => {
select_shell_sandbox(
request,
approval_policy,
approval_cache,
config,
session,
sub_id,
call_id,
)
.await
}
ExecutionMode::ApplyPatch(exec) => {
select_apply_patch_sandbox(exec, approval_policy, config)
}
}
}
async fn select_shell_sandbox(
request: &ExecutionRequest,
approval_policy: AskForApproval,
approved_snapshot: HashSet<Vec<String>>,
config: &ExecutorConfig,
session: &Session,
sub_id: &str,
call_id: &str,
) -> Result<SandboxDecision, ExecError> {
let command_for_safety = if request.approval_command.is_empty() {
request.params.command.clone()
} else {
request.approval_command.clone()
};
let safety = assess_command_safety(
&command_for_safety,
approval_policy,
&config.sandbox_policy,
&approved_snapshot,
request.params.with_escalated_permissions.unwrap_or(false),
);
match safety {
SafetyCheck::AutoApprove { sandbox_type } => Ok(SandboxDecision::auto(
sandbox_type,
should_escalate_on_failure(approval_policy, sandbox_type),
)),
SafetyCheck::AskUser => {
let decision = session
.request_command_approval(
sub_id.to_string(),
call_id.to_string(),
request.approval_command.clone(),
request.params.cwd.clone(),
request.params.justification.clone(),
)
.await;
match decision {
ReviewDecision::Approved => Ok(SandboxDecision::user_override(false)),
ReviewDecision::ApprovedForSession => Ok(SandboxDecision::user_override(true)),
ReviewDecision::Denied | ReviewDecision::Abort => {
Err(ExecError::rejection("exec command rejected by user"))
}
}
}
SafetyCheck::Reject { reason } => Err(ExecError::rejection(format!(
"exec command rejected: {reason}"
))),
}
}
fn select_apply_patch_sandbox(
exec: &ApplyPatchExec,
approval_policy: AskForApproval,
config: &ExecutorConfig,
) -> Result<SandboxDecision, ExecError> {
if exec.user_explicitly_approved_this_action {
return Ok(SandboxDecision::user_override(false));
}
match assess_patch_safety(
&exec.action,
approval_policy,
&config.sandbox_policy,
&config.sandbox_cwd,
) {
SafetyCheck::AutoApprove { sandbox_type } => Ok(SandboxDecision::auto(
sandbox_type,
should_escalate_on_failure(approval_policy, sandbox_type),
)),
SafetyCheck::AskUser => Err(ExecError::rejection(
"patch requires approval but none was recorded",
)),
SafetyCheck::Reject { reason } => {
Err(ExecError::rejection(format!("patch rejected: {reason}")))
}
}
}