This commit is contained in:
jimmyfraiture
2025-09-26 13:42:58 +02:00
parent eb2b739d6a
commit 805de19381
8 changed files with 404 additions and 119 deletions

1
codex-rs/Cargo.lock generated
View File

@@ -673,6 +673,7 @@ dependencies = [
"askama",
"assert_cmd",
"async-channel",
"async-trait",
"base64",
"bytes",
"chrono",

View File

@@ -15,6 +15,7 @@ workspace = true
anyhow = { workspace = true }
askama = { workspace = true }
async-channel = { workspace = true }
async-trait = { workspace = true }
base64 = { workspace = true }
bytes = { workspace = true }
chrono = { workspace = true, features = ["serde"] }

View File

@@ -44,7 +44,6 @@ use tracing::warn;
use crate::ModelProviderInfo;
use crate::apply_patch;
use crate::apply_patch::ApplyPatchExec;
use crate::apply_patch::CODEX_APPLY_PATCH_ARG1;
use crate::apply_patch::InternalApplyPatchInvocation;
use crate::apply_patch::convert_apply_patch_to_protocol;
use crate::client::ModelClient;
@@ -63,7 +62,6 @@ use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StdoutStream;
use crate::exec::StreamOutput;
use crate::exec::process_exec_tool_call;
use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
use crate::exec_command::ExecCommandParams;
use crate::exec_command::ExecSessionManager;
@@ -114,9 +112,15 @@ use crate::protocol::TurnDiffEvent;
use crate::protocol::WebSearchBeginEvent;
use crate::rollout::RolloutRecorder;
use crate::rollout::RolloutRecorderParams;
use crate::safety::SafetyCheck;
use crate::safety::assess_command_safety;
use crate::safety::assess_safety_for_untrusted_command;
use crate::sandbox::BackendRegistry;
use crate::sandbox::ExecPlan;
use crate::sandbox::ExecRequest;
use crate::sandbox::ExecRuntimeContext;
use crate::sandbox::PatchExecRequest;
use crate::sandbox::build_exec_params_for_apply_patch;
use crate::sandbox::plan_apply_patch;
use crate::sandbox::plan_exec;
use crate::sandbox::run_with_plan;
use crate::shell;
use crate::state::ActiveTurn;
use crate::state::SessionServices;
@@ -942,15 +946,24 @@ impl Session {
self.on_exec_command_begin(turn_diff_tracker, begin_ctx.clone())
.await;
let result = process_exec_tool_call(
exec_args.params,
exec_args.sandbox_type,
exec_args.sandbox_policy,
exec_args.sandbox_cwd,
exec_args.codex_linux_sandbox_exe,
exec_args.stdout_stream,
)
.await;
let ExecInvokeArgs {
params,
plan,
sandbox_policy,
sandbox_cwd,
codex_linux_sandbox_exe,
stdout_stream,
} = exec_args;
let registry = BackendRegistry::new();
let runtime_ctx = ExecRuntimeContext {
sandbox_policy,
sandbox_cwd,
codex_linux_sandbox_exe,
stdout_stream,
};
let result = run_with_plan(params, &plan, &registry, &runtime_ctx).await;
let output_stderr;
let borrowed: &ExecToolCallOutput = match &result {
@@ -2673,7 +2686,7 @@ fn parse_container_exec_arguments(
pub struct ExecInvokeArgs<'a> {
pub params: ExecParams,
pub sandbox_type: SandboxType,
pub plan: ExecPlan,
pub sandbox_policy: &'a SandboxPolicy,
pub sandbox_cwd: &'a Path,
pub codex_linux_sandbox_exe: &'a Option<PathBuf>,
@@ -2740,98 +2753,80 @@ async fn handle_container_exec_with_params(
MaybeApplyPatchVerified::NotApplyPatch => None,
};
let (params, safety, command_for_display) = match &apply_patch_exec {
Some(ApplyPatchExec {
action: ApplyPatchAction { patch, cwd, .. },
user_explicitly_approved_this_action,
}) => {
let path_to_codex = std::env::current_exe()
.ok()
.map(|p| p.to_string_lossy().to_string());
let Some(path_to_codex) = path_to_codex else {
let mut params = params;
let command_for_display;
let plan = if let Some(apply_patch_exec) = apply_patch_exec.as_ref() {
params = build_exec_params_for_apply_patch(apply_patch_exec, &params)?;
command_for_display = vec![
"apply_patch".to_string(),
apply_patch_exec.action.patch.clone(),
];
let plan_req = PatchExecRequest {
action: &apply_patch_exec.action,
approval: turn_context.approval_policy,
policy: &turn_context.sandbox_policy,
cwd: &turn_context.cwd,
user_explicitly_approved: apply_patch_exec.user_explicitly_approved_this_action,
};
match plan_apply_patch(&plan_req) {
plan @ ExecPlan::Approved { .. } => plan,
ExecPlan::AskUser { .. } => {
return Err(FunctionCallError::RespondToModel(
"failed to determine path to codex executable".to_string(),
"patch requires approval but none was recorded".to_string(),
));
};
let params = ExecParams {
command: vec![
path_to_codex,
CODEX_APPLY_PATCH_ARG1.to_string(),
patch.clone(),
],
cwd: cwd.clone(),
timeout_ms: params.timeout_ms,
env: HashMap::new(),
with_escalated_permissions: params.with_escalated_permissions,
justification: params.justification.clone(),
};
let safety = if *user_explicitly_approved_this_action {
SafetyCheck::AutoApprove {
sandbox_type: SandboxType::None,
}
} else {
assess_safety_for_untrusted_command(
turn_context.approval_policy,
&turn_context.sandbox_policy,
params.with_escalated_permissions.unwrap_or(false),
)
};
(
params,
safety,
vec!["apply_patch".to_string(), patch.clone()],
)
}
ExecPlan::Reject { reason } => {
return Err(FunctionCallError::RespondToModel(format!(
"patch rejected: {reason}"
)));
}
}
None => {
let safety = {
let state = sess.state.lock().await;
assess_command_safety(
&params.command,
turn_context.approval_policy,
&turn_context.sandbox_policy,
state.approved_commands_ref(),
params.with_escalated_permissions.unwrap_or(false),
)
};
let command_for_display = params.command.clone();
(params, safety, command_for_display)
}
};
} else {
command_for_display = params.command.clone();
let sandbox_type = match safety {
SafetyCheck::AutoApprove { sandbox_type } => sandbox_type,
SafetyCheck::AskUser => {
let decision = sess
.request_command_approval(
sub_id.clone(),
call_id.clone(),
params.command.clone(),
params.cwd.clone(),
params.justification.clone(),
)
.await;
match decision {
ReviewDecision::Approved => (),
ReviewDecision::ApprovedForSession => {
sess.add_approved_command(params.command.clone()).await;
}
ReviewDecision::Denied | ReviewDecision::Abort => {
return Err(FunctionCallError::RespondToModel(
"exec command rejected by user".to_string(),
));
let initial_plan = {
let state = sess.state.lock().await;
plan_exec(&ExecRequest {
params: &params,
approval: turn_context.approval_policy,
policy: &turn_context.sandbox_policy,
approved_session_commands: state.approved_commands_ref(),
})
};
match initial_plan {
plan @ ExecPlan::Approved { .. } => plan,
ExecPlan::AskUser { reason } => {
let decision = sess
.request_command_approval(
sub_id.clone(),
call_id.clone(),
params.command.clone(),
params.cwd.clone(),
reason,
)
.await;
match decision {
ReviewDecision::Approved => ExecPlan::approved(SandboxType::None, false, true),
ReviewDecision::ApprovedForSession => {
sess.add_approved_command(params.command.clone()).await;
ExecPlan::approved(SandboxType::None, false, true)
}
ReviewDecision::Denied | ReviewDecision::Abort => {
return Err(FunctionCallError::RespondToModel(
"exec command rejected by user".to_string(),
));
}
}
}
// No sandboxing is applied because the user has given
// explicit approval. Often, we end up in this case because
// the command cannot be run in a sandbox, such as
// installing a new dependency that requires network access.
SandboxType::None
}
SafetyCheck::Reject { reason } => {
return Err(FunctionCallError::RespondToModel(format!(
"exec command rejected: {reason:?}"
)));
ExecPlan::Reject { reason } => {
return Err(FunctionCallError::RespondToModel(format!(
"exec command rejected: {reason:?}"
)));
}
}
};
@@ -2840,25 +2835,26 @@ async fn handle_container_exec_with_params(
call_id: call_id.clone(),
command_for_display: command_for_display.clone(),
cwd: params.cwd.clone(),
apply_patch: apply_patch_exec.map(
apply_patch: apply_patch_exec.as_ref().map(
|ApplyPatchExec {
action,
user_explicitly_approved_this_action,
}| ApplyPatchCommandContext {
user_explicitly_approved_this_action,
changes: convert_apply_patch_to_protocol(&action),
user_explicitly_approved_this_action: *user_explicitly_approved_this_action,
changes: convert_apply_patch_to_protocol(action),
},
),
};
let params = maybe_translate_shell_command(params, sess, turn_context);
let plan_for_invocation = plan.clone();
let output_result = sess
.run_exec_with_events(
turn_diff_tracker,
exec_command_context.clone(),
ExecInvokeArgs {
params: params.clone(),
sandbox_type,
plan: plan_for_invocation,
sandbox_policy: &turn_context.sandbox_policy,
sandbox_cwd: &turn_context.cwd,
codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe,
@@ -2891,7 +2887,7 @@ async fn handle_container_exec_with_params(
params,
exec_command_context,
error,
sandbox_type,
&plan,
sess,
turn_context,
)
@@ -2908,7 +2904,7 @@ async fn handle_sandbox_error(
params: ExecParams,
exec_command_context: ExecCommandContext,
error: SandboxErr,
sandbox_type: SandboxType,
plan: &ExecPlan,
sess: &Session,
turn_context: &TurnContext,
) -> Result<String, FunctionCallError> {
@@ -2921,15 +2917,21 @@ async fn handle_sandbox_error(
return Err(FunctionCallError::RespondToModel(content));
}
// Early out if either the user never wants to be asked for approval, or
// we're letting the model manage escalation requests. Otherwise, continue
match turn_context.approval_policy {
AskForApproval::Never | AskForApproval::OnRequest => {
return Err(FunctionCallError::RespondToModel(format!(
"failed in sandbox {sandbox_type:?} with execution error: {error:?}"
)));
}
AskForApproval::UnlessTrusted | AskForApproval::OnFailure => (),
let ExecPlan::Approved {
sandbox: sandbox_type,
on_failure_escalate,
..
} = plan
else {
return Err(FunctionCallError::RespondToModel(
"execution failed without an approved plan".to_string(),
));
};
if !on_failure_escalate {
return Err(FunctionCallError::RespondToModel(format!(
"failed in sandbox {sandbox_type:?} with execution error: {error:?}"
)));
}
// Note that when `error` is `SandboxErr::Denied`, it could be a false
@@ -2944,11 +2946,12 @@ async fn handle_sandbox_error(
sess.notify_background_event(&sub_id, format!("Execution failed: {error}"))
.await;
let command_for_retry = params.command.clone();
let decision = sess
.request_command_approval(
sub_id.clone(),
call_id.clone(),
params.command.clone(),
command_for_retry.clone(),
cwd.clone(),
Some("command failed; retry without sandbox?".to_string()),
)
@@ -2960,7 +2963,7 @@ async fn handle_sandbox_error(
// remainder of the session so future
// executions skip the sandbox directly.
// TODO(ragona): Isn't this a bug? It always saves the command in an | fork?
sess.add_approved_command(params.command.clone()).await;
sess.add_approved_command(command_for_retry.clone()).await;
// Inform UI we are retrying without sandbox.
sess.notify_background_event(&sub_id, "retrying command without sandbox")
.await;
@@ -2973,7 +2976,7 @@ async fn handle_sandbox_error(
exec_command_context.clone(),
ExecInvokeArgs {
params,
sandbox_type: SandboxType::None,
plan: ExecPlan::approved(SandboxType::None, false, true),
sandbox_policy: &turn_context.sandbox_policy,
sandbox_cwd: &turn_context.cwd,
codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe,

View File

@@ -61,6 +61,7 @@ pub mod plan_tool;
pub mod project_doc;
mod rollout;
pub(crate) mod safety;
pub mod sandbox;
pub mod seatbelt;
pub mod shell;
pub mod spawn;

View File

@@ -0,0 +1,30 @@
use std::env;
use crate::apply_patch::ApplyPatchExec;
use crate::apply_patch::CODEX_APPLY_PATCH_ARG1;
use crate::exec::ExecParams;
use crate::function_tool::FunctionCallError;
pub(crate) fn build_exec_params_for_apply_patch(
exec: &ApplyPatchExec,
original: &ExecParams,
) -> Result<ExecParams, FunctionCallError> {
let path_to_codex = env::current_exe()
.ok()
.map(|p| p.to_string_lossy().to_string())
.ok_or_else(|| {
FunctionCallError::RespondToModel(
"failed to determine path to codex executable".to_string(),
)
})?;
let patch = exec.action.patch.clone();
Ok(ExecParams {
command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
cwd: exec.action.cwd.clone(),
timeout_ms: original.timeout_ms,
env: original.env.clone(),
with_escalated_permissions: original.with_escalated_permissions,
justification: original.justification.clone(),
})
}

View File

@@ -0,0 +1,87 @@
use std::path::Path;
use std::path::PathBuf;
use async_trait::async_trait;
use crate::error::Result;
use crate::exec::ExecParams;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StdoutStream;
use crate::exec::process_exec_tool_call;
use crate::protocol::SandboxPolicy;
#[async_trait]
pub trait SpawnBackend: Send + Sync {
fn sandbox_type(&self) -> SandboxType;
async fn spawn(
&self,
params: ExecParams,
sandbox_policy: &SandboxPolicy,
sandbox_cwd: &Path,
codex_linux_sandbox_exe: &Option<PathBuf>,
stdout_stream: Option<StdoutStream>,
) -> Result<ExecToolCallOutput> {
process_exec_tool_call(
params,
self.sandbox_type(),
sandbox_policy,
sandbox_cwd,
codex_linux_sandbox_exe,
stdout_stream,
)
.await
}
}
#[derive(Clone, Copy, Debug, Default)]
pub struct DirectBackend;
#[async_trait]
impl SpawnBackend for DirectBackend {
fn sandbox_type(&self) -> SandboxType {
SandboxType::None
}
}
#[derive(Clone, Copy, Debug, Default)]
pub struct SeatbeltBackend;
#[async_trait]
impl SpawnBackend for SeatbeltBackend {
fn sandbox_type(&self) -> SandboxType {
SandboxType::MacosSeatbelt
}
}
#[derive(Clone, Copy, Debug, Default)]
pub struct LinuxBackend;
#[async_trait]
impl SpawnBackend for LinuxBackend {
fn sandbox_type(&self) -> SandboxType {
SandboxType::LinuxSeccomp
}
}
#[derive(Default)]
pub struct BackendRegistry {
direct: DirectBackend,
seatbelt: SeatbeltBackend,
linux: LinuxBackend,
}
impl BackendRegistry {
pub fn new() -> Self {
Self::default()
}
pub fn for_type(&self, sandbox: SandboxType) -> &dyn SpawnBackend {
match sandbox {
SandboxType::None => &self.direct,
SandboxType::MacosSeatbelt => &self.seatbelt,
SandboxType::LinuxSeccomp => &self.linux,
}
}
}

View File

@@ -0,0 +1,50 @@
mod apply_patch_adapter;
mod backend;
mod planner;
pub(crate) use apply_patch_adapter::build_exec_params_for_apply_patch;
pub use backend::BackendRegistry;
pub use backend::DirectBackend;
pub use backend::LinuxBackend;
pub use backend::SeatbeltBackend;
pub use backend::SpawnBackend;
pub use planner::ExecPlan;
pub use planner::ExecRequest;
pub use planner::PatchExecRequest;
pub use planner::plan_apply_patch;
pub use planner::plan_exec;
use crate::error::Result;
use crate::exec::ExecParams;
use crate::exec::ExecToolCallOutput;
use crate::exec::StdoutStream;
use crate::protocol::SandboxPolicy;
pub struct ExecRuntimeContext<'a> {
pub sandbox_policy: &'a SandboxPolicy,
pub sandbox_cwd: &'a std::path::Path,
pub codex_linux_sandbox_exe: &'a Option<std::path::PathBuf>,
pub stdout_stream: Option<StdoutStream>,
}
pub async fn run_with_plan(
params: ExecParams,
plan: &ExecPlan,
registry: &BackendRegistry,
runtime_ctx: &ExecRuntimeContext<'_>,
) -> Result<ExecToolCallOutput> {
let ExecPlan::Approved { sandbox, .. } = plan else {
unreachable!("run_with_plan called without approved plan");
};
registry
.for_type(*sandbox)
.spawn(
params,
runtime_ctx.sandbox_policy,
runtime_ctx.sandbox_cwd,
runtime_ctx.codex_linux_sandbox_exe,
runtime_ctx.stdout_stream.clone(),
)
.await
}

View File

@@ -0,0 +1,112 @@
use std::collections::HashSet;
use std::path::Path;
use codex_apply_patch::ApplyPatchAction;
use crate::exec::ExecParams;
use crate::exec::SandboxType;
use crate::protocol::AskForApproval;
use crate::protocol::SandboxPolicy;
use crate::safety::SafetyCheck;
use crate::safety::assess_command_safety;
use crate::safety::assess_patch_safety;
#[derive(Clone, Debug)]
pub struct ExecRequest<'a> {
pub params: &'a ExecParams,
pub approval: AskForApproval,
pub policy: &'a SandboxPolicy,
pub approved_session_commands: &'a HashSet<Vec<String>>,
}
#[derive(Clone, Debug)]
pub enum ExecPlan {
Reject {
reason: String,
},
AskUser {
reason: Option<String>,
},
Approved {
sandbox: SandboxType,
on_failure_escalate: bool,
approved_by_user: bool,
},
}
impl ExecPlan {
pub fn approved(
sandbox: SandboxType,
on_failure_escalate: bool,
approved_by_user: bool,
) -> Self {
ExecPlan::Approved {
sandbox,
on_failure_escalate,
approved_by_user,
}
}
}
pub fn plan_exec(req: &ExecRequest<'_>) -> ExecPlan {
let params = req.params;
let with_escalated_permissions = params.with_escalated_permissions.unwrap_or(false);
let safety = assess_command_safety(
&params.command,
req.approval,
req.policy,
req.approved_session_commands,
with_escalated_permissions,
);
match safety {
SafetyCheck::AutoApprove { sandbox_type } => ExecPlan::Approved {
sandbox: sandbox_type,
on_failure_escalate: should_escalate_on_failure(req.approval, sandbox_type),
approved_by_user: false,
},
SafetyCheck::AskUser => ExecPlan::AskUser {
reason: params.justification.clone(),
},
SafetyCheck::Reject { reason } => ExecPlan::Reject { reason },
}
}
#[derive(Clone, Debug)]
pub struct PatchExecRequest<'a> {
pub action: &'a ApplyPatchAction,
pub approval: AskForApproval,
pub policy: &'a SandboxPolicy,
pub cwd: &'a Path,
pub user_explicitly_approved: bool,
}
pub fn plan_apply_patch(req: &PatchExecRequest<'_>) -> ExecPlan {
if req.user_explicitly_approved {
ExecPlan::Approved {
sandbox: SandboxType::None,
on_failure_escalate: false,
approved_by_user: true,
}
} else {
match assess_patch_safety(req.action, req.approval, req.policy, req.cwd) {
SafetyCheck::AutoApprove { sandbox_type } => ExecPlan::Approved {
sandbox: sandbox_type,
on_failure_escalate: should_escalate_on_failure(req.approval, sandbox_type),
approved_by_user: false,
},
SafetyCheck::AskUser => ExecPlan::AskUser { reason: None },
SafetyCheck::Reject { reason } => ExecPlan::Reject { reason },
}
}
}
fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool {
matches!(
(approval, sandbox),
(
AskForApproval::UnlessTrusted | AskForApproval::OnFailure,
SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp
)
)
}