This commit is contained in:
jimmyfraiture
2025-09-30 11:27:23 +01:00
parent 5b74f10a7b
commit ed45f85209
6 changed files with 350 additions and 51 deletions

View File

@@ -109,3 +109,28 @@ pub(crate) fn convert_apply_patch_to_protocol(
}
result
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
use tempfile::tempdir;
#[test]
fn convert_apply_patch_maps_add_variant() {
let tmp = tempdir().expect("tmp");
let p = tmp.path().join("a.txt");
// Create an action with a single Add change
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
let got = convert_apply_patch_to_protocol(&action);
assert_eq!(
got.get(&p),
Some(&FileChange::Add {
content: "hello".to_string()
})
);
}
}

View File

@@ -2616,10 +2616,9 @@ async fn handle_container_exec_with_params(
stdout_stream,
};
sess.executor.update_environment(
sess.executor.update_environment( // todo this should not be needed ? Not sure what it means
turn_context.sandbox_policy.clone(),
turn_context.cwd.clone(),
sess.services.codex_linux_sandbox_exe.clone(),
);
let output_result = sess

View File

@@ -1,11 +1,13 @@
use std::collections::HashMap;
use std::env;
use std::sync::Arc;
use async_trait::async_trait;
use crate::apply_patch::ApplyPatchExec;
use crate::CODEX_APPLY_PATCH_ARG1;
use crate::exec::ExecParams;
use crate::exec::ExecToolCallOutput;
use crate::executor::sandbox::build_exec_params_for_apply_patch;
use crate::function_tool::FunctionCallError;
pub(crate) enum ExecutionMode {
@@ -23,14 +25,6 @@ pub(crate) trait ExecutionBackend: Send + Sync {
// Required for downcasting the apply_patch.
mode: &ExecutionMode,
) -> Result<ExecParams, FunctionCallError>;
async fn finalize(
&self,
output: ExecToolCallOutput,
_mode: &ExecutionMode,
) -> Result<ExecToolCallOutput, FunctionCallError> {
Ok(output)
}
}
pub(crate) struct BackendStore {
@@ -86,7 +80,28 @@ impl ExecutionBackend for ApplyPatchBackend {
mode: &ExecutionMode,
) -> Result<ExecParams, FunctionCallError> {
match mode {
ExecutionMode::ApplyPatch(exec) => build_exec_params_for_apply_patch(exec, &params),
ExecutionMode::ApplyPatch(exec) => {
let path_to_codex = env::current_exe()
.ok()
.map(|p| p.to_string_lossy().to_string())
.ok_or_else(|| {
FunctionCallError::RespondToModel(
"failed to determine path to codex executable".to_string(),
)
})?;
let patch = exec.action.patch.clone();
Ok(ExecParams {
command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
cwd: exec.action.cwd.clone(),
timeout_ms: params.timeout_ms,
// Run apply_patch with a minimal environment for determinism and to
// avoid leaking host environment variables into the patch process.
env: HashMap::new(),
with_escalated_permissions: params.with_escalated_permissions,
justification: params.justification,
})
},
ExecutionMode::Shell => Err(FunctionCallError::RespondToModel(
"apply_patch backend invoked without patch context".to_string(),
)),

View File

@@ -23,3 +23,29 @@ impl ApprovalCache {
self.inner.lock().map(|g| g.clone()).unwrap_or_default()
}
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn insert_ignores_empty_and_dedupes() {
let cache = ApprovalCache::default();
// Empty should be ignored
cache.insert(vec![]);
assert!(cache.snapshot().is_empty());
// Insert a command and verify snapshot contains it
let cmd = vec!["foo".to_string(), "bar".to_string()];
cache.insert(cmd.clone());
let snap1 = cache.snapshot();
assert!(snap1.contains(&cmd));
// Reinserting should not create duplicates
cache.insert(cmd);
let snap2 = cache.snapshot();
assert_eq!(snap1, snap2);
}
}

View File

@@ -84,12 +84,10 @@ impl Executor {
&self,
sandbox_policy: SandboxPolicy,
sandbox_cwd: PathBuf,
codex_linux_sandbox_exe: Option<PathBuf>,
) {
if let Ok(mut cfg) = self.config.write() {
cfg.sandbox_policy = sandbox_policy;
cfg.sandbox_cwd = sandbox_cwd;
cfg.codex_linux_sandbox_exe = codex_linux_sandbox_exe;
}
}
@@ -164,11 +162,7 @@ impl Executor {
Err(err) => return Err(err.into()),
};
// Step 6: Allow the backend to post-process the raw output.
backend
.finalize(raw_output, &request.mode)
.await
.map_err(ExecError::from)
Ok(raw_output)
}
/// Fallback path invoked when a sandboxed run is denied so the user can
@@ -215,10 +209,7 @@ impl Executor {
)
.await?;
backend
.finalize(retry_output, &request.mode)
.await
.map_err(ExecError::from)
Ok(retry_output)
}
ReviewDecision::Denied | ReviewDecision::Abort => {
Err(ExecError::rejection("exec command rejected by user"))
@@ -304,3 +295,73 @@ pub(crate) fn normalize_exec_result(
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::error::CodexErr;
use crate::error::EnvVarError;
use crate::error::SandboxErr;
use crate::exec::StreamOutput;
use pretty_assertions::assert_eq;
fn make_output(text: &str) -> ExecToolCallOutput {
ExecToolCallOutput {
exit_code: 1,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(text.to_string()),
duration: Duration::from_millis(123),
timed_out: false,
}
}
#[test]
fn normalize_success_borrows() {
let out = make_output("ok");
let result: Result<ExecToolCallOutput, ExecError> = Ok(out);
let normalized = normalize_exec_result(&result);
assert_eq!(normalized.event_output().aggregated_output.text, "ok");
}
#[test]
fn normalize_timeout_borrows_embedded_output() {
let out = make_output("timed out payload");
let err = CodexErr::Sandbox(SandboxErr::Timeout {
output: Box::new(out),
});
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(err));
let normalized = normalize_exec_result(&result);
assert_eq!(
normalized.event_output().aggregated_output.text,
"timed out payload"
);
}
#[test]
fn normalize_function_error_synthesizes_payload() {
let err = FunctionCallError::RespondToModel("boom".to_string());
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Function(err));
let normalized = normalize_exec_result(&result);
assert_eq!(normalized.event_output().aggregated_output.text, "boom");
}
#[test]
fn normalize_codex_error_synthesizes_user_message() {
// Use a simple EnvVar error which formats to a clear message
let e = CodexErr::EnvVar(EnvVarError {
var: "FOO".to_string(),
instructions: Some("set it".to_string()),
});
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(e));
let normalized = normalize_exec_result(&result);
assert!(
normalized
.event_output()
.aggregated_output
.text
.contains("Missing environment variable: `FOO`"),
"expected synthesized user-friendly message"
);
}
}

View File

@@ -53,34 +53,6 @@ fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) ->
)
}
/// Builds the command-line invocation that shells out to `codex apply_patch`
/// using the provided apply-patch request details.
pub(crate) fn build_exec_params_for_apply_patch(
exec: &ApplyPatchExec,
original: &ExecParams,
) -> Result<ExecParams, FunctionCallError> {
let path_to_codex = env::current_exe()
.ok()
.map(|p| p.to_string_lossy().to_string())
.ok_or_else(|| {
FunctionCallError::RespondToModel(
"failed to determine path to codex executable".to_string(),
)
})?;
let patch = exec.action.patch.clone();
Ok(ExecParams {
command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
cwd: exec.action.cwd.clone(),
timeout_ms: original.timeout_ms,
// Run apply_patch with a minimal environment for determinism and to
// avoid leaking host environment variables into the patch process.
env: HashMap::new(),
with_escalated_permissions: original.with_escalated_permissions,
justification: original.justification.clone(),
})
}
/// Determines how a command should be sandboxed, prompting the user when
/// policy requires explicit approval.
pub async fn select_sandbox(
@@ -191,3 +163,204 @@ fn select_apply_patch_sandbox(
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::codex::make_session_and_context;
use crate::exec::ExecParams;
use crate::protocol::SandboxPolicy;
use codex_apply_patch::ApplyPatchAction;
use pretty_assertions::assert_eq;
#[tokio::test]
async fn select_apply_patch_user_override_when_explicit() {
let (session, _ctx) = make_session_and_context();
let tmp = tempfile::tempdir().expect("tmp");
let p = tmp.path().join("a.txt");
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
let exec = ApplyPatchExec {
action,
user_explicitly_approved_this_action: true,
};
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
let request = ExecutionRequest {
params: ExecParams {
command: vec!["apply_patch".into()],
cwd: std::env::temp_dir(),
timeout_ms: None,
env: std::collections::HashMap::new(),
with_escalated_permissions: None,
justification: None,
},
approval_command: vec!["apply_patch".into()],
mode: ExecutionMode::ApplyPatch(exec),
stdout_stream: None,
};
let decision = select_sandbox(
&request,
AskForApproval::OnRequest,
Default::default(),
&cfg,
&session,
"sub",
"call",
)
.await
.expect("ok");
// Explicit user override runs without sandbox
assert_eq!(decision.initial_sandbox, SandboxType::None);
assert_eq!(decision.escalate_on_failure, false);
}
#[tokio::test]
async fn select_apply_patch_autoapprove_in_danger() {
let (session, _ctx) = make_session_and_context();
let tmp = tempfile::tempdir().expect("tmp");
let p = tmp.path().join("a.txt");
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
let exec = ApplyPatchExec {
action,
user_explicitly_approved_this_action: false,
};
let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
let request = ExecutionRequest {
params: ExecParams {
command: vec!["apply_patch".into()],
cwd: std::env::temp_dir(),
timeout_ms: None,
env: std::collections::HashMap::new(),
with_escalated_permissions: None,
justification: None,
},
approval_command: vec!["apply_patch".into()],
mode: ExecutionMode::ApplyPatch(exec),
stdout_stream: None,
};
let decision = select_sandbox(
&request,
AskForApproval::OnRequest,
Default::default(),
&cfg,
&session,
"sub",
"call",
)
.await
.expect("ok");
// On platforms with a sandbox, DangerFullAccess still prefers it
let expected = crate::safety::get_platform_sandbox().unwrap_or(SandboxType::None);
assert_eq!(decision.initial_sandbox, expected);
assert_eq!(decision.escalate_on_failure, false);
}
#[tokio::test]
async fn select_apply_patch_requires_approval_on_unless_trusted() {
let (session, _ctx) = make_session_and_context();
let tempdir = tempfile::tempdir().expect("tmpdir");
let p = tempdir.path().join("a.txt");
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
let exec = ApplyPatchExec {
action,
user_explicitly_approved_this_action: false,
};
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
let request = ExecutionRequest {
params: ExecParams {
command: vec!["apply_patch".into()],
cwd: std::env::temp_dir(),
timeout_ms: None,
env: std::collections::HashMap::new(),
with_escalated_permissions: None,
justification: None,
},
approval_command: vec!["apply_patch".into()],
mode: ExecutionMode::ApplyPatch(exec),
stdout_stream: None,
};
let result = select_sandbox(
&request,
AskForApproval::UnlessTrusted,
Default::default(),
&cfg,
&session,
"sub",
"call",
)
.await;
match result {
Ok(_) => panic!("expected error"),
Err(ExecError::Function(FunctionCallError::RespondToModel(msg))) => {
assert!(msg.contains("requires approval"))
}
Err(other) => panic!("unexpected error: {other:?}"),
}
}
#[tokio::test]
async fn select_shell_autoapprove_in_danger_mode() {
let (session, _ctx) = make_session_and_context();
let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
let request = ExecutionRequest {
params: ExecParams {
command: vec!["some-unknown".into()],
cwd: std::env::temp_dir(),
timeout_ms: None,
env: std::collections::HashMap::new(),
with_escalated_permissions: None,
justification: None,
},
approval_command: vec!["some-unknown".into()],
mode: ExecutionMode::Shell,
stdout_stream: None,
};
let decision = select_sandbox(
&request,
AskForApproval::OnRequest,
Default::default(),
&cfg,
&session,
"sub",
"call",
)
.await
.expect("ok");
assert_eq!(decision.initial_sandbox, SandboxType::None);
assert_eq!(decision.escalate_on_failure, false);
}
#[cfg(any(target_os = "macos", target_os = "linux"))]
#[tokio::test]
async fn select_shell_escalates_on_failure_with_platform_sandbox() {
let (session, _ctx) = make_session_and_context();
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
let request = ExecutionRequest {
params: ExecParams {
// Unknown command => untrusted but not flagged dangerous
command: vec!["some-unknown".into()],
cwd: std::env::temp_dir(),
timeout_ms: None,
env: std::collections::HashMap::new(),
with_escalated_permissions: None,
justification: None,
},
approval_command: vec!["some-unknown".into()],
mode: ExecutionMode::Shell,
stdout_stream: None,
};
let decision = select_sandbox(
&request,
AskForApproval::OnFailure,
Default::default(),
&cfg,
&session,
"sub",
"call",
)
.await
.expect("ok");
// On macOS/Linux we should have a platform sandbox and escalate on failure
assert_ne!(decision.initial_sandbox, SandboxType::None);
assert_eq!(decision.escalate_on_failure, true);
}
}