feat: add shell snapshot for shell command (#7786)

This commit is contained in:
jif-oai
2025-12-11 13:46:43 +00:00
committed by GitHub
parent b2280d6205
commit 29381ba5c2
14 changed files with 301 additions and 129 deletions

View File

@@ -85,14 +85,88 @@ async fn run_snapshot_command(command: &str) -> Result<SnapshotRun> {
_ => None,
})
.await;
let mut entries = fs::read_dir(codex_home.join("shell_snapshots")).await?;
let snapshot_path = entries
.next_entry()
.await?
.map(|entry| entry.path())
.expect("shell snapshot created");
let snapshot_content = fs::read_to_string(&snapshot_path).await?;
let snapshot_arg = begin
.command
.iter()
.find(|arg| arg.contains("shell_snapshots"))
.expect("command includes shell snapshot path")
.to_owned();
let snapshot_path = PathBuf::from(&snapshot_arg);
let end = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ExecCommandEnd(ev) if ev.call_id == call_id => Some(ev.clone()),
_ => None,
})
.await;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
Ok(SnapshotRun {
begin,
end,
snapshot_path,
snapshot_content,
codex_home,
})
}
#[allow(clippy::expect_used)]
async fn run_shell_command_snapshot(command: &str) -> Result<SnapshotRun> {
let builder = test_codex().with_config(|config| {
config.features.enable(Feature::ShellSnapshot);
});
let harness = TestCodexHarness::with_builder(builder).await?;
let args = json!({
"command": command,
"timeout_ms": 1000,
});
let call_id = "shell-snapshot-command";
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_response_created("resp-2"),
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
mount_sse_sequence(harness.server(), responses).await;
let test = harness.test();
let codex = test.codex.clone();
let codex_home = test.home.path().to_path_buf();
let session_model = test.session_configured.model.clone();
let cwd = test.cwd_path().to_path_buf();
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "run shell_command with shell snapshot".into(),
}],
final_output_json_schema: None,
cwd,
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
let begin = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ExecCommandBegin(ev) if ev.call_id == call_id => Some(ev.clone()),
_ => None,
})
.await;
let mut entries = fs::read_dir(codex_home.join("shell_snapshots")).await?;
let snapshot_path = entries
.next_entry()
.await?
.map(|entry| entry.path())
.expect("shell snapshot created");
let snapshot_content = fs::read_to_string(&snapshot_path).await?;
let end = wait_for_event_match(&codex, |ev| match ev {
@@ -134,21 +208,9 @@ async fn linux_unified_exec_uses_shell_snapshot() -> Result<()> {
let run = run_snapshot_command(command).await?;
let stdout = normalize_newlines(&run.end.stdout);
let shell_path = run
.begin
.command
.first()
.expect("shell path recorded")
.clone();
assert_eq!(run.begin.command.get(1).map(String::as_str), Some("-c"));
assert_eq!(
run.begin.command.get(2).map(String::as_str),
Some(". \"$0\" && exec \"$@\"")
);
assert_eq!(run.begin.command.get(4), Some(&shell_path));
assert_eq!(run.begin.command.get(5).map(String::as_str), Some("-c"));
assert_eq!(run.begin.command.last(), Some(&command.to_string()));
assert_eq!(run.begin.command.get(1).map(String::as_str), Some("-lc"));
assert_eq!(run.begin.command.get(2).map(String::as_str), Some(command));
assert_eq!(run.begin.command.len(), 3);
assert!(run.snapshot_path.starts_with(&run.codex_home));
assert_posix_snapshot_sections(&run.snapshot_content);
assert_eq!(run.end.exit_code, 0);
@@ -160,6 +222,93 @@ async fn linux_unified_exec_uses_shell_snapshot() -> Result<()> {
Ok(())
}
/// Runs a simple `echo` through `run_shell_command_snapshot` and checks both
/// the recorded exec events and the snapshot file produced for the run.
///
/// Note: despite the `linux_` prefix, the attribute below only ignores this
/// test on Windows.
#[cfg_attr(target_os = "windows", ignore)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn linux_shell_command_uses_shell_snapshot() -> Result<()> {
    let command = "echo shell-command-snapshot-linux";
    let run = run_shell_command_snapshot(command).await?;

    // The begin event must record exactly three arguments, the last two being
    // `-lc` and the untouched command string.
    let argv = &run.begin.command;
    assert_eq!(argv.get(1).map(String::as_str), Some("-lc"));
    assert_eq!(argv.get(2).map(String::as_str), Some(command));
    assert_eq!(argv.len(), 3);

    // The snapshot has to live under the test's codex home and contain the
    // sections checked by `assert_posix_snapshot_sections`.
    assert!(run.snapshot_path.starts_with(&run.codex_home));
    assert_posix_snapshot_sections(&run.snapshot_content);

    // The command itself must have run successfully and echoed its marker.
    let stdout = normalize_newlines(&run.end.stdout);
    assert_eq!(stdout.trim(), "shell-command-snapshot-linux");
    assert_eq!(run.end.exit_code, 0);

    Ok(())
}
/// Submits an `apply_patch` heredoc through the `shell_command` tool while the
/// shell-snapshot feature is enabled, then checks that the patched file exists
/// with the expected content and that a snapshot file was written.
///
/// NOTE(review): the test observes only the resulting file and the snapshot;
/// that the patch was *intercepted* (rather than executed by a shell) is
/// implied by the test name, not asserted directly here.
#[cfg_attr(target_os = "windows", ignore)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
    // Harness with both the snapshot feature and the apply_patch tool enabled.
    let harness = TestCodexHarness::with_builder(test_codex().with_config(|config| {
        config.features.enable(Feature::ShellSnapshot);
        config.include_apply_patch_tool = true;
    }))
    .await?;

    let test = harness.test();
    let codex = test.codex.clone();
    let cwd = test.cwd_path().to_path_buf();
    let codex_home = test.home.path().to_path_buf();
    let model = test.session_configured.model.clone();

    // File the patch is expected to create inside the turn's working directory.
    let target = cwd.join("snapshot-apply.txt");

    let script = "apply_patch <<'EOF'\n*** Begin Patch\n*** Add File: snapshot-apply.txt\n+hello from snapshot\n*** End Patch\nEOF\n";
    let args = json!({
        "command": script,
        "timeout_ms": 1_000,
    });
    let call_id = "shell-snapshot-apply-patch";
    let call_args = serde_json::to_string(&args)?;

    // First response asks for the tool call; the second closes the turn.
    let tool_call_turn = sse(vec![
        ev_response_created("resp-1"),
        ev_function_call(call_id, "shell_command", &call_args),
        ev_completed("resp-1"),
    ]);
    let closing_turn = sse(vec![
        ev_response_created("resp-2"),
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-2"),
    ]);
    mount_sse_sequence(harness.server(), vec![tool_call_turn, closing_turn]).await;

    let user_turn = Op::UserTurn {
        items: vec![UserInput::Text {
            text: "apply patch via shell_command with snapshot".into(),
        }],
        final_output_json_schema: None,
        cwd,
        approval_policy: AskForApproval::Never,
        sandbox_policy: SandboxPolicy::DangerFullAccess,
        model,
        effort: None,
        summary: ReasoningSummary::Auto,
    };
    codex.submit(user_turn).await?;

    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    // The patch must have produced the file with exactly this content.
    let patched = fs::read_to_string(&target).await?;
    assert_eq!(patched, "hello from snapshot\n");

    // Use the first directory entry under `shell_snapshots` as the snapshot.
    let mut snapshot_dir = fs::read_dir(codex_home.join("shell_snapshots")).await?;
    let first_entry = snapshot_dir.next_entry().await?;
    let snapshot_path = first_entry.expect("shell snapshot created").path();
    let snapshot_content = fs::read_to_string(&snapshot_path).await?;
    assert_posix_snapshot_sections(&snapshot_content);

    Ok(())
}
#[cfg_attr(not(target_os = "macos"), ignore)]
#[cfg_attr(
target_os = "macos",