Merge branch 'etraut/next-turn-state-remove-override-context' into etraut/next-turn-state-core

This commit is contained in:
Eric Traut
2026-05-18 19:23:13 -07:00
285 changed files with 11486 additions and 3985 deletions

View File

@@ -35,8 +35,6 @@ use wiremock::http::HeaderValue;
use wiremock::matchers::method;
use wiremock::matchers::path_regex;
use crate::test_codex::ApplyPatchModelOutput;
#[derive(Debug, Clone)]
pub struct ResponseMock {
requests: Arc<Mutex<Vec<ResponsesRequest>>>,
@@ -883,19 +881,6 @@ pub fn ev_local_shell_call(call_id: &str, status: &str, command: Vec<&str>) -> V
})
}
pub fn ev_apply_patch_call(
call_id: &str,
patch: &str,
output_type: ApplyPatchModelOutput,
) -> Value {
match output_type {
ApplyPatchModelOutput::Freeform => ev_apply_patch_custom_tool_call(call_id, patch),
ApplyPatchModelOutput::ShellCommandViaHeredoc => {
ev_apply_patch_shell_command_call_via_heredoc(call_id, patch)
}
}
}
/// Convenience: SSE event for an `apply_patch` custom tool call with raw patch
/// text. This mirrors the payload produced by the Responses API when the model
/// invokes `apply_patch` directly.

View File

@@ -184,10 +184,9 @@ fn docker_command_capture_stdout<const N: usize>(args: [&str; N]) -> Result<Stri
String::from_utf8(output.stdout).context("docker stdout must be utf-8")
}
/// A collection of different ways the model can output an apply_patch call.
/// Non-default apply_patch model output shapes used by compatibility tests.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ApplyPatchModelOutput {
/// The model invokes `apply_patch` directly as a custom tool call carrying
/// the raw patch text.
Freeform,
/// The model issues a shell command call that carries the patch via a
/// heredoc; its result is read from the function call's stdout.
ShellCommandViaHeredoc,
}
@@ -982,21 +981,8 @@ impl TestCodexHarness {
custom_tool_call_output_text(&bodies, call_id)
}
pub async fn apply_patch_output(
&self,
call_id: &str,
output_type: ApplyPatchModelOutput,
) -> String {
// Box the awaited output helpers so callers do not inline request
// capture and response parsing into their own async state.
match output_type {
ApplyPatchModelOutput::Freeform => {
Box::pin(self.custom_tool_call_output(call_id)).await
}
ApplyPatchModelOutput::ShellCommandViaHeredoc => {
Box::pin(self.function_call_stdout(call_id)).await
}
}
pub async fn apply_patch_output(&self, call_id: &str) -> String {
self.custom_tool_call_output(call_id).await
}
}

View File

@@ -3,8 +3,8 @@
use anyhow::Result;
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use core_test_support::responses::ev_apply_patch_call;
use core_test_support::responses::ev_apply_patch_custom_tool_call;
use core_test_support::responses::ev_apply_patch_shell_command_call_via_heredoc;
use core_test_support::responses::ev_shell_command_call;
use core_test_support::test_codex::ApplyPatchModelOutput;
use pretty_assertions::assert_eq;
@@ -45,7 +45,6 @@ use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_with_timeout;
use serde_json::json;
use test_case::test_case;
use wiremock::Mock;
use wiremock::Respond;
use wiremock::ResponseTemplate;
@@ -187,11 +186,35 @@ pub async fn mount_apply_patch(
call_id: &str,
patch: &str,
assistant_msg: &str,
output_type: ApplyPatchModelOutput,
) {
mount_sse_sequence(
harness.server(),
apply_patch_responses(call_id, patch, assistant_msg, output_type),
apply_patch_responses(
call_id,
patch,
assistant_msg,
ev_apply_patch_custom_tool_call,
),
)
.await;
}
async fn mount_apply_patch_model_output(
harness: &TestCodexHarness,
call_id: &str,
patch: &str,
assistant_msg: &str,
model_output: ApplyPatchModelOutput,
) {
let apply_patch_call = match model_output {
ApplyPatchModelOutput::ShellCommandViaHeredoc => {
ev_apply_patch_shell_command_call_via_heredoc
}
};
mount_sse_sequence(
harness.server(),
apply_patch_responses(call_id, patch, assistant_msg, apply_patch_call),
)
.await;
}
@@ -200,12 +223,12 @@ fn apply_patch_responses(
call_id: &str,
patch: &str,
assistant_msg: &str,
output_type: ApplyPatchModelOutput,
apply_patch_call: fn(&str, &str) -> serde_json::Value,
) -> Vec<String> {
vec![
sse(vec![
ev_response_created("resp-1"),
ev_apply_patch_call(call_id, patch, output_type),
apply_patch_call(call_id, patch),
ev_completed("resp-1"),
]),
sse(vec![
@@ -236,20 +259,11 @@ async fn apply_patch_cli_uses_codex_self_exe_with_linux_sandbox_helper_alias() -
let patch = "*** Begin Patch\n*** Add File: helper-alias.txt\n+hello\n*** End Patch";
let call_id = "apply-helper-alias";
mount_apply_patch(
&harness,
call_id,
patch,
"done",
ApplyPatchModelOutput::Freeform,
)
.await;
mount_apply_patch(&harness, call_id, patch, "done").await;
harness.submit("please apply helper alias patch").await?;
let out = harness
.apply_patch_output(call_id, ApplyPatchModelOutput::Freeform)
.await;
let out = harness.apply_patch_output(call_id).await;
assert_regex_match(
r"(?s)^Exit code: 0.*Success\. Updated the following files:\nA helper-alias\.txt\n?$",
&out,
@@ -260,10 +274,7 @@ async fn apply_patch_cli_uses_codex_self_exe_with_linux_sandbox_helper_alias() -
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
async fn apply_patch_cli_multiple_operations_integration(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_multiple_operations_integration() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.4")).await?;
@@ -275,11 +286,11 @@ async fn apply_patch_cli_multiple_operations_integration(
let patch = "*** Begin Patch\n*** Add File: nested/new.txt\n+created\n*** Delete File: delete.txt\n*** Update File: modify.txt\n@@\n-line2\n+changed\n*** End Patch";
let call_id = "apply-multi-ops";
mount_apply_patch(&harness, call_id, patch, "done", output_type).await;
mount_apply_patch(&harness, call_id, patch, "done").await;
harness.submit("please apply multi-ops patch").await?;
let out = harness.apply_patch_output(call_id, output_type).await;
let out = harness.apply_patch_output(call_id).await;
let expected = r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
@@ -302,9 +313,7 @@ D delete.txt
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_multiple_chunks(model_output: ApplyPatchModelOutput) -> Result<()> {
async fn apply_patch_cli_multiple_chunks() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -315,7 +324,7 @@ async fn apply_patch_cli_multiple_chunks(model_output: ApplyPatchModelOutput) ->
let patch = "*** Begin Patch\n*** Update File: multi.txt\n@@\n-line2\n+changed2\n@@\n-line4\n+changed4\n*** End Patch";
let call_id = "apply-multi-chunks";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply multi-chunk patch").await?;
@@ -327,11 +336,7 @@ async fn apply_patch_cli_multiple_chunks(model_output: ApplyPatchModelOutput) ->
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_moves_file_to_new_directory(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_moves_file_to_new_directory() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -340,7 +345,7 @@ async fn apply_patch_cli_moves_file_to_new_directory(
let patch = "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/dir/name.txt\n@@\n-old content\n+new content\n*** End Patch";
let call_id = "apply-move";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply move patch").await?;
@@ -353,11 +358,7 @@ async fn apply_patch_cli_moves_file_to_new_directory(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_updates_file_appends_trailing_newline(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_updates_file_appends_trailing_newline() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -368,7 +369,7 @@ async fn apply_patch_cli_updates_file_appends_trailing_newline(
let patch = "*** Begin Patch\n*** Update File: no_newline.txt\n@@\n-no newline at end\n+first line\n+second line\n*** End Patch";
let call_id = "apply-append-nl";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply newline patch").await?;
@@ -379,11 +380,7 @@ async fn apply_patch_cli_updates_file_appends_trailing_newline(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_insert_only_hunk_modifies_file(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_insert_only_hunk_modifies_file() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -394,7 +391,7 @@ async fn apply_patch_cli_insert_only_hunk_modifies_file(
let patch = "*** Begin Patch\n*** Update File: insert_only.txt\n@@\n alpha\n+beta\n omega\n*** End Patch";
let call_id = "apply-insert-only";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("insert lines via apply_patch").await?;
@@ -406,11 +403,7 @@ async fn apply_patch_cli_insert_only_hunk_modifies_file(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_move_overwrites_existing_destination(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_move_overwrites_existing_destination() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -422,7 +415,7 @@ async fn apply_patch_cli_move_overwrites_existing_destination(
let patch = "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/dir/name.txt\n@@\n-from\n+new\n*** End Patch";
let call_id = "apply-move-overwrite";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply move overwrite patch").await?;
@@ -435,11 +428,7 @@ async fn apply_patch_cli_move_overwrites_existing_destination(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_move_without_content_change_has_no_turn_diff(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_move_without_content_change_has_no_turn_diff() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -450,7 +439,7 @@ async fn apply_patch_cli_move_without_content_change_has_no_turn_diff(
let patch = "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/name.txt\n@@\n same\n*** End Patch";
let call_id = "apply-move-no-change";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
submit_without_wait(&harness, "rename without content change").await?;
@@ -472,11 +461,7 @@ async fn apply_patch_cli_move_without_content_change_has_no_turn_diff(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_add_overwrites_existing_file(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_add_overwrites_existing_file() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -485,7 +470,7 @@ async fn apply_patch_cli_add_overwrites_existing_file(
let patch = "*** Begin Patch\n*** Add File: duplicate.txt\n+new content\n*** End Patch";
let call_id = "apply-add-overwrite";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply add overwrite patch").await?;
@@ -497,22 +482,18 @@ async fn apply_patch_cli_add_overwrites_existing_file(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_rejects_invalid_hunk_header(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_rejects_invalid_hunk_header() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let patch = "*** Begin Patch\n*** Frobnicate File: foo\n*** End Patch";
let call_id = "apply-invalid-header";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply invalid header patch").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains("apply_patch verification failed"),
@@ -526,11 +507,7 @@ async fn apply_patch_cli_rejects_invalid_hunk_header(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_reports_missing_context(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_reports_missing_context() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -540,11 +517,11 @@ async fn apply_patch_cli_reports_missing_context(
let patch =
"*** Begin Patch\n*** Update File: modify.txt\n@@\n-missing\n+changed\n*** End Patch";
let call_id = "apply-missing-context";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply missing context patch").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains("apply_patch verification failed"),
@@ -559,22 +536,18 @@ async fn apply_patch_cli_reports_missing_context(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_reports_missing_target_file(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_reports_missing_target_file() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let patch = "*** Begin Patch\n*** Update File: missing.txt\n@@\n-nope\n+better\n*** End Patch";
let call_id = "apply-missing-file";
mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;
mount_apply_patch(&harness, call_id, patch, "fail").await;
harness.submit("attempt to update a missing file").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains("apply_patch verification failed"),
"expected verification failure message"
@@ -592,22 +565,18 @@ async fn apply_patch_cli_reports_missing_target_file(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_delete_missing_file_reports_error(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_delete_missing_file_reports_error() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let patch = "*** Begin Patch\n*** Delete File: missing.txt\n*** End Patch";
let call_id = "apply-delete-missing";
mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;
mount_apply_patch(&harness, call_id, patch, "fail").await;
harness.submit("attempt to delete missing file").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains("apply_patch verification failed"),
@@ -626,20 +595,18 @@ async fn apply_patch_cli_delete_missing_file_reports_error(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_rejects_empty_patch(model_output: ApplyPatchModelOutput) -> Result<()> {
async fn apply_patch_cli_rejects_empty_patch() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let patch = "*** Begin Patch\n*** End Patch";
let call_id = "apply-empty";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply empty patch").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains("patch rejected: empty patch"),
"expected rejection for empty patch: {out}"
@@ -648,11 +615,7 @@ async fn apply_patch_cli_rejects_empty_patch(model_output: ApplyPatchModelOutput
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_delete_directory_reports_verification_error(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_delete_directory_reports_verification_error() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -661,22 +624,18 @@ async fn apply_patch_cli_delete_directory_reports_verification_error(
let patch = "*** Begin Patch\n*** Delete File: dir\n*** End Patch";
let call_id = "apply-delete-dir";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("delete a directory via apply_patch").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(out.contains("apply_patch verification failed"));
assert!(out.contains("Failed to read"));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_rejects_path_traversal_outside_workspace(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_rejects_path_traversal_outside_workspace() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -692,7 +651,7 @@ async fn apply_patch_cli_rejects_path_traversal_outside_workspace(
let patch = "*** Begin Patch\n*** Add File: ../escape.txt\n+outside\n*** End Patch";
let call_id = "apply-path-traversal";
mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;
mount_apply_patch(&harness, call_id, patch, "fail").await;
harness
.submit_with_permission_profile(
@@ -701,7 +660,7 @@ async fn apply_patch_cli_rejects_path_traversal_outside_workspace(
)
.await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains(
"patch rejected: writing outside of the project; rejected by user approval settings"
@@ -717,10 +676,7 @@ async fn apply_patch_cli_rejects_path_traversal_outside_workspace(
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc ; "shell_command_heredoc")]
async fn intercepted_apply_patch_verification_uses_local_sandbox(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn intercepted_apply_patch_verification_uses_local_sandbox() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_remote!(Ok(()), "symlink setup needs local filesystem link creation");
@@ -740,7 +696,14 @@ async fn intercepted_apply_patch_verification_uses_local_sandbox(
*** End Patch"#
);
let call_id = "apply-sandboxed-read";
mount_apply_patch(&harness, call_id, &patch, "fail", model_output).await;
mount_apply_patch_model_output(
&harness,
call_id,
&patch,
"fail",
ApplyPatchModelOutput::ShellCommandViaHeredoc,
)
.await;
harness
.submit_with_permission_profile(
@@ -749,7 +712,11 @@ async fn intercepted_apply_patch_verification_uses_local_sandbox(
)
.await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.function_call_stdout(call_id).await;
assert!(
serde_json::from_str::<serde_json::Value>(&out).is_err(),
"expected heredoc apply_patch output to be plain text"
);
assert!(
out.contains("apply_patch verification failed"),
"expected sandboxed verification failure: {out}"
@@ -767,11 +734,7 @@ async fn intercepted_apply_patch_verification_uses_local_sandbox(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform ; "freeform")]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc ; "shell_command_heredoc")]
async fn apply_patch_cli_does_not_write_through_symlink_escape_outside_workspace(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_does_not_write_through_symlink_escape_outside_workspace() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_remote!(
Ok(()),
@@ -815,7 +778,7 @@ async fn apply_patch_cli_does_not_write_through_symlink_escape_outside_workspace
*** End Patch"#
);
let call_id = "apply-symlink-escape";
mount_apply_patch(&harness, call_id, &patch, "fail", model_output).await;
mount_apply_patch(&harness, call_id, &patch, "fail").await;
harness
.submit_with_permission_profile(
@@ -824,7 +787,7 @@ async fn apply_patch_cli_does_not_write_through_symlink_escape_outside_workspace
)
.await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert_eq!(
std::fs::read_to_string(&outside_file)?,
original_contents,
@@ -836,11 +799,7 @@ async fn apply_patch_cli_does_not_write_through_symlink_escape_outside_workspace
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform ; "freeform")]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc ; "shell_command_heredoc")]
async fn apply_patch_cli_preserves_existing_hard_link_outside_workspace(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_preserves_existing_hard_link_outside_workspace() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_remote!(
Ok(()),
@@ -876,7 +835,7 @@ async fn apply_patch_cli_preserves_existing_hard_link_outside_workspace(
*** End Patch"#
);
let call_id = "apply-hard-link";
mount_apply_patch(&harness, call_id, &patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, &patch, "ok").await;
harness
.submit_with_permission_profile(
@@ -885,7 +844,7 @@ async fn apply_patch_cli_preserves_existing_hard_link_outside_workspace(
)
.await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
if cfg!(windows) {
assert!(
out.contains("patch rejected: writing outside of the project"),
@@ -938,11 +897,7 @@ async fn apply_patch_cli_preserves_existing_hard_link_outside_workspace(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -960,7 +915,7 @@ async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace(
let patch = "*** Begin Patch\n*** Update File: stay.txt\n*** Move to: ../escape-move.txt\n@@\n-from\n+to\n*** End Patch";
let call_id = "apply-move-traversal";
mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;
mount_apply_patch(&harness, call_id, patch, "fail").await;
harness
.submit_with_permission_profile(
@@ -969,7 +924,7 @@ async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace(
)
.await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(
out.contains(
"patch rejected: writing outside of the project; rejected by user approval settings"
@@ -985,11 +940,7 @@ async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_verification_failure_has_no_side_effects(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_verification_failure_has_no_side_effects() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -998,7 +949,7 @@ async fn apply_patch_cli_verification_failure_has_no_side_effects(
let call_id = "apply-partial-no-side-effects";
let patch = "*** Begin Patch\n*** Add File: created.txt\n+hello\n*** Update File: missing.txt\n@@\n-old\n+new\n*** End Patch";
mount_apply_patch(&harness, call_id, patch, "failed", model_output).await;
mount_apply_patch(&harness, call_id, patch, "failed").await;
harness.submit("attempt partial apply patch").await?;
@@ -1398,14 +1349,7 @@ async fn apply_patch_turn_diff_paths_stay_repo_relative_when_session_cwd_is_nest
let call_id = "apply-nested-cwd-repo-relative";
let patch = "*** Begin Patch\n*** Update File: ../repo.txt\n@@\n-before\n+after\n*** End Patch";
mount_apply_patch(
&harness,
call_id,
patch,
"updated repo-relative path",
ApplyPatchModelOutput::Freeform,
)
.await;
mount_apply_patch(&harness, call_id, patch, "updated repo-relative path").await;
submit_without_wait(&harness, "update file outside nested cwd but inside repo").await?;
@@ -1490,10 +1434,7 @@ async fn apply_patch_shell_command_failure_propagates_error_and_skips_diff() ->
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_shell_accepts_lenient_heredoc_wrapped_patch(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_shell_accepts_lenient_heredoc_wrapped_patch() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -1502,18 +1443,36 @@ async fn apply_patch_shell_accepts_lenient_heredoc_wrapped_patch(
let patch_inner =
format!("*** Begin Patch\n*** Add File: {file_name}\n+lenient\n*** End Patch\n");
let call_id = "apply-lenient";
mount_apply_patch(&harness, call_id, patch_inner.as_str(), "ok", model_output).await;
mount_apply_patch_model_output(
&harness,
call_id,
patch_inner.as_str(),
"ok",
ApplyPatchModelOutput::ShellCommandViaHeredoc,
)
.await;
harness.submit("apply lenient heredoc patch").await?;
let out = harness.function_call_stdout(call_id).await;
assert!(
serde_json::from_str::<serde_json::Value>(&out).is_err(),
"expected heredoc apply_patch output to be plain text"
);
assert!(
out.contains("Success. Updated the following files:"),
"expected successful apply_patch output: {out}"
);
assert!(
out.contains(&format!("A {file_name}")),
"expected created file in apply_patch output: {out}"
);
assert_eq!(harness.read_file_text(file_name).await?, "lenient\n");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_end_of_file_anchor(model_output: ApplyPatchModelOutput) -> Result<()> {
async fn apply_patch_cli_end_of_file_anchor() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -1522,7 +1481,7 @@ async fn apply_patch_cli_end_of_file_anchor(model_output: ApplyPatchModelOutput)
let patch = "*** Begin Patch\n*** Update File: tail.txt\n@@\n-last\n+end\n*** End of File\n*** End Patch";
let call_id = "apply-eof";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply EOF-anchored patch").await?;
assert_eq!(harness.read_file_text("tail.txt").await?, "alpha\nend\n");
@@ -1530,11 +1489,7 @@ async fn apply_patch_cli_end_of_file_anchor(model_output: ApplyPatchModelOutput)
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_missing_second_chunk_context_rejected(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_cli_missing_second_chunk_context_rejected() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -1545,11 +1500,11 @@ async fn apply_patch_cli_missing_second_chunk_context_rejected(
let patch =
"*** Begin Patch\n*** Update File: two_chunks.txt\n@@\n-b\n+B\n\n-d\n+D\n*** End Patch";
let call_id = "apply-missing-ctx-2nd";
mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;
mount_apply_patch(&harness, call_id, patch, "fail").await;
harness.submit("apply missing context second chunk").await?;
let out = harness.apply_patch_output(call_id, model_output).await;
let out = harness.apply_patch_output(call_id).await;
assert!(out.contains("apply_patch verification failed"));
assert!(
out.contains("Failed to find expected lines in"),
@@ -1564,11 +1519,7 @@ async fn apply_patch_cli_missing_second_chunk_context_rejected(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_emits_turn_diff_event_with_unified_diff(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_emits_turn_diff_event_with_unified_diff() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -1578,7 +1529,7 @@ async fn apply_patch_emits_turn_diff_event_with_unified_diff(
let call_id = "apply-diff-event";
let file = "udiff.txt";
let patch = format!("*** Begin Patch\n*** Add File: {file}\n+hello\n*** End Patch\n");
mount_apply_patch(&harness, call_id, patch.as_str(), "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch.as_str(), "ok").await;
submit_without_wait(&harness, "emit diff").await?;
@@ -1794,11 +1745,7 @@ async fn apply_patch_clears_aggregated_diff_after_inexact_delta() -> Result<()>
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_change_context_disambiguates_target(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_change_context_disambiguates_target() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -1810,7 +1757,7 @@ async fn apply_patch_change_context_disambiguates_target(
let patch =
"*** Begin Patch\n*** Update File: multi_ctx.txt\n@@ fn b\n-x=10\n+x=11\n*** End Patch";
let call_id = "apply-ctx";
mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;
mount_apply_patch(&harness, call_id, patch, "ok").await;
harness.submit("apply with change_context").await?;

View File

@@ -400,10 +400,6 @@ if (!tool) {
.features
.enable(Feature::Apps)
.expect("test config should allow feature update");
config
.features
.enable(Feature::ToolSearch)
.expect("test config should allow feature update");
config
.features
.enable(Feature::CodeMode)

View File

@@ -3652,22 +3652,9 @@ async fn post_tool_use_records_additional_context_for_apply_patch() -> Result<()
let tool_response = hook_inputs[0]["tool_response"]
.as_str()
.context("apply_patch tool_response should be a string")?;
let mut parsed_tool_response = serde_json::from_str::<Value>(tool_response)?;
if let Some(metadata) = parsed_tool_response
.get_mut("metadata")
.and_then(Value::as_object_mut)
{
let _ = metadata.remove("duration_seconds");
}
assert_eq!(
parsed_tool_response,
serde_json::json!({
"output": "Success. Updated the following files:\nA post_tool_use_apply_patch.txt\n",
"metadata": {
"exit_code": 0,
},
})
);
assert!(tool_response.starts_with("Exit code: 0"));
assert!(tool_response.contains("Success. Updated the following files:"));
assert!(tool_response.contains("A post_tool_use_apply_patch.txt"));
Ok(())
}

View File

@@ -705,7 +705,7 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() -> anyhow::Res
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
// Second turn using per-turn overrides via UserTurn
// Second turn using per-turn thread-settings overrides.
let new_cwd = TempDir::new().unwrap();
let writable = TempDir::new().unwrap();
let permission_profile = PermissionProfile::workspace_write_with(

View File

@@ -15,7 +15,6 @@ use codex_protocol::protocol::ExecApprovalRequestEvent;
use codex_protocol::protocol::GranularApprovalConfig;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::request_permissions::PermissionGrantScope;
use codex_protocol::request_permissions::RequestPermissionProfile;
use codex_protocol::request_permissions::RequestPermissionsResponse;
@@ -33,6 +32,7 @@ use core_test_support::skip_if_no_network;
use core_test_support::skip_if_sandbox;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::test_codex::turn_permission_fields;
use core_test_support::wait_for_event;
use pretty_assertions::assert_eq;
use regex_lite::Regex;
@@ -184,9 +184,11 @@ async fn submit_turn(
test: &TestCodex,
prompt: &str,
approval_policy: AskForApproval,
sandbox_policy: SandboxPolicy,
permission_profile: CorePermissionProfile,
) -> Result<()> {
let session_model = test.session_configured.model.clone();
let (sandbox_policy, permission_profile) =
turn_permission_fields(permission_profile, test.cwd.path());
test.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
@@ -201,6 +203,7 @@ async fn submit_turn(
approval_policy: Some(approval_policy),
approvals_reviewer: Some(ApprovalsReviewer::User),
sandbox_policy: Some(sandbox_policy),
permission_profile,
collaboration_mode: Some(codex_protocol::config_types::CollaborationMode {
mode: codex_protocol::config_types::ModeKind::Default,
settings: codex_protocol::config_types::Settings {
@@ -290,16 +293,7 @@ async fn expect_request_permissions_event(
}
}
/// Returns a `SandboxPolicy::WorkspaceWrite` with no extra writable roots,
/// network access turned off, and both tmp-exclusion flags set.
fn workspace_write_excluding_tmp() -> SandboxPolicy {
    SandboxPolicy::WorkspaceWrite {
        exclude_slash_tmp: true,
        exclude_tmpdir_env_var: true,
        network_access: false,
        writable_roots: Vec::new(),
    }
}
fn workspace_write_excluding_tmp_profile() -> CorePermissionProfile {
fn workspace_write_excluding_tmp() -> CorePermissionProfile {
CorePermissionProfile::workspace_write_with(
&[],
NetworkSandboxPolicy::Restricted,
@@ -335,7 +329,7 @@ async fn with_additional_permissions_requires_approval_under_on_request() -> Res
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = SandboxPolicy::new_read_only_policy();
let permission_profile = CorePermissionProfile::read_only();
let permission_profile_for_config = CorePermissionProfile::read_only();
let mut builder = test_codex().with_config(move |config| {
@@ -389,7 +383,7 @@ async fn with_additional_permissions_requires_approval_under_on_request() -> Res
)
.await;
submit_turn(&test, call_id, approval_policy, sandbox_policy.clone()).await?;
submit_turn(&test, call_id, approval_policy, permission_profile.clone()).await?;
let approval = expect_exec_approval(&test, command).await;
assert_eq!(
approval.additional_permissions,
@@ -433,7 +427,7 @@ async fn request_permissions_tool_is_auto_denied_when_granular_request_permissio
request_permissions: false,
mcp_elicitations: true,
});
let sandbox_policy = SandboxPolicy::new_read_only_policy();
let permission_profile = CorePermissionProfile::read_only();
let permission_profile_for_config = CorePermissionProfile::read_only();
let mut builder = test_codex().with_config(move |config| {
@@ -481,7 +475,7 @@ async fn request_permissions_tool_is_auto_denied_when_granular_request_permissio
&test,
"request permissions under granular.request_permissions = false",
approval_policy,
sandbox_policy,
permission_profile,
)
.await?;
@@ -519,7 +513,7 @@ async fn relative_additional_permissions_resolve_against_tool_workdir() -> Resul
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = SandboxPolicy::new_read_only_policy();
let permission_profile = CorePermissionProfile::read_only();
let permission_profile_for_config = CorePermissionProfile::read_only();
let mut builder = test_codex().with_config(move |config| {
@@ -583,7 +577,7 @@ async fn relative_additional_permissions_resolve_against_tool_workdir() -> Resul
)
.await;
submit_turn(&test, call_id, approval_policy, sandbox_policy.clone()).await?;
submit_turn(&test, call_id, approval_policy, permission_profile.clone()).await?;
let approval = expect_exec_approval(&test, command).await;
assert_eq!(
@@ -623,7 +617,7 @@ async fn read_only_with_additional_permissions_does_not_widen_to_unrequested_cwd
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = SandboxPolicy::new_read_only_policy();
let permission_profile = CorePermissionProfile::read_only();
let permission_profile_for_config = CorePermissionProfile::read_only();
let mut builder = test_codex().with_config(move |config| {
@@ -680,7 +674,7 @@ async fn read_only_with_additional_permissions_does_not_widen_to_unrequested_cwd
)
.await;
submit_turn(&test, call_id, approval_policy, sandbox_policy.clone()).await?;
submit_turn(&test, call_id, approval_policy, permission_profile.clone()).await?;
let approval = expect_exec_approval(&test, &command).await;
assert_eq!(
@@ -726,7 +720,7 @@ async fn read_only_with_additional_permissions_does_not_widen_to_unrequested_tmp
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = SandboxPolicy::new_read_only_policy();
let permission_profile = CorePermissionProfile::read_only();
let permission_profile_for_config = CorePermissionProfile::read_only();
let mut builder = test_codex().with_config(move |config| {
@@ -784,7 +778,7 @@ async fn read_only_with_additional_permissions_does_not_widen_to_unrequested_tmp
)
.await;
submit_turn(&test, call_id, approval_policy, sandbox_policy.clone()).await?;
submit_turn(&test, call_id, approval_policy, permission_profile.clone()).await?;
let approval = expect_exec_approval(&test, &command).await;
assert_eq!(
@@ -828,8 +822,8 @@ async fn workspace_write_with_additional_permissions_can_write_outside_cwd() ->
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp_profile();
let permission_profile = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp();
let mut builder = test_codex().with_config(move |config| {
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
@@ -895,7 +889,7 @@ async fn workspace_write_with_additional_permissions_can_write_outside_cwd() ->
)
.await;
submit_turn(&test, call_id, approval_policy, sandbox_policy.clone()).await?;
submit_turn(&test, call_id, approval_policy, permission_profile.clone()).await?;
let approval = expect_exec_approval(&test, &command).await;
assert_eq!(
@@ -935,8 +929,8 @@ async fn with_additional_permissions_denied_approval_blocks_execution() -> Resul
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp_profile();
let permission_profile = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp();
let mut builder = test_codex().with_config(move |config| {
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
@@ -1000,7 +994,7 @@ async fn with_additional_permissions_denied_approval_blocks_execution() -> Resul
)
.await;
submit_turn(&test, call_id, approval_policy, sandbox_policy.clone()).await?;
submit_turn(&test, call_id, approval_policy, permission_profile.clone()).await?;
let approval = expect_exec_approval(&test, &command).await;
assert_eq!(
@@ -1043,8 +1037,8 @@ async fn request_permissions_grants_apply_to_later_exec_command_calls() -> Resul
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp_profile();
let permission_profile = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp();
let mut builder = test_codex().with_config(move |config| {
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
@@ -1115,7 +1109,7 @@ async fn request_permissions_grants_apply_to_later_exec_command_calls() -> Resul
&test,
"write outside the workspace",
approval_policy,
sandbox_policy,
permission_profile,
)
.await?;
@@ -1170,8 +1164,8 @@ async fn request_permissions_preapprove_explicit_exec_permissions_outside_on_req
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp_profile();
let permission_profile = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp();
let mut builder = test_codex().with_config(move |config| {
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
@@ -1233,7 +1227,7 @@ async fn request_permissions_preapprove_explicit_exec_permissions_outside_on_req
&test,
"write outside the workspace",
approval_policy,
sandbox_policy,
permission_profile,
)
.await?;
@@ -1291,8 +1285,8 @@ async fn request_permissions_grants_apply_to_later_shell_command_calls() -> Resu
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp_profile();
let permission_profile = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp();
let mut builder = test_codex().with_config(move |config| {
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
@@ -1350,7 +1344,7 @@ async fn request_permissions_grants_apply_to_later_shell_command_calls() -> Resu
&test,
"write outside the workspace",
approval_policy,
sandbox_policy,
permission_profile,
)
.await?;
@@ -1406,8 +1400,8 @@ async fn request_permissions_grants_apply_to_later_shell_command_calls_without_i
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp_profile();
let permission_profile = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp();
let mut builder = test_codex().with_config(move |config| {
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
@@ -1463,7 +1457,7 @@ async fn request_permissions_grants_apply_to_later_shell_command_calls_without_i
&test,
"write outside the workspace without inline permission feature",
approval_policy,
sandbox_policy,
permission_profile,
)
.await?;
@@ -1521,8 +1515,8 @@ async fn partial_request_permissions_grants_do_not_preapprove_new_permissions()
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp_profile();
let permission_profile = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp();
let mut builder = test_codex().with_config(move |config| {
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
@@ -1616,7 +1610,7 @@ async fn partial_request_permissions_grants_do_not_preapprove_new_permissions()
&test,
"write outside the workspace",
approval_policy,
sandbox_policy,
permission_profile,
)
.await?;
@@ -1688,8 +1682,8 @@ async fn request_permissions_grants_do_not_carry_across_turns() -> Result<()> {
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp_profile();
let permission_profile = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp();
let mut builder = test_codex().with_config(move |config| {
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
@@ -1738,7 +1732,7 @@ async fn request_permissions_grants_do_not_carry_across_turns() -> Result<()> {
&test,
"request permissions for later use",
approval_policy,
sandbox_policy.clone(),
permission_profile.clone(),
)
.await?;
@@ -1783,7 +1777,7 @@ async fn request_permissions_grants_do_not_carry_across_turns() -> Result<()> {
&test,
"try to reuse permissions in a later turn",
approval_policy,
sandbox_policy,
permission_profile,
)
.await?;
wait_for_completion(&test).await;
@@ -1804,8 +1798,8 @@ async fn request_permissions_session_grants_carry_across_turns() -> Result<()> {
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp_profile();
let permission_profile = workspace_write_excluding_tmp();
let permission_profile_for_config = workspace_write_excluding_tmp();
let mut builder = test_codex().with_config(move |config| {
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
@@ -1859,7 +1853,7 @@ async fn request_permissions_session_grants_carry_across_turns() -> Result<()> {
&test,
"request session permissions for later use",
approval_policy,
sandbox_policy.clone(),
permission_profile.clone(),
)
.await?;
@@ -1901,7 +1895,7 @@ async fn request_permissions_session_grants_carry_across_turns() -> Result<()> {
&test,
"reuse session permissions in a later turn",
approval_policy,
sandbox_policy,
permission_profile,
)
.await?;

View File

@@ -27,7 +27,6 @@ fn resume_history(
let turn_id = "resume-warning-seed-turn".to_string();
let turn_ctx = TurnContextItem {
turn_id: Some(turn_id.clone()),
trace_id: None,
cwd: config.cwd.to_path_buf(),
current_date: None,
timezone: None,
@@ -44,10 +43,6 @@ fn resume_history(
summary: config
.model_reasoning_summary
.unwrap_or(ReasoningSummary::Auto),
user_instructions: None,
developer_instructions: None,
final_output_json_schema: None,
truncation_policy: None,
};
InitialHistory::Resumed(ResumedHistory {

View File

@@ -4,7 +4,6 @@
use anyhow::Result;
use codex_config::types::McpServerConfig;
use codex_config::types::McpServerTransportConfig;
use codex_core::config::Config;
use codex_features::Feature;
use codex_login::CodexAuth;
use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem;
@@ -113,14 +112,6 @@ fn tool_search_output_has_namespace_child(
namespace_child_tool(&output, namespace, tool_name).is_some()
}
/// Applies the search-capable apps configuration and then switches
/// `Feature::ToolSearch` off.
///
/// # Panics
///
/// Panics if the feature set rejects the update.
fn configure_apps_without_tool_search(config: &mut Config, apps_base_url: &str) {
    configure_search_capable_apps(config, apps_base_url);

    let disable_outcome = config.features.disable(Feature::ToolSearch);
    disable_outcome.expect("test config should allow feature update");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn search_tool_enabled_by_default_adds_tool_search() -> Result<()> {
skip_if_no_network!(Ok(()));
@@ -225,54 +216,6 @@ async fn always_defer_feature_hides_small_app_tool_sets() -> Result<()> {
Ok(())
}
/// With tool search disabled in the config, the request sent to the model
/// must not list the tool-search tool and must carry the calendar namespace's
/// create and list tools directly.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn tool_search_disabled_exposes_apps_tools_directly() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
    let apps_server = AppsTestServer::mount_searchable(&server).await?;

    // One canned turn: the model simply answers "done".
    let canned_turn = sse(vec![
        ev_response_created("resp-1"),
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-1"),
    ]);
    let response_mock = mount_sse_once(&server, canned_turn).await;

    let mut builder = test_codex()
        .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
        .with_config(move |config| {
            configure_apps_without_tool_search(config, apps_server.chatgpt_base_url.as_str())
        });
    let test = builder.build(&server).await?;

    test.submit_turn_with_approval_and_permission_profile(
        "list tools",
        AskForApproval::Never,
        PermissionProfile::Disabled,
    )
    .await?;

    let request_body = response_mock.single_request().body_json();
    let advertised_tools = tool_names(&request_body);

    // The search tool itself must be absent from the advertised tool list.
    assert!(
        advertised_tools
            .iter()
            .all(|name| name != TOOL_SEARCH_TOOL_NAME)
    );

    // Each calendar tool must be reachable as a namespace child.
    let create_tool = namespace_child_tool(
        &request_body,
        SEARCH_CALENDAR_NAMESPACE,
        SEARCH_CALENDAR_CREATE_TOOL,
    );
    assert!(create_tool.is_some());

    let list_tool = namespace_child_tool(
        &request_body,
        SEARCH_CALENDAR_NAMESPACE,
        SEARCH_CALENDAR_LIST_TOOL,
    );
    assert!(list_tool.is_some());

    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn search_tool_is_hidden_for_api_key_auth() -> Result<()> {
skip_if_no_network!(Ok(()));

View File

@@ -12,16 +12,12 @@ use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::ApplyPatchModelOutput;
use core_test_support::test_codex::ShellModelOutput;
use core_test_support::test_codex::TestCodexBuilder;
use core_test_support::test_codex::test_codex;
use pretty_assertions::assert_eq;
use regex_lite::Regex;
use serde_json::Value;
use serde_json::json;
use std::fs;
use test_case::test_case;
use crate::suite::apply_patch_cli::apply_patch_harness;
use crate::suite::apply_patch_cli::mount_apply_patch;
@@ -38,135 +34,68 @@ const FIXTURE_JSON: &str = r#"{
}
"#;
/// Builds the two SSE bodies for a single `shell_command` tool call.
///
/// The first body holds the response-created event, a `shell_command`
/// function call whose JSON arguments carry the shell-joined `command` and a
/// `timeout_ms` of 2000, and the completion event for `resp-1`. The second
/// body holds the assistant message `"done"` and the completion event for
/// `resp-2`.
///
/// # Errors
///
/// Returns an error if `command` cannot be shell-joined or the arguments
/// cannot be serialized.
fn shell_responses(
    call_id: &str,
    command: Vec<&str>,
    output_type: ShellModelOutput,
) -> Result<Vec<String>> {
    match output_type {
        ShellModelOutput::ShellCommand => {
            let joined_command = shlex::try_join(command)?;
            let arguments = serde_json::to_string(&json!({
                "command": joined_command,
                "timeout_ms": 2_000,
            }))?;

            let tool_call_turn = sse(vec![
                ev_response_created("resp-1"),
                ev_function_call(call_id, "shell_command", &arguments),
                ev_completed("resp-1"),
            ]);
            let final_turn = sse(vec![
                ev_assistant_message("msg-1", "done"),
                ev_completed("resp-2"),
            ]);

            Ok(vec![tool_call_turn, final_turn])
        }
    }
}
fn configure_shell_model(
builder: TestCodexBuilder,
output_type: ShellModelOutput,
) -> TestCodexBuilder {
match output_type {
ShellModelOutput::ShellCommand => builder.with_model("test-gpt-5-codex"),
}
/// Builds the two SSE bodies for a single `shell_command` tool call.
///
/// The first body holds the response-created event, a `shell_command`
/// function call whose JSON arguments carry the shell-joined `command` and a
/// `timeout_ms` of 2000, and the completion event for `resp-1`. The second
/// body holds the assistant message `"done"` and the completion event for
/// `resp-2`.
///
/// # Errors
///
/// Returns an error if `command` cannot be shell-joined or the arguments
/// cannot be serialized.
fn shell_responses(call_id: &str, command: Vec<&str>) -> Result<Vec<String>> {
    let joined_command = shlex::try_join(command)?;
    let arguments = serde_json::to_string(&json!({
        "command": joined_command,
        "timeout_ms": 2_000,
    }))?;

    let tool_call_turn = sse(vec![
        ev_response_created("resp-1"),
        ev_function_call(call_id, "shell_command", &arguments),
        ev_completed("resp-1"),
    ]);
    let final_turn = sse(vec![
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-2"),
    ]);

    Ok(vec![tool_call_turn, final_turn])
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ShellModelOutput::ShellCommand)]
async fn shell_output_is_structured_with_freeform_apply_patch(
output_type: ShellModelOutput,
) -> Result<()> {
async fn shell_output_preserves_fixture_json_as_freeform() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = configure_shell_model(test_codex(), output_type);
let test = builder.build(&server).await?;
let call_id = "shell-structured";
let responses = shell_responses(call_id, vec!["/bin/echo", "freeform shell"], output_type)?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_permission_profile(
"run the structured shell command",
PermissionProfile::Disabled,
)
.await?;
let req = mock
.last_request()
.expect("structured shell output request recorded");
let output_item = req.function_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("structured output string");
assert!(
serde_json::from_str::<Value>(output).is_err(),
"expected structured shell output to be plain text",
);
let expected_pattern = r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Output:
freeform shell
?$";
assert_regex_match(expected_pattern, output);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ShellModelOutput::ShellCommand)]
async fn shell_output_structures_fixture_with_serialization(
output_type: ShellModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = configure_shell_model(test_codex(), output_type);
let mut builder = test_codex().with_model("test-gpt-5-codex");
let test = builder.build(&server).await?;
let fixture_path = test.cwd.path().join("fixture.json");
fs::write(&fixture_path, FIXTURE_JSON)?;
let fixture_path_str = fixture_path.to_string_lossy().to_string();
let call_id = "shell-structured-fixture";
let call_id = "shell-freeform-fixture";
let responses = shell_responses(
call_id,
vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()],
output_type,
)?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_permission_profile(
"read the fixture JSON with structured output",
"read the fixture JSON with shell output",
PermissionProfile::Disabled,
)
.await?;
let req = mock
.last_request()
.expect("structured output request recorded");
let req = mock.last_request().expect("shell output request recorded");
let output_item = req.function_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("structured output string");
.expect("shell output string");
assert!(
serde_json::from_str::<Value>(output).is_err(),
"expected structured output to be plain text"
"expected shell output to be plain text"
);
let (header, body) = output
.split_once("Output:\n")
.expect("structured output contains an Output section");
.expect("shell output contains an Output section");
assert_regex_match(
r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds$",
header.trim_end(),
@@ -180,34 +109,26 @@ async fn shell_output_structures_fixture_with_serialization(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ShellModelOutput::ShellCommand)]
async fn shell_output_for_freeform_tool_records_duration(
output_type: ShellModelOutput,
) -> Result<()> {
async fn shell_output_records_duration() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = configure_shell_model(test_codex(), output_type);
let mut builder = test_codex().with_model("test-gpt-5-codex");
let test = builder.build(&server).await?;
let call_id = "shell-structured";
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "sleep 0.2"], output_type)?;
let call_id = "shell-freeform";
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "sleep 0.2"])?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_permission_profile(
"run the structured shell command",
PermissionProfile::Disabled,
)
.await?;
test.submit_turn_with_permission_profile("run the shell command", PermissionProfile::Disabled)
.await?;
let req = mock
.last_request()
.expect("structured output request recorded");
let req = mock.last_request().expect("shell output request recorded");
let output_item = req.function_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("structured output string");
.expect("shell output string");
let expected_pattern = r#"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
@@ -221,7 +142,7 @@ $"#;
.captures(output)
.and_then(|caps| caps.get(1))
.and_then(|value| value.as_str().parse::<f32>().ok())
.expect("expected structured shell output to contain wall time seconds");
.expect("expected shell output to contain wall time seconds");
assert!(
wall_time_seconds > 0.1,
"expected wall time to be greater than zero seconds, got {wall_time_seconds}"
@@ -231,53 +152,7 @@ $"#;
}
/// Applies an add-file patch through the mounted apply_patch call and checks
/// that the recorded tool output matches the exit-code / wall-time / output
/// layout, listing the new file as added.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
async fn apply_patch_custom_tool_output_is_structured(
    output_type: ApplyPatchModelOutput,
) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
    let call_id = "apply-patch-structured";
    let file_name = "structured.txt";

    let patch_text = format!(
        r#"*** Begin Patch
*** Add File: {file_name}
+from custom tool
*** End Patch
"#
    );
    mount_apply_patch(&harness, call_id, &patch_text, "done", output_type).await;

    let codex_test = harness.test();
    codex_test
        .submit_turn_with_permission_profile(
            "apply the patch via custom tool",
            PermissionProfile::Disabled,
        )
        .await?;

    let tool_output = harness.apply_patch_output(call_id, output_type).await;

    // The trailing newline after the file listing is optional.
    let success_pattern = format!(
        r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Output:
Success. Updated the following files:
A {file_name}
?$"
    );
    assert_regex_match(&success_pattern, tool_output.as_str());

    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
async fn apply_patch_custom_tool_call_creates_file(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_custom_tool_call_creates_file() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -287,7 +162,7 @@ async fn apply_patch_custom_tool_call_creates_file(
let patch = format!(
"*** Begin Patch\n*** Add File: {file_name}\n+custom tool content\n*** End Patch\n"
);
mount_apply_patch(&harness, call_id, &patch, "apply_patch done", output_type).await;
mount_apply_patch(&harness, call_id, &patch, "apply_patch done").await;
harness
.test()
@@ -297,7 +172,7 @@ async fn apply_patch_custom_tool_call_creates_file(
)
.await?;
let output = harness.apply_patch_output(call_id, output_type).await;
let output = harness.apply_patch_output(call_id).await;
let expected_pattern = format!(
r"(?s)^Exit code: 0
@@ -319,10 +194,7 @@ A {file_name}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
async fn apply_patch_custom_tool_call_updates_existing_file(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_custom_tool_call_updates_existing_file() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -333,14 +205,7 @@ async fn apply_patch_custom_tool_call_updates_existing_file(
let patch = format!(
"*** Begin Patch\n*** Update File: {file_name}\n@@\n-before\n+after\n*** End Patch\n"
);
mount_apply_patch(
&harness,
call_id,
&patch,
"apply_patch update done",
output_type,
)
.await;
mount_apply_patch(&harness, call_id, &patch, "apply_patch update done").await;
harness
.test()
@@ -350,7 +215,7 @@ async fn apply_patch_custom_tool_call_updates_existing_file(
)
.await?;
let output = harness.apply_patch_output(call_id, output_type).await;
let output = harness.apply_patch_output(call_id).await;
let expected_pattern = format!(
r"(?s)^Exit code: 0
@@ -369,10 +234,7 @@ M {file_name}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
async fn apply_patch_custom_tool_call_reports_failure_output(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
@@ -382,14 +244,7 @@ async fn apply_patch_custom_tool_call_reports_failure_output(
let patch = format!(
"*** Begin Patch\n*** Update File: {missing_file}\n@@\n-before\n+after\n*** End Patch\n"
);
mount_apply_patch(
&harness,
call_id,
&patch,
"apply_patch failure done",
output_type,
)
.await;
mount_apply_patch(&harness, call_id, &patch, "apply_patch failure done").await;
harness
.test()
@@ -399,7 +254,7 @@ async fn apply_patch_custom_tool_call_reports_failure_output(
)
.await?;
let output = harness.apply_patch_output(call_id, output_type).await;
let output = harness.apply_patch_output(call_id).await;
let expected_output = format!(
"apply_patch verification failed: Failed to read file to update {}/{missing_file}: No such file or directory (os error 2)",
@@ -411,49 +266,7 @@ async fn apply_patch_custom_tool_call_reports_failure_output(
}
/// Applies an add-file patch through the mounted apply_patch call and checks
/// that the recorded tool output matches the exit-code / wall-time / output
/// layout, listing the new file as added.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
async fn apply_patch_tool_output_is_structured(output_type: ApplyPatchModelOutput) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
    let call_id = "apply-patch-function";
    let file_name = "freeform_apply_patch.txt";

    let patch_text =
        format!("*** Begin Patch\n*** Add File: {file_name}\n+via apply_patch\n*** End Patch\n");
    mount_apply_patch(
        &harness,
        call_id,
        &patch_text,
        "apply_patch function done",
        output_type,
    )
    .await;

    let codex_test = harness.test();
    codex_test
        .submit_turn_with_permission_profile(
            "apply the patch via freeform apply_patch",
            PermissionProfile::Disabled,
        )
        .await?;

    let tool_output = harness.apply_patch_output(call_id, output_type).await;

    // The trailing newline after the file listing is optional.
    let success_pattern = format!(
        r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Output:
Success. Updated the following files:
A {file_name}
?$"
    );
    assert_regex_match(&success_pattern, tool_output.as_str());

    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ShellModelOutput::ShellCommand)]
async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutput) -> Result<()> {
async fn shell_output_is_freeform_for_nonzero_exit() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@@ -461,7 +274,7 @@ async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutp
let test = builder.build(&server).await?;
let call_id = "shell-nonzero-exit";
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "exit 42"], output_type)?;
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "exit 42"])?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_permission_profile(

View File

@@ -15,7 +15,6 @@ use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::ExecCommandSource;
use codex_protocol::protocol::ExecCommandStatus;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::user_input::UserInput;
use core_test_support::assert_regex_match;
use core_test_support::managed_network_requirements_loader;
@@ -35,6 +34,7 @@ use core_test_support::skip_if_windows;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::TestCodexHarness;
use core_test_support::test_codex::test_codex;
use core_test_support::test_codex::turn_permission_fields;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use core_test_support::wait_for_event_with_timeout;
@@ -185,9 +185,11 @@ async fn wait_for_raw_unified_exec_output(
async fn submit_unified_exec_turn(
test: &TestCodex,
prompt: &str,
sandbox_policy: SandboxPolicy,
permission_profile: PermissionProfile,
) -> Result<()> {
let session_model = test.session_configured.model.clone();
let (sandbox_policy, permission_profile) =
turn_permission_fields(permission_profile, test.config.cwd.as_path());
test.codex
.submit(Op::UserInput {
@@ -202,6 +204,7 @@ async fn submit_unified_exec_turn(
cwd: Some(test.config.cwd.to_path_buf()),
approval_policy: Some(AskForApproval::Never),
sandbox_policy: Some(sandbox_policy),
permission_profile,
collaboration_mode: Some(codex_protocol::config_types::CollaborationMode {
mode: codex_protocol::config_types::ModeKind::Default,
settings: codex_protocol::config_types::Settings {
@@ -276,6 +279,8 @@ async fn unified_exec_intercepts_apply_patch_exec_command() -> Result<()> {
let codex = test.codex.clone();
let cwd = test.cwd_path().to_path_buf();
let session_model = test.session_configured.model.clone();
let (sandbox_policy, permission_profile) =
turn_permission_fields(PermissionProfile::Disabled, &cwd);
codex
.submit(Op::UserInput {
@@ -289,7 +294,8 @@ async fn unified_exec_intercepts_apply_patch_exec_command() -> Result<()> {
thread_settings: codex_protocol::protocol::ThreadSettingsOverrides {
cwd: Some(cwd),
approval_policy: Some(AskForApproval::Never),
sandbox_policy: Some(SandboxPolicy::DangerFullAccess),
sandbox_policy: Some(sandbox_policy),
permission_profile,
collaboration_mode: Some(codex_protocol::config_types::CollaborationMode {
mode: codex_protocol::config_types::ModeKind::Default,
settings: codex_protocol::config_types::Settings {
@@ -411,7 +417,7 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
];
mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(&test, "emit begin event", SandboxPolicy::DangerFullAccess).await?;
submit_unified_exec_turn(&test, "emit begin event", PermissionProfile::Disabled).await?;
let begin_event = wait_for_event_match(&test.codex, |msg| match msg {
EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()),
@@ -475,7 +481,7 @@ async fn unified_exec_resolves_relative_workdir() -> Result<()> {
submit_unified_exec_turn(
&test,
"run relative workdir test",
SandboxPolicy::DangerFullAccess,
PermissionProfile::Disabled,
)
.await?;
@@ -540,7 +546,7 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
];
let request_log = mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(&test, "run workdir test", SandboxPolicy::DangerFullAccess).await?;
submit_unified_exec_turn(&test, "run workdir test", PermissionProfile::Disabled).await?;
let begin_event = wait_for_event_match(&test.codex, |msg| match msg {
EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()),
@@ -617,7 +623,7 @@ async fn unified_exec_emits_exec_command_end_event() -> Result<()> {
];
mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(&test, "emit end event", SandboxPolicy::DangerFullAccess).await?;
submit_unified_exec_turn(&test, "emit end event", PermissionProfile::Disabled).await?;
let end_event = wait_for_event_match(&test.codex, |msg| match msg {
EventMsg::ExecCommandEnd(ev) if ev.call_id == call_id => Some(ev.clone()),
@@ -675,7 +681,7 @@ async fn unified_exec_emits_output_delta_for_exec_command() -> Result<()> {
];
mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(&test, "emit delta", SandboxPolicy::DangerFullAccess).await?;
submit_unified_exec_turn(&test, "emit delta", PermissionProfile::Disabled).await?;
let event = wait_for_event_match(&test.codex, |msg| match msg {
EventMsg::ExecCommandEnd(ev) if ev.call_id == call_id => Some(ev.clone()),
@@ -737,7 +743,7 @@ async fn unified_exec_full_lifecycle_with_background_end_event() -> Result<()> {
submit_unified_exec_turn(
&test,
"exercise full unified exec lifecycle",
SandboxPolicy::DangerFullAccess,
PermissionProfile::Disabled,
)
.await?;
@@ -880,7 +886,7 @@ async fn unified_exec_short_lived_network_denial_emits_failed_end_event() -> Res
#[allow(clippy::expect_used)]
async fn unified_exec_network_denial_test(
server: &wiremock::MockServer,
) -> Result<(TestCodex, SandboxPolicy)> {
) -> Result<(TestCodex, PermissionProfile)> {
use codex_config::Constrained;
use std::sync::Arc;
use tempfile::TempDir;
@@ -905,10 +911,7 @@ allow_local_binding = true
/*exclude_tmpdir_env_var*/ false,
/*exclude_slash_tmp*/ false,
);
let sandbox_policy = permission_profile_for_config
.clone()
.to_legacy_sandbox_policy(home.path())
.expect("workspace-write profile should project to legacy policy");
let permission_profile = permission_profile_for_config.clone();
let mut builder = test_codex()
.with_home(home)
.with_cloud_requirements(managed_network_requirements_loader())
@@ -930,7 +933,7 @@ allow_local_binding = true
"expected managed network proxy config to be present"
);
Ok((test, sandbox_policy))
Ok((test, permission_profile))
}
async fn mount_unified_exec_network_denial_responses(
@@ -1048,7 +1051,7 @@ async fn unified_exec_emits_terminal_interaction_for_write_stdin() -> Result<()>
];
mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(&test, "stdin delta", SandboxPolicy::DangerFullAccess).await?;
submit_unified_exec_turn(&test, "stdin delta", PermissionProfile::Disabled).await?;
let mut terminal_interaction = None;
@@ -1168,7 +1171,7 @@ async fn unified_exec_terminal_interaction_captures_delayed_output() -> Result<(
submit_unified_exec_turn(
&test,
"delayed terminal interaction output",
SandboxPolicy::DangerFullAccess,
PermissionProfile::Disabled,
)
.await?;
@@ -1313,7 +1316,7 @@ async fn unified_exec_emits_one_begin_and_one_end_event() -> Result<()> {
submit_unified_exec_turn(
&test,
"check poll event behavior",
SandboxPolicy::DangerFullAccess,
PermissionProfile::Disabled,
)
.await?;
@@ -1403,7 +1406,7 @@ async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {
];
let request_log = mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(&test, "run metadata test", SandboxPolicy::DangerFullAccess).await?;
submit_unified_exec_turn(&test, "run metadata test", PermissionProfile::Disabled).await?;
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
@@ -1502,7 +1505,7 @@ async fn exec_command_clamps_model_requested_max_output_tokens_to_policy() -> Re
submit_unified_exec_turn(
&test,
"run clamped max output test",
SandboxPolicy::DangerFullAccess,
PermissionProfile::Disabled,
)
.await?;
@@ -1585,7 +1588,7 @@ async fn write_stdin_clamps_model_requested_max_output_tokens_to_policy() -> Res
submit_unified_exec_turn(
&test,
"run clamped write_stdin output test",
SandboxPolicy::DangerFullAccess,
PermissionProfile::Disabled,
)
.await?;
@@ -1650,7 +1653,7 @@ async fn unified_exec_defaults_to_pipe() -> Result<()> {
submit_unified_exec_turn(
&test,
"check default pipe mode",
SandboxPolicy::DangerFullAccess,
PermissionProfile::Disabled,
)
.await?;
@@ -1717,7 +1720,7 @@ async fn unified_exec_can_enable_tty() -> Result<()> {
];
let request_log = mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(&test, "check tty enabled", SandboxPolicy::DangerFullAccess).await?;
submit_unified_exec_turn(&test, "check tty enabled", PermissionProfile::Disabled).await?;
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
@@ -1784,7 +1787,7 @@ async fn unified_exec_respects_early_exit_notifications() -> Result<()> {
submit_unified_exec_turn(
&test,
"watch early exit timing",
SandboxPolicy::DangerFullAccess,
PermissionProfile::Disabled,
)
.await?;
@@ -1903,7 +1906,7 @@ async fn write_stdin_returns_exit_metadata_and_clears_session() -> Result<()> {
submit_unified_exec_turn(
&test,
"test write_stdin exit behavior",
SandboxPolicy::DangerFullAccess,
PermissionProfile::Disabled,
)
.await?;
@@ -2056,7 +2059,7 @@ async fn unified_exec_emits_end_event_when_session_dies_via_stdin() -> Result<()
];
mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(&test, "end on exit", SandboxPolicy::DangerFullAccess).await?;
submit_unified_exec_turn(&test, "end on exit", PermissionProfile::Disabled).await?;
// We expect the ExecCommandEnd event to match the initial exec_command call_id.
let end_event = wait_for_event_match(&test.codex, |msg| match msg {
@@ -2122,6 +2125,9 @@ async fn unified_exec_keeps_long_running_session_after_turn_end() -> Result<()>
mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
let turn_cwd = cwd.path().to_path_buf();
let (sandbox_policy, permission_profile) =
turn_permission_fields(PermissionProfile::Disabled, &turn_cwd);
codex
.submit(Op::UserInput {
@@ -2133,9 +2139,10 @@ async fn unified_exec_keeps_long_running_session_after_turn_end() -> Result<()>
final_output_json_schema: None,
responsesapi_client_metadata: None,
thread_settings: codex_protocol::protocol::ThreadSettingsOverrides {
cwd: Some(cwd.path().to_path_buf()),
cwd: Some(turn_cwd),
approval_policy: Some(AskForApproval::Never),
sandbox_policy: Some(SandboxPolicy::DangerFullAccess),
sandbox_policy: Some(sandbox_policy),
permission_profile,
collaboration_mode: Some(codex_protocol::config_types::CollaborationMode {
mode: codex_protocol::config_types::ModeKind::Default,
settings: codex_protocol::config_types::Settings {
@@ -2221,6 +2228,9 @@ async fn unified_exec_interrupt_preserves_long_running_session() -> Result<()> {
mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
let turn_cwd = cwd.path().to_path_buf();
let (sandbox_policy, permission_profile) =
turn_permission_fields(PermissionProfile::Disabled, &turn_cwd);
codex
.submit(Op::UserInput {
@@ -2232,9 +2242,10 @@ async fn unified_exec_interrupt_preserves_long_running_session() -> Result<()> {
final_output_json_schema: None,
responsesapi_client_metadata: None,
thread_settings: codex_protocol::protocol::ThreadSettingsOverrides {
cwd: Some(cwd.path().to_path_buf()),
cwd: Some(turn_cwd),
approval_policy: Some(AskForApproval::Never),
sandbox_policy: Some(SandboxPolicy::DangerFullAccess),
sandbox_policy: Some(sandbox_policy),
permission_profile,
collaboration_mode: Some(codex_protocol::config_types::CollaborationMode {
mode: codex_protocol::config_types::ModeKind::Default,
settings: codex_protocol::config_types::Settings {
@@ -2330,7 +2341,7 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
];
let request_log = mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(&test, "run unified exec", SandboxPolicy::DangerFullAccess).await?;
submit_unified_exec_turn(&test, "run unified exec", PermissionProfile::Disabled).await?;
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
@@ -2448,12 +2459,7 @@ PY
];
let request_log = mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(
&test,
"exercise lag handling",
SandboxPolicy::DangerFullAccess,
)
.await?;
submit_unified_exec_turn(&test, "exercise lag handling", PermissionProfile::Disabled).await?;
// This is a worst case scenario for the truncate logic, and CI can spend a
// while draining the lagged tail before the follow-up tool call completes.
wait_for_event_with_timeout(
@@ -2548,7 +2554,7 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
];
let request_log = mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(&test, "check timeout", SandboxPolicy::DangerFullAccess).await?;
submit_unified_exec_turn(&test, "check timeout", PermissionProfile::Disabled).await?;
loop {
let event = test.codex.next_event().await.expect("event");
@@ -2624,12 +2630,7 @@ PY
];
let request_log = mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(
&test,
"summarize large output",
SandboxPolicy::DangerFullAccess,
)
.await?;
submit_unified_exec_turn(&test, "summarize large output", PermissionProfile::Disabled).await?;
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
@@ -2699,6 +2700,9 @@ async fn unified_exec_runs_under_sandbox() -> Result<()> {
let request_log = mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
let turn_cwd = cwd.path().to_path_buf();
let (sandbox_policy, permission_profile) =
turn_permission_fields(PermissionProfile::read_only(), &turn_cwd);
codex
.submit(Op::UserInput {
@@ -2710,9 +2714,10 @@ async fn unified_exec_runs_under_sandbox() -> Result<()> {
final_output_json_schema: None,
responsesapi_client_metadata: None,
thread_settings: codex_protocol::protocol::ThreadSettingsOverrides {
cwd: Some(cwd.path().to_path_buf()),
cwd: Some(turn_cwd),
approval_policy: Some(AskForApproval::Never),
sandbox_policy: Some(SandboxPolicy::new_read_only_policy()),
sandbox_policy: Some(sandbox_policy),
permission_profile,
collaboration_mode: Some(codex_protocol::config_types::CollaborationMode {
mode: codex_protocol::config_types::ModeKind::Default,
settings: codex_protocol::config_types::Settings {
@@ -2818,7 +2823,9 @@ async fn unified_exec_enforces_glob_deny_read_policy() -> Result<()> {
let request_log = mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
let read_only_policy = SandboxPolicy::new_read_only_policy();
let turn_cwd = cwd.path().to_path_buf();
let (sandbox_policy, permission_profile) =
turn_permission_fields(PermissionProfile::read_only(), &turn_cwd);
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
@@ -2829,9 +2836,10 @@ async fn unified_exec_enforces_glob_deny_read_policy() -> Result<()> {
final_output_json_schema: None,
responsesapi_client_metadata: None,
thread_settings: codex_protocol::protocol::ThreadSettingsOverrides {
cwd: Some(cwd.path().to_path_buf()),
cwd: Some(turn_cwd),
approval_policy: Some(AskForApproval::Never),
sandbox_policy: Some(read_only_policy),
sandbox_policy: Some(sandbox_policy),
permission_profile,
collaboration_mode: Some(codex_protocol::config_types::CollaborationMode {
mode: codex_protocol::config_types::ModeKind::Default,
settings: codex_protocol::config_types::Settings {
@@ -2952,6 +2960,9 @@ async fn unified_exec_python_prompt_under_seatbelt() -> Result<()> {
let request_log = mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
let turn_cwd = cwd.path().to_path_buf();
let (sandbox_policy, permission_profile) =
turn_permission_fields(PermissionProfile::read_only(), &turn_cwd);
codex
.submit(Op::UserInput {
@@ -2963,9 +2974,10 @@ async fn unified_exec_python_prompt_under_seatbelt() -> Result<()> {
final_output_json_schema: None,
responsesapi_client_metadata: None,
thread_settings: codex_protocol::protocol::ThreadSettingsOverrides {
cwd: Some(cwd.path().to_path_buf()),
cwd: Some(turn_cwd),
approval_policy: Some(AskForApproval::Never),
sandbox_policy: Some(SandboxPolicy::new_read_only_policy()),
sandbox_policy: Some(sandbox_policy),
permission_profile,
collaboration_mode: Some(codex_protocol::config_types::CollaborationMode {
mode: codex_protocol::config_types::ModeKind::Default,
settings: codex_protocol::config_types::Settings {
@@ -3052,12 +3064,7 @@ async fn unified_exec_runs_on_all_platforms() -> Result<()> {
];
let request_log = mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(
&test,
"summarize large output",
SandboxPolicy::DangerFullAccess,
)
.await?;
submit_unified_exec_turn(&test, "summarize large output", PermissionProfile::Disabled).await?;
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
@@ -3175,7 +3182,7 @@ async fn unified_exec_prunes_exited_sessions_first() -> Result<()> {
let response_mock =
mount_sse_sequence(&server, vec![first_response, completion_response]).await;
submit_unified_exec_turn(&test, "fill session cache", SandboxPolicy::DangerFullAccess).await?;
submit_unified_exec_turn(&test, "fill session cache", PermissionProfile::Disabled).await?;
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))