chore(core) Add shell_serialization coverage (#6810)

## Summary

Similar to #6545, this PR updates the `shell_serialization` test suite to cover the various `shell` tool invocations we have. Note that this does not cover `unified_exec`, which has its own suite of tests. This should provide some test coverage for when we eventually consolidate serialization logic.

## Testing

- [x] These are tests
2026-04-26 15:45:02 +00:00 · 2025-11-17 19:10:56 -08:00
parent ddcc60a085
commit 2b7378ac77
3 changed files with 279 additions and 299 deletions
--- a/codex-rs/core/tests/suite/shell_serialization.rs
+++ b/codex-rs/core/tests/suite/shell_serialization.rs
@@ -1,14 +1,13 @@
 #![cfg(not(target_os = "windows"))]
+#![allow(clippy::expect_used)]

 use anyhow::Result;
 use codex_core::features::Feature;
 use codex_core::model_family::find_family_for_model;
 use codex_core::protocol::SandboxPolicy;
 use core_test_support::assert_regex_match;
-use core_test_support::responses::ev_apply_patch_function_call;
 use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
-use core_test_support::responses::ev_custom_tool_call;
 use core_test_support::responses::ev_function_call;
 use core_test_support::responses::ev_local_shell_call;
 use core_test_support::responses::ev_response_created;
@@ -16,12 +15,18 @@ use core_test_support::responses::mount_sse_sequence;
 use core_test_support::responses::sse;
 use core_test_support::responses::start_mock_server;
 use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::ApplyPatchModelOutput;
+use core_test_support::test_codex::ShellModelOutput;
 use core_test_support::test_codex::test_codex;
 use pretty_assertions::assert_eq;
 use regex_lite::Regex;
 use serde_json::Value;
 use serde_json::json;
 use std::fs;
+use test_case::test_case;
+
+use crate::suite::apply_patch_cli::apply_patch_harness;
+use crate::suite::apply_patch_cli::mount_apply_patch;

 const FIXTURE_JSON: &str = r#"{
    "description": "This is an example JSON file.",
@@ -35,34 +40,88 @@ const FIXTURE_JSON: &str = r#"{
 }
 "#;

+fn shell_responses(
+    call_id: &str,
+    command: Vec<&str>,
+    output_type: ShellModelOutput,
+) -> Result<Vec<String>> {
+    match output_type {
+        ShellModelOutput::ShellCommand => {
+            let command = shlex::try_join(command)?;
+            let parameters = json!({
+                "command": command,
+                "timeout_ms": 2_000,
+            });
+            Ok(vec![
+                sse(vec![
+                    ev_response_created("resp-1"),
+                    ev_function_call(
+                        call_id,
+                        "shell_command",
+                        &serde_json::to_string(&parameters)?,
+                    ),
+                    ev_completed("resp-1"),
+                ]),
+                sse(vec![
+                    ev_assistant_message("msg-1", "done"),
+                    ev_completed("resp-2"),
+                ]),
+            ])
+        }
+        ShellModelOutput::Shell => {
+            let parameters = json!({
+                "command": command,
+                "timeout_ms": 2_000,
+            });
+            Ok(vec![
+                sse(vec![
+                    ev_response_created("resp-1"),
+                    ev_function_call(call_id, "shell", &serde_json::to_string(&parameters)?),
+                    ev_completed("resp-1"),
+                ]),
+                sse(vec![
+                    ev_assistant_message("msg-1", "done"),
+                    ev_completed("resp-2"),
+                ]),
+            ])
+        }
+        ShellModelOutput::LocalShell => Ok(vec![
+            sse(vec![
+                ev_response_created("resp-1"),
+                ev_local_shell_call(call_id, "completed", command),
+                ev_completed("resp-1"),
+            ]),
+            sse(vec![
+                ev_assistant_message("msg-1", "done"),
+                ev_completed("resp-2"),
+            ]),
+        ]),
+    }
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
+#[test_case(ShellModelOutput::Shell)]
+#[test_case(ShellModelOutput::ShellCommand)]
+#[test_case(ShellModelOutput::LocalShell)]
+async fn shell_output_stays_json_without_freeform_apply_patch(
+    output_type: ShellModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_config(move |config| {
        config.features.disable(Feature::ApplyPatchFreeform);
        config.model = "gpt-5".to_string();
        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
+        if matches!(output_type, ShellModelOutput::ShellCommand) {
+            config.features.enable(Feature::ShellCommandTool);
+        }
+        let _ = output_type;
    });
    let test = builder.build(&server).await?;

    let call_id = "shell-json";
-    let args = json!({
-        "command": ["/bin/echo", "shell json"],
-        "timeout_ms": 1_000,
-    });
-    let responses = vec![
-        sse(vec![
-            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
+    let responses = shell_responses(call_id, vec!["/bin/echo", "shell json"], output_type)?;
    let mock = mount_sse_sequence(&server, responses).await;

    test.submit_turn_with_policy(
@@ -80,7 +139,6 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {

    let mut parsed: Value = serde_json::from_str(output)?;
    if let Some(metadata) = parsed.get_mut("metadata").and_then(Value::as_object_mut) {
-        // duration_seconds is non-deterministic; remove it for deep equality
        let _ = metadata.remove("duration_seconds");
    }

@@ -102,31 +160,26 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> {
+#[test_case(ShellModelOutput::Shell)]
+#[test_case(ShellModelOutput::ShellCommand)]
+#[test_case(ShellModelOutput::LocalShell)]
+async fn shell_output_is_structured_with_freeform_apply_patch(
+    output_type: ShellModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_config(move |config| {
        config.features.enable(Feature::ApplyPatchFreeform);
+        if matches!(output_type, ShellModelOutput::ShellCommand) {
+            config.features.enable(Feature::ShellCommandTool);
+        }
+        let _ = output_type;
    });
    let test = builder.build(&server).await?;

    let call_id = "shell-structured";
-    let args = json!({
-        "command": ["/bin/echo", "freeform shell"],
-        "timeout_ms": 1_000,
-    });
-    let responses = vec![
-        sse(vec![
-            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
+    let responses = shell_responses(call_id, vec!["/bin/echo", "freeform shell"], output_type)?;
    let mock = mount_sse_sequence(&server, responses).await;

    test.submit_turn_with_policy(
@@ -159,14 +212,23 @@ freeform shell
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_output_preserves_fixture_json_without_serialization() -> Result<()> {
+#[test_case(ShellModelOutput::Shell)]
+#[test_case(ShellModelOutput::ShellCommand)]
+#[test_case(ShellModelOutput::LocalShell)]
+async fn shell_output_preserves_fixture_json_without_serialization(
+    output_type: ShellModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_config(move |config| {
        config.features.disable(Feature::ApplyPatchFreeform);
        config.model = "gpt-5".to_string();
        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
+        if matches!(output_type, ShellModelOutput::ShellCommand) {
+            config.features.enable(Feature::ShellCommandTool);
+        }
+        let _ = output_type;
    });
    let test = builder.build(&server).await?;

@@ -175,21 +237,11 @@ async fn shell_output_preserves_fixture_json_without_serialization() -> Result<(
    let fixture_path_str = fixture_path.to_string_lossy().to_string();

    let call_id = "shell-json-fixture";
-    let args = json!({
-        "command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
-        "timeout_ms": 1_000,
-    });
-    let responses = vec![
-        sse(vec![
-            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
+    let responses = shell_responses(
+        call_id,
+        vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()],
+        output_type,
+    )?;
    let mock = mount_sse_sequence(&server, responses).await;

    test.submit_turn_with_policy(
@@ -232,12 +284,21 @@ async fn shell_output_preserves_fixture_json_without_serialization() -> Result<(
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
+#[test_case(ShellModelOutput::Shell)]
+#[test_case(ShellModelOutput::ShellCommand)]
+#[test_case(ShellModelOutput::LocalShell)]
+async fn shell_output_structures_fixture_with_serialization(
+    output_type: ShellModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_config(move |config| {
        config.features.enable(Feature::ApplyPatchFreeform);
+        if matches!(output_type, ShellModelOutput::ShellCommand) {
+            config.features.enable(Feature::ShellCommandTool);
+        }
+        let _ = output_type;
    });
    let test = builder.build(&server).await?;

@@ -246,21 +307,11 @@ async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
    let fixture_path_str = fixture_path.to_string_lossy().to_string();

    let call_id = "shell-structured-fixture";
-    let args = json!({
-        "command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
-        "timeout_ms": 1_000,
-    });
-    let responses = vec![
-        sse(vec![
-            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
+    let responses = shell_responses(
+        call_id,
+        vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()],
+        output_type,
+    )?;
    let mock = mount_sse_sequence(&server, responses).await;

    test.submit_turn_with_policy(
@@ -298,40 +349,26 @@ async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_output_for_freeform_tool_records_duration() -> Result<()> {
+#[test_case(ShellModelOutput::Shell)]
+#[test_case(ShellModelOutput::ShellCommand)]
+#[test_case(ShellModelOutput::LocalShell)]
+async fn shell_output_for_freeform_tool_records_duration(
+    output_type: ShellModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_config(move |config| {
        config.include_apply_patch_tool = true;
+        if matches!(output_type, ShellModelOutput::ShellCommand) {
+            config.features.enable(Feature::ShellCommandTool);
+        }
+        let _ = output_type;
    });
    let test = builder.build(&server).await?;

-    #[cfg(target_os = "linux")]
-    let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
-
-    #[cfg(target_os = "macos")]
-    let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
-
-    #[cfg(windows)]
-    let sleep_cmd = "timeout 1";
-
    let call_id = "shell-structured";
-    let args = json!({
-        "command": sleep_cmd,
-        "timeout_ms": 2_000,
-    });
-    let responses = vec![
-        sse(vec![
-            json!({"type": "response.created", "response": {"id": "resp-1"}}),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
+    let responses = shell_responses(call_id, vec!["/bin/bash", "-c", "sleep 1"], output_type)?;
    let mock = mount_sse_sequence(&server, responses).await;

    test.submit_turn_with_policy(
@@ -371,33 +408,26 @@ $"#;
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_output_reserializes_truncated_content() -> Result<()> {
+#[test_case(ShellModelOutput::Shell)]
+#[test_case(ShellModelOutput::ShellCommand)]
+#[test_case(ShellModelOutput::LocalShell)]
+async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutput) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_config(move |config| {
        config.model = "gpt-5.1-codex".to_string();
        config.model_family =
-            find_family_for_model("gpt-5.1-codex").expect("gpt-5.1 is a model family");
+            find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
+        if matches!(output_type, ShellModelOutput::ShellCommand) {
+            config.features.enable(Feature::ShellCommandTool);
+        }
+        let _ = output_type;
    });
    let test = builder.build(&server).await?;

    let call_id = "shell-truncated";
-    let args = json!({
-        "command": ["/bin/sh", "-c", "seq 1 400"],
-        "timeout_ms": 5_000,
-    });
-    let responses = vec![
-        sse(vec![
-            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
+    let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "seq 1 400"], output_type)?;
    let mock = mount_sse_sequence(&server, responses).await;

    test.submit_turn_with_policy(
@@ -445,14 +475,16 @@ $"#;
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_custom_tool_output_is_structured() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_custom_tool_output_is_structured(
+    output_type: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

-    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
-        config.include_apply_patch_tool = true;
-    });
-    let test = builder.build(&server).await?;
+    let harness = apply_patch_harness().await?;

    let call_id = "apply-patch-structured";
    let file_name = "structured.txt";
@@ -463,33 +495,17 @@ async fn apply_patch_custom_tool_output_is_structured() -> Result<()> {
 *** End Patch
 "#
    );
-    let responses = vec![
-        sse(vec![
-            json!({"type": "response.created", "response": {"id": "resp-1"}}),
-            ev_custom_tool_call(call_id, "apply_patch", &patch),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
-    let mock = mount_sse_sequence(&server, responses).await;
+    mount_apply_patch(&harness, call_id, &patch, "done", output_type).await;

-    test.submit_turn_with_policy(
-        "apply the patch via custom tool",
-        SandboxPolicy::DangerFullAccess,
-    )
-    .await?;
+    harness
+        .test()
+        .submit_turn_with_policy(
+            "apply the patch via custom tool",
+            SandboxPolicy::DangerFullAccess,
+        )
+        .await?;

-    let req = mock
-        .last_request()
-        .expect("apply_patch output request recorded");
-    let output_item = req.custom_tool_call_output(call_id);
-    let output = output_item
-        .get("output")
-        .and_then(Value::as_str)
-        .expect("apply_patch output string");
+    let output = harness.apply_patch_output(call_id, output_type).await;

    let expected_pattern = format!(
        r"(?s)^Exit code: 0
@@ -499,53 +515,39 @@ Success. Updated the following files:
 A {file_name}
 ?$"
    );
-    assert_regex_match(&expected_pattern, output);
+    assert_regex_match(&expected_pattern, output.as_str());

    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_custom_tool_call_creates_file() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_custom_tool_call_creates_file(
+    output_type: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

-    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
-        config.include_apply_patch_tool = true;
-    });
-    let test = builder.build(&server).await?;
+    let harness = apply_patch_harness().await?;

    let call_id = "apply-patch-add-file";
    let file_name = "custom_tool_apply_patch.txt";
    let patch = format!(
        "*** Begin Patch\n*** Add File: {file_name}\n+custom tool content\n*** End Patch\n"
    );
-    let responses = vec![
-        sse(vec![
-            json!({"type": "response.created", "response": {"id": "resp-1"}}),
-            ev_custom_tool_call(call_id, "apply_patch", &patch),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "apply_patch done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
-    let mock = mount_sse_sequence(&server, responses).await;
+    mount_apply_patch(&harness, call_id, &patch, "apply_patch done", output_type).await;

-    test.submit_turn_with_policy(
-        "apply the patch via custom tool to create a file",
-        SandboxPolicy::DangerFullAccess,
-    )
-    .await?;
+    harness
+        .test()
+        .submit_turn_with_policy(
+            "apply the patch via custom tool to create a file",
+            SandboxPolicy::DangerFullAccess,
+        )
+        .await?;

-    let req = mock
-        .last_request()
-        .expect("apply_patch output request recorded");
-    let output_item = req.custom_tool_call_output(call_id);
-    let output = output_item
-        .get("output")
-        .and_then(Value::as_str)
-        .expect("apply_patch output string");
+    let output = harness.apply_patch_output(call_id, output_type).await;

    let expected_pattern = format!(
        r"(?s)^Exit code: 0
@@ -555,9 +557,9 @@ Success. Updated the following files:
 A {file_name}
 ?$"
    );
-    assert_regex_match(&expected_pattern, output);
+    assert_regex_match(&expected_pattern, output.as_str());

-    let new_file_path = test.cwd.path().join(file_name);
+    let new_file_path = harness.path(file_name);
    let created_contents = fs::read_to_string(&new_file_path)?;
    assert_eq!(
        created_contents, "custom tool content\n",
@@ -568,49 +570,42 @@ A {file_name}
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_custom_tool_call_updates_existing_file() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_custom_tool_call_updates_existing_file(
+    output_type: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

-    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
-        config.include_apply_patch_tool = true;
-    });
-    let test = builder.build(&server).await?;
+    let harness = apply_patch_harness().await?;

    let call_id = "apply-patch-update-file";
    let file_name = "custom_tool_apply_patch_existing.txt";
-    let file_path = test.cwd.path().join(file_name);
+    let file_path = harness.path(file_name);
    fs::write(&file_path, "before\n")?;
    let patch = format!(
        "*** Begin Patch\n*** Update File: {file_name}\n@@\n-before\n+after\n*** End Patch\n"
    );
-    let responses = vec![
-        sse(vec![
-            json!({"type": "response.created", "response": {"id": "resp-1"}}),
-            ev_custom_tool_call(call_id, "apply_patch", &patch),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "apply_patch update done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
-    let mock = mount_sse_sequence(&server, responses).await;
-
-    test.submit_turn_with_policy(
-        "apply the patch via custom tool to update a file",
-        SandboxPolicy::DangerFullAccess,
+    mount_apply_patch(
+        &harness,
+        call_id,
+        &patch,
+        "apply_patch update done",
+        output_type,
    )
-    .await?;
+    .await;

-    let req = mock
-        .last_request()
-        .expect("apply_patch output request recorded");
-    let output_item = req.custom_tool_call_output(call_id);
-    let output = output_item
-        .get("output")
-        .and_then(Value::as_str)
-        .expect("apply_patch output string");
+    harness
+        .test()
+        .submit_turn_with_policy(
+            "apply the patch via custom tool to update a file",
+            SandboxPolicy::DangerFullAccess,
+        )
+        .await?;
+
+    let output = harness.apply_patch_output(call_id, output_type).await;

    let expected_pattern = format!(
        r"(?s)^Exit code: 0
@@ -620,7 +615,7 @@ Success. Updated the following files:
 M {file_name}
 ?$"
    );
-    assert_regex_match(&expected_pattern, output);
+    assert_regex_match(&expected_pattern, output.as_str());

    let updated_contents = fs::read_to_string(file_path)?;
    assert_eq!(updated_contents, "after\n", "expected updated file content");
@@ -629,99 +624,83 @@ M {file_name}
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_custom_tool_call_reports_failure_output(
+    output_type: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

-    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
-        config.include_apply_patch_tool = true;
-    });
-    let test = builder.build(&server).await?;
+    let harness = apply_patch_harness().await?;

    let call_id = "apply-patch-failure";
    let missing_file = "missing_custom_tool_apply_patch.txt";
    let patch = format!(
        "*** Begin Patch\n*** Update File: {missing_file}\n@@\n-before\n+after\n*** End Patch\n"
    );
-    let responses = vec![
-        sse(vec![
-            json!({"type": "response.created", "response": {"id": "resp-1"}}),
-            ev_custom_tool_call(call_id, "apply_patch", &patch),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "apply_patch failure done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
-    let mock = mount_sse_sequence(&server, responses).await;
-
-    test.submit_turn_with_policy(
-        "attempt a failing apply_patch via custom tool",
-        SandboxPolicy::DangerFullAccess,
+    mount_apply_patch(
+        &harness,
+        call_id,
+        &patch,
+        "apply_patch failure done",
+        output_type,
    )
-    .await?;
+    .await;

-    let req = mock
-        .last_request()
-        .expect("apply_patch output request recorded");
-    let output_item = req.custom_tool_call_output(call_id);
-    let output = output_item
-        .get("output")
-        .and_then(Value::as_str)
-        .expect("apply_patch output string");
+    harness
+        .test()
+        .submit_turn_with_policy(
+            "attempt a failing apply_patch via custom tool",
+            SandboxPolicy::DangerFullAccess,
+        )
+        .await?;
+
+    let output = harness.apply_patch_output(call_id, output_type).await;

    let expected_output = format!(
        "apply_patch verification failed: Failed to read file to update {}/{missing_file}: No such file or directory (os error 2)",
-        test.cwd.path().to_string_lossy()
+        harness.cwd().to_string_lossy()
    );
-    assert_eq!(output, expected_output);
+    assert_eq!(output, expected_output.as_str());

    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_function_call_output_is_structured() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_function_call_output_is_structured(
+    output_type: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

-    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
-        config.include_apply_patch_tool = true;
-    });
-    let test = builder.build(&server).await?;
+    let harness = apply_patch_harness().await?;

    let call_id = "apply-patch-function";
    let file_name = "function_apply_patch.txt";
    let patch =
        format!("*** Begin Patch\n*** Add File: {file_name}\n+via function call\n*** End Patch\n");
-    let responses = vec![
-        sse(vec![
-            json!({"type": "response.created", "response": {"id": "resp-1"}}),
-            ev_apply_patch_function_call(call_id, &patch),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "apply_patch function done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
-    let mock = mount_sse_sequence(&server, responses).await;
-
-    test.submit_turn_with_policy(
-        "apply the patch via function-call apply_patch",
-        SandboxPolicy::DangerFullAccess,
+    mount_apply_patch(
+        &harness,
+        call_id,
+        &patch,
+        "apply_patch function done",
+        output_type,
    )
-    .await?;
-
-    let req = mock
-        .last_request()
-        .expect("apply_patch function output request recorded");
-    let output_item = req.function_call_output(call_id);
-    let output = output_item
-        .get("output")
-        .and_then(Value::as_str)
-        .expect("apply_patch output string");
+    .await;
+    harness
+        .test()
+        .submit_turn_with_policy(
+            "apply the patch via function-call apply_patch",
+            SandboxPolicy::DangerFullAccess,
+        )
+        .await?;

+    let output = harness.apply_patch_output(call_id, output_type).await;
    let expected_pattern = format!(
        r"(?s)^Exit code: 0
 Wall time: [0-9]+(?:\.[0-9]+)? seconds
@@ -730,40 +709,32 @@ Success. Updated the following files:
 A {file_name}
 ?$"
    );
-    assert_regex_match(&expected_pattern, output);
+    assert_regex_match(&expected_pattern, output.as_str());

    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_output_is_structured_for_nonzero_exit() -> Result<()> {
+#[test_case(ShellModelOutput::Shell)]
+#[test_case(ShellModelOutput::ShellCommand)]
+#[test_case(ShellModelOutput::LocalShell)]
+async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutput) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_config(move |config| {
        config.model = "gpt-5.1-codex".to_string();
        config.model_family =
            find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
        config.include_apply_patch_tool = true;
+        if matches!(output_type, ShellModelOutput::ShellCommand) {
+            config.features.enable(Feature::ShellCommandTool);
+        }
    });
    let test = builder.build(&server).await?;

    let call_id = "shell-nonzero-exit";
-    let args = json!({
-        "command": ["/bin/sh", "-c", "exit 42"],
-        "timeout_ms": 1_000,
-    });
-    let responses = vec![
-        sse(vec![
-            json!({"type": "response.created", "response": {"id": "resp-1"}}),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "shell failure handled"),
-            ev_completed("resp-2"),
-        ]),
-    ];
+    let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "exit 42"], output_type)?;
    let mock = mount_sse_sequence(&server, responses).await;

    test.submit_turn_with_policy(
@@ -793,7 +764,7 @@ async fn shell_command_output_is_structured() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_config(move |config| {
        config.features.enable(Feature::ShellCommandTool);
    });
    let test = builder.build(&server).await?;