chore(shell_command) fix freeform timeout output (#7791)

## Summary

Adding an additional integration test for `timeout_ms`.

## Testing

- [x] these are tests
2026-04-24 14:45:27 +00:00 · 2025-12-15 19:26:39 -08:00
parent c0a12b3952
commit b9d1a087ee
2 changed files with 73 additions and 19 deletions
--- a/codex-rs/core/src/tools/mod.rs
+++ b/codex-rs/core/src/tools/mod.rs
@@ -70,9 +70,11 @@ pub fn format_exec_output_for_model_freeform(
    // round to 1 decimal place
    let duration_seconds = ((exec_output.duration.as_secs_f32()) * 10.0).round() / 10.0;

-    let total_lines = exec_output.aggregated_output.text.lines().count();
+    let content = build_content_with_timeout(exec_output);

-    let formatted_output = truncate_text(&exec_output.aggregated_output.text, truncation_policy);
+    let total_lines = content.lines().count();
+
+    let formatted_output = truncate_text(&content, truncation_policy);

    let mut sections = Vec::new();

@@ -92,21 +94,21 @@ pub fn format_exec_output_str(
    exec_output: &ExecToolCallOutput,
    truncation_policy: TruncationPolicy,
 ) -> String {
-    let ExecToolCallOutput {
-        aggregated_output, ..
-    } = exec_output;
-
-    let content = aggregated_output.text.as_str();
-
-    let body = if exec_output.timed_out {
-        format!(
-            "command timed out after {} milliseconds\n{content}",
-            exec_output.duration.as_millis()
-        )
-    } else {
-        content.to_string()
-    };
+    let content = build_content_with_timeout(exec_output);

    // Truncate for model consumption before serialization.
-    formatted_truncate_text(&body, truncation_policy)
+    formatted_truncate_text(&content, truncation_policy)
+}
+
+/// Returns the aggregated output text, prefixed with a "command timed out after N milliseconds" line when `timed_out` is set.
+fn build_content_with_timeout(exec_output: &ExecToolCallOutput) -> String {
+    if exec_output.timed_out {
+        format!(
+            "command timed out after {} milliseconds\n{}",
+            exec_output.duration.as_millis(), // reports the recorded `duration`; no configured timeout value is read here
+            exec_output.aggregated_output.text
+        )
+    } else {
+        exec_output.aggregated_output.text.clone() // owned copy, since `exec_output` is only borrowed
+    }
 }
--- a/codex-rs/core/tests/suite/shell_command.rs
+++ b/codex-rs/core/tests/suite/shell_command.rs
@@ -13,10 +13,15 @@ use core_test_support::test_codex::TestCodexHarness;
 use core_test_support::test_codex::test_codex;
 use serde_json::json;

-fn shell_responses(call_id: &str, command: &str, login: Option<bool>) -> Vec<String> {
+fn shell_responses_with_timeout(
+    call_id: &str,
+    command: &str,
+    login: Option<bool>,
+    timeout_ms: i64,
+) -> Vec<String> {
    let args = json!({
        "command": command,
-        "timeout_ms": 2_000,
+        "timeout_ms": timeout_ms,
        "login": login,
    });

@@ -36,6 +41,10 @@ fn shell_responses(call_id: &str, command: &str, login: Option<bool>) -> Vec<Str
    ]
 }

+fn shell_responses(call_id: &str, command: &str, login: Option<bool>) -> Vec<String> {
+    shell_responses_with_timeout(call_id, command, login, 2_000) // 2_000 is the "timeout_ms" value this helper previously hard-coded
+}
+
 async fn shell_command_harness_with(
    configure: impl FnOnce(TestCodexBuilder) -> TestCodexBuilder,
 ) -> Result<TestCodexHarness> {
@@ -54,6 +63,20 @@ async fn mount_shell_responses(
    mount_sse_sequence(harness.server(), shell_responses(call_id, command, login)).await;
 }

+async fn mount_shell_responses_with_timeout(
+    harness: &TestCodexHarness,
+    call_id: &str,
+    command: &str,
+    login: Option<bool>,
+    timeout_ms: i64, // forwarded unchanged; ends up as the "timeout_ms" field of the tool-call arguments
+) {
+    mount_sse_sequence( // same mounting call as `mount_shell_responses`, but with a caller-chosen timeout
+        harness.server(),
+        shell_responses_with_timeout(call_id, command, login, timeout_ms),
+    )
+    .await;
+}
+
 fn assert_shell_command_output(output: &str, expected: &str) -> Result<()> {
    let normalized_output = output
        .replace("\r\n", "\n")
@@ -172,3 +195,32 @@ async fn pipe_output_without_login() -> anyhow::Result<()> {

    Ok(())
 }
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn shell_command_times_out_with_timeout_ms() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
+
+    let call_id = "shell-command-timeout";
+    let command = if cfg!(windows) {
+        "timeout /t 5" // NOTE(review): timeout.exe may exit immediately when stdin is redirected — confirm it really blocks under the harness
+    } else {
+        "sleep 5"
+    };
+    mount_shell_responses_with_timeout(&harness, call_id, command, None, 200).await; // timeout_ms = 200, far shorter than the 5-second command
+    harness
+        .submit("run a long command with a short timeout")
+        .await?;
+
+    let output = harness.function_call_stdout(call_id).await;
+    let normalized_output = output
+        .replace("\r\n", "\n") // fold CRLF and lone CR into LF so the `\n`-based pattern below can match
+        .replace('\r', "\n")
+        .trim_end_matches('\n')
+        .to_string();
+    let expected_pattern = r"(?s)^Exit code: 124\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nOutput:\ncommand timed out after [0-9]+ milliseconds\n?$";
+    assert_regex_match(expected_pattern, &normalized_output); // expects exit code 124, a wall-time line, and only the timeout message as output
+
+    Ok(())
+}