Avoid double truncation (#6631)

1. Avoid double truncation by giving 10% above the tool default constant 2. Add tests that fails when const = 1
2026-04-28 00:25:56 +00:00 · 2025-11-13 16:59:31 -08:00
parent 7b027e7536
commit 9890ceb939
7 changed files with 264 additions and 35 deletions
--- a/codex-rs/core/tests/suite/truncation.rs
+++ b/codex-rs/core/tests/suite/truncation.rs
@@ -19,6 +19,7 @@ use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
 use core_test_support::responses::ev_function_call;
 use core_test_support::responses::ev_response_created;
+use core_test_support::responses::mount_sse_once;
 use core_test_support::responses::mount_sse_once_match;
 use core_test_support::responses::mount_sse_sequence;
 use core_test_support::responses::sse;
@@ -86,7 +87,7 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
        serde_json::from_str::<serde_json::Value>(&output).is_err(),
        "expected error output to be plain text",
    );
-    let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]\s*$"#;
+    let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]\s*$"#;
    assert_regex_match(truncated_pattern, &output);
    assert!(
        !output.contains("omitted"),
@@ -113,15 +114,25 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
    let fixture = builder.build(&server).await?;

    let call_id = "shell-too-large";
-    let args = serde_json::json!({
-        "command": ["/bin/sh", "-c", "seq 1 400"],
-        "timeout_ms": 5_000,
-    });
+    let args = if cfg!(windows) {
+        serde_json::json!({
+            "command": [
+                "powershell",
+                "-Command",
+                "for ($i=1; $i -le 400; $i++) { Write-Output $i }"
+            ],
+            "timeout_ms": 5_000,
+        })
+    } else {
+        serde_json::json!({
+            "command": ["/bin/sh", "-c", "seq 1 400"],
+            "timeout_ms": 5_000,
+        })
+    };

    // First response: model tells us to run the tool; second: complete the turn.
-    mount_sse_once_match(
+    mount_sse_once(
        &server,
-        any(),
        sse(vec![
            responses::ev_response_created("resp-1"),
            responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
@@ -149,6 +160,7 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
        .single_request()
        .function_call_output_text(call_id)
        .context("function_call_output present for shell call")?;
+    let output = output.replace("\r\n", "\n");

    // Expect plain text (not JSON) with truncation markers and line elision.
    assert!(
@@ -180,6 +192,75 @@ $"#;
    Ok(())
 }

+// Ensures shell tool outputs that exceed the line limit are truncated only once.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn tool_call_output_truncated_only_once() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5-codex".to_string();
+        config.model_family =
+            find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
+    });
+    let fixture = builder.build(&server).await?;
+    let call_id = "shell-single-truncation";
+    let args = if cfg!(windows) {
+        serde_json::json!({
+            "command": [
+                "powershell",
+                "-Command",
+                "for ($i=1; $i -le 2000; $i++) { Write-Output $i }"
+            ],
+            "timeout_ms": 5_000,
+        })
+    } else {
+        serde_json::json!({
+            "command": ["/bin/sh", "-c", "seq 1 2000"],
+            "timeout_ms": 5_000,
+        })
+    };
+
+    mount_sse_once_match(
+        &server,
+        any(),
+        sse(vec![
+            responses::ev_response_created("resp-1"),
+            responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            responses::ev_completed("resp-1"),
+        ]),
+    )
+    .await;
+    let mock2 = mount_sse_once_match(
+        &server,
+        any(),
+        sse(vec![
+            responses::ev_assistant_message("msg-1", "done"),
+            responses::ev_completed("resp-2"),
+        ]),
+    )
+    .await;
+
+    fixture
+        .submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
+        .await?;
+
+    let output = mock2
+        .single_request()
+        .function_call_output_text(call_id)
+        .context("function_call_output present for shell call")?;
+
+    let total_line_headers = output.matches("Total output lines:").count();
+
+    assert_eq!(
+        total_line_headers, 1,
+        "shell output should carry only one truncation header: {output}"
+    );
+
+    Ok(())
+}
+
 // Verifies that an MCP tool call result exceeding the model formatting limits
 // is truncated before being sent back to the model.
 #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
@@ -269,7 +350,8 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
        output.starts_with("Total output lines: 1\n\n{"),
        "expected total line header and JSON head, got: {output}"
    );
-    let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]")
+
+    let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]")
        .expect("compile regex");
    assert!(
        byte_marker.is_match(&output),
--- a/codex-rs/core/tests/suite/user_shell_cmd.rs
+++ b/codex-rs/core/tests/suite/user_shell_cmd.rs
@@ -1,18 +1,31 @@
+use anyhow::Context;
 use codex_core::ConversationManager;
 use codex_core::NewConversation;
+use codex_core::model_family::find_family_for_model;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::ExecCommandEndEvent;
 use codex_core::protocol::ExecOutputStream;
 use codex_core::protocol::Op;
+use codex_core::protocol::SandboxPolicy;
 use codex_core::protocol::TurnAbortReason;
 use core_test_support::assert_regex_match;
 use core_test_support::load_default_config_for_test;
 use core_test_support::responses;
+use core_test_support::responses::ev_assistant_message;
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::ev_function_call;
+use core_test_support::responses::ev_response_created;
+use core_test_support::responses::mount_sse_once_match;
+use core_test_support::responses::sse;
+use core_test_support::responses::start_mock_server;
+use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
 use core_test_support::wait_for_event_match;
 use regex_lite::escape;
 use std::path::PathBuf;
 use tempfile::TempDir;
+use wiremock::matchers::any;

 #[tokio::test]
 async fn user_shell_cmd_ls_and_cat_in_temp_dir() {
@@ -249,3 +262,72 @@ async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<(

    Ok(())
 }
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5-codex".to_string();
+        config.model_family =
+            find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
+    });
+    let fixture = builder.build(&server).await?;
+
+    let call_id = "user-shell-double-truncation";
+    let args = if cfg!(windows) {
+        serde_json::json!({
+            "command": [
+                "powershell",
+                "-Command",
+                "for ($i=1; $i -le 2000; $i++) { Write-Output $i }"
+            ],
+            "timeout_ms": 5_000,
+        })
+    } else {
+        serde_json::json!({
+            "command": ["/bin/sh", "-c", "seq 1 2000"],
+            "timeout_ms": 5_000,
+        })
+    };
+
+    mount_sse_once_match(
+        &server,
+        any(),
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+    )
+    .await;
+    let mock2 = mount_sse_once_match(
+        &server,
+        any(),
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    )
+    .await;
+
+    fixture
+        .submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
+        .await?;
+
+    let output = mock2
+        .single_request()
+        .function_call_output_text(call_id)
+        .context("function_call_output present for shell call")?;
+
+    let truncation_headers = output.matches("Total output lines:").count();
+
+    assert_eq!(
+        truncation_headers, 1,
+        "shell output should carry only one truncation header: {output}"
+    );
+
+    Ok(())
+}