Compare commits

...

1 Commits

Author SHA1 Message Date
pakrym-oai
8d5fde46c7 Remove truncation line-count headers 2026-05-18 16:16:28 -07:00
13 changed files with 272 additions and 98 deletions

View File

@@ -8,6 +8,7 @@ pub(crate) struct UserShellCommand {
pub(crate) exit_code: i32,
pub(crate) duration_seconds: f64,
pub(crate) output: String,
pub(crate) original_token_count: Option<usize>,
}
impl UserShellCommand {
@@ -16,12 +17,14 @@ impl UserShellCommand {
exit_code: i32,
duration: Duration,
output: impl Into<String>,
original_token_count: Option<usize>,
) -> Self {
Self {
command: command.into(),
exit_code,
duration_seconds: duration.as_secs_f64(),
output: output.into(),
original_token_count,
}
}
}
@@ -32,9 +35,14 @@ impl ContextualUserFragment for UserShellCommand {
const END_MARKER: &'static str = "</user_shell_command>";
fn body(&self) -> String {
let truncation_warning = self
.original_token_count
.map(crate::tools::truncation_warning)
.map(|warning| format!("{warning}\n"))
.unwrap_or_default();
format!(
"\n<command>\n{}\n</command>\n<result>\nExit code: {}\nDuration: {:.4} seconds\nOutput:\n{}\n</result>\n",
self.command, self.exit_code, self.duration_seconds, self.output,
"\n<command>\n{}\n</command>\n<result>\nExit code: {}\nDuration: {:.4} seconds\n{}Output:\n{}\n</result>\n",
self.command, self.exit_code, self.duration_seconds, truncation_warning, self.output,
)
}
}

View File

@@ -34,7 +34,7 @@ use codex_features::Feature;
use codex_tools::ToolName;
use codex_utils_output_truncation::TruncationPolicy;
use codex_utils_output_truncation::formatted_truncate_text_content_items_with_policy;
use codex_utils_output_truncation::truncate_function_output_items_with_policy;
use codex_utils_output_truncation::truncate_function_output_items_with_original_token_count;
pub(crate) use execute_handler::CodeModeExecuteHandler;
use response_adapter::into_function_call_output_content_items;
@@ -169,15 +169,27 @@ pub(super) async fn handle_runtime_response(
RuntimeResponse::Yielded { content_items, .. } => {
let mut content_items = into_function_call_output_content_items(content_items);
sanitize_runtime_image_detail(exec.turn.as_ref(), &mut content_items);
content_items = truncate_code_mode_result(content_items, max_output_tokens);
prepend_script_status(&mut content_items, &script_status, started_at.elapsed());
let (mut content_items, original_token_count) =
truncate_code_mode_result(content_items, max_output_tokens);
prepend_script_status(
&mut content_items,
&script_status,
started_at.elapsed(),
original_token_count,
);
Ok(FunctionToolOutput::from_content(content_items, Some(true)))
}
RuntimeResponse::Terminated { content_items, .. } => {
let mut content_items = into_function_call_output_content_items(content_items);
sanitize_runtime_image_detail(exec.turn.as_ref(), &mut content_items);
content_items = truncate_code_mode_result(content_items, max_output_tokens);
prepend_script_status(&mut content_items, &script_status, started_at.elapsed());
let (mut content_items, original_token_count) =
truncate_code_mode_result(content_items, max_output_tokens);
prepend_script_status(
&mut content_items,
&script_status,
started_at.elapsed(),
original_token_count,
);
Ok(FunctionToolOutput::from_content(content_items, Some(true)))
}
RuntimeResponse::Result {
@@ -199,8 +211,14 @@ pub(super) async fn handle_runtime_response(
text: format!("Script error:\n{error_text}"),
});
}
content_items = truncate_code_mode_result(content_items, max_output_tokens);
prepend_script_status(&mut content_items, &script_status, started_at.elapsed());
let (mut content_items, original_token_count) =
truncate_code_mode_result(content_items, max_output_tokens);
prepend_script_status(
&mut content_items,
&script_status,
started_at.elapsed(),
original_token_count,
);
Ok(FunctionToolOutput::from_content(
content_items,
Some(success),
@@ -233,28 +251,32 @@ fn prepend_script_status(
content_items: &mut Vec<FunctionCallOutputContentItem>,
status: &str,
wall_time: Duration,
original_token_count: Option<usize>,
) {
let wall_time_seconds = ((wall_time.as_secs_f32()) * 10.0).round() / 10.0;
let header = format!("{status}\nWall time {wall_time_seconds:.1} seconds\nOutput:\n");
let mut header = format!("{status}\nWall time {wall_time_seconds:.1} seconds\n");
if let Some(original_token_count) = original_token_count {
header.push_str(&crate::tools::truncation_warning(original_token_count));
header.push('\n');
}
header.push_str("Output:\n");
content_items.insert(0, FunctionCallOutputContentItem::InputText { text: header });
}
fn truncate_code_mode_result(
items: Vec<FunctionCallOutputContentItem>,
max_output_tokens: Option<usize>,
) -> Vec<FunctionCallOutputContentItem> {
) -> (Vec<FunctionCallOutputContentItem>, Option<usize>) {
let max_output_tokens = resolve_max_tokens(max_output_tokens);
let policy = TruncationPolicy::Tokens(max_output_tokens);
if items
.iter()
.all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }))
{
let (truncated_items, _) =
formatted_truncate_text_content_items_with_policy(&items, policy);
return truncated_items;
return formatted_truncate_text_content_items_with_policy(&items, policy);
}
truncate_function_output_items_with_policy(&items, policy)
truncate_function_output_items_with_original_token_count(&items, policy)
}
async fn call_nested_tool(

View File

@@ -415,8 +415,20 @@ impl ExecCommandToolOutput {
sections.push(format!("Process running with session ID {process_id}"));
}
if let Some(original_token_count) = self.original_token_count {
sections.push(format!("Original token count: {original_token_count}"));
let computed_original_token_count = self.max_output_tokens.and_then(|max_tokens| {
let text = String::from_utf8_lossy(&self.raw_output);
let (_, original_token_count) =
codex_utils_output_truncation::truncate_text_with_original_token_count(
text.as_ref(),
TruncationPolicy::Tokens(max_tokens),
);
original_token_count
});
if let Some(original_token_count) = computed_original_token_count
.map(|computed| self.original_token_count.unwrap_or(computed))
{
sections.push(crate::tools::truncation_warning(original_token_count));
}
sections.push("Output:".to_string());

View File

@@ -450,7 +450,7 @@ fn exec_command_tool_output_formats_truncated_response() {
^Chunk\ ID:\ abc123
\nWall\ time:\ \d+\.\d{4}\ seconds
\nProcess\ exited\ with\ code\ 0
\nOriginal\ token\ count:\ 10
\nWarning:\ truncated\ output\ \(original\ token\ count:\ 10\)
\nOutput:
\n.*tokens\ truncated.*
$"#,

View File

@@ -21,8 +21,7 @@ use std::borrow::Cow;
use codex_protocol::exec_output::ExecToolCallOutput;
use codex_tools::ToolName;
use codex_utils_output_truncation::TruncationPolicy;
use codex_utils_output_truncation::formatted_truncate_text;
use codex_utils_output_truncation::truncate_text;
use codex_utils_output_truncation::truncate_text_with_original_token_count;
pub use router::ToolRouter;
// Telemetry preview limits: keep log events smaller than model budgets.
@@ -58,6 +57,10 @@ pub(crate) fn tool_user_shell_type(
}
}
/// Builds the single-line notice that tells the reader a tool output was truncated.
///
/// `original_token_count` is interpolated as-is into the message; the result has no
/// trailing newline, so callers add their own separator. The call sites visible in this
/// change place the notice immediately before the `Output:` line of a tool result.
pub(crate) fn truncation_warning(original_token_count: usize) -> String {
format!("Warning: truncated output (original token count: {original_token_count})")
}
/// Format the combined exec output for sending back to the model.
/// Includes exit code and duration metadata; truncates large bodies safely.
pub fn format_exec_output_for_model(
@@ -66,21 +69,17 @@ pub fn format_exec_output_for_model(
) -> String {
// round to 1 decimal place
let duration_seconds = ((exec_output.duration.as_secs_f32()) * 10.0).round() / 10.0;
let content = build_content_with_timeout(exec_output);
let total_lines = content.lines().count();
let formatted_output = truncate_text(&content, truncation_policy);
let (formatted_output, original_token_count) =
truncate_text_with_original_token_count(&content, truncation_policy);
let mut sections = Vec::new();
sections.push(format!("Exit code: {}", exec_output.exit_code));
sections.push(format!("Wall time: {duration_seconds} seconds"));
if total_lines != formatted_output.lines().count() {
sections.push(format!("Total output lines: {total_lines}"));
if let Some(original_token_count) = original_token_count {
sections.push(truncation_warning(original_token_count));
}
sections.push("Output:".to_string());
sections.push(formatted_output);
@@ -91,10 +90,17 @@ pub fn format_exec_output_str(
exec_output: &ExecToolCallOutput,
truncation_policy: TruncationPolicy,
) -> String {
format_exec_output_str_with_original_token_count(exec_output, truncation_policy).0
}
pub fn format_exec_output_str_with_original_token_count(
exec_output: &ExecToolCallOutput,
truncation_policy: TruncationPolicy,
) -> (String, Option<usize>) {
let content = build_content_with_timeout(exec_output);
// Truncate for model consumption before serialization.
formatted_truncate_text(&content, truncation_policy)
truncate_text_with_original_token_count(&content, truncation_policy)
}
/// Extracts exec output content and prepends a timeout message if the command timed out.

View File

@@ -4,15 +4,24 @@ use codex_protocol::models::ResponseItem;
use crate::context::ContextualUserFragment;
use crate::context::UserShellCommand;
use crate::session::turn_context::TurnContext;
use crate::tools::format_exec_output_str;
use crate::tools::format_exec_output_str_with_original_token_count;
fn user_shell_command_fragment(
command: &str,
exec_output: &ExecToolCallOutput,
turn_context: &TurnContext,
) -> UserShellCommand {
let output = format_exec_output_str(exec_output, turn_context.truncation_policy);
UserShellCommand::new(command, exec_output.exit_code, exec_output.duration, output)
let (output, original_token_count) = format_exec_output_str_with_original_token_count(
exec_output,
turn_context.truncation_policy,
);
UserShellCommand::new(
command,
exec_output.exit_code,
exec_output.duration,
output,
original_token_count,
)
}
#[cfg(test)]

View File

@@ -647,7 +647,6 @@ text(JSON.stringify(results));
Ok(())
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<()> {
skip_if_no_network!(Ok(()));
@@ -655,12 +654,9 @@ async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn(
&server,
"use exec to truncate the final result",
"truncate the final code mode result",
r#"// @exec: {"max_output_tokens": 6}
text(JSON.stringify(await tools.exec_command({
cmd: "printf 'token one token two token three token four token five token six token seven'",
max_output_tokens: 100
})));
text("alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu");
"#,
)
.await?;
@@ -671,18 +667,16 @@ text(JSON.stringify(await tools.exec_command({
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
r"Script completed\nWall time \d+\.\d seconds\n",
r"Warning: truncated output \(original token count: 17\)\n",
r"Output:\n\z",
),
text_item(&items, /*index*/ 0),
);
let expected_pattern = r#"(?sx)
\A
Total\ output\ lines:\ 1\n
\n
.*…\d+\ tokens\ truncated….*
\z
"#;
assert_regex_match(expected_pattern, text_item(&items, /*index*/ 1));
assert_eq!(
text_item(&items, /*index*/ 1),
"alpha beta g…11 tokens truncated…pa lambda mu"
);
Ok(())
}

View File

@@ -446,6 +446,7 @@ async fn shell_command_output_is_not_truncated_over_10k_bytes() -> Result<()> {
let expected_pattern = r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Warning: truncated output \(original token count: 2501\)
Output:
1*…1 chars truncated…1*$";
assert_regex_match(expected_pattern, output);

View File

@@ -179,14 +179,14 @@ async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {
"expected truncated shell output to be plain text"
);
let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 100000\nOutput:\n.*?…\d+ chars truncated….*$"#;
let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nWarning: truncated output \(original token count: 147224\)\nOutput:\n.*?…\d+ chars truncated….*$"#;
assert_regex_match(truncated_pattern, &output);
let len = output.len();
assert!(
(9_900..=10_100).contains(&len),
"expected ~10k chars after truncation, got {len}"
(10_100..=10_200).contains(&len),
"expected ~10k chars plus shell metadata after truncation, got {len}"
);
Ok(())
@@ -256,7 +256,7 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
);
let truncated_pattern = r#"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Total output lines: 100000
Warning: truncated output \(original token count: 147224\)
Output:
1
2
@@ -324,11 +324,20 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
.context("function_call_output present for shell call")?;
let truncation_markers = output.matches("tokens truncated").count();
let truncation_warnings = output.matches("Warning: truncated output").count();
assert_eq!(
truncation_markers, 1,
"shell output should carry only one truncation marker: {output}"
);
assert_eq!(
truncation_warnings, 1,
"shell output should carry only one truncation warning: {output}"
);
assert!(
!output.contains("Total output lines:"),
"shell output should not carry a line-count header: {output}"
);
Ok(())
}
@@ -601,7 +610,7 @@ async fn token_policy_marker_reports_tokens() -> Result<()> {
.function_call_output_text(call_id)
.context("shell output present")?;
let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 150\nOutput:\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19.*tokens truncated.*129\n130\n131\n132\n133\n134\n135\n136\n137\n138\n139\n140\n141\n142\n143\n144\n145\n146\n147\n148\n149\n150\n$";
let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nWarning: truncated output \(original token count: 123\)\nOutput:\n.*tokens truncated.*$";
assert_regex_match(pattern, &output);
@@ -652,7 +661,7 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> {
.function_call_output_text(call_id)
.context("shell output present")?;
let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 150\nOutput:\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19.*chars truncated.*129\n130\n131\n132\n133\n134\n135\n136\n137\n138\n139\n140\n141\n142\n143\n144\n145\n146\n147\n148\n149\n150\n$";
let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nWarning: truncated output \(original token count: 123\)\nOutput:\n.*chars truncated.*$";
assert_regex_match(pattern, &output);

View File

@@ -69,12 +69,11 @@ fn parse_unified_exec_output(raw: &str) -> Result<ParsedUnifiedExecOutput> {
static OUTPUT_REGEX: OnceLock<Regex> = OnceLock::new();
let regex = OUTPUT_REGEX.get_or_init(|| {
Regex::new(concat!(
r#"(?s)^(?:Total output lines: \d+\n\n)?"#,
r#"(?:Chunk ID: (?P<chunk_id>[^\n]+)\n)?"#,
r#"(?s)^(?:Chunk ID: (?P<chunk_id>[^\n]+)\n)?"#,
r#"Wall time: (?P<wall_time>-?\d+(?:\.\d+)?) seconds\n"#,
r#"(?:Process exited with code (?P<exit_code>-?\d+)\n)?"#,
r#"(?:Process running with session ID (?P<process_id>-?\d+)\n)?"#,
r#"(?:Original token count: (?P<original_token_count>\d+)\n)?"#,
r#"(?:Warning: truncated output \(original token count: (?P<original_token_count>\d+)\)\n)?"#,
r#"Output:\n?(?P<output>.*)$"#,
))
.expect("valid unified exec output regex")
@@ -166,6 +165,13 @@ async fn wait_for_raw_unified_exec_output(
test: &TestCodex,
call_id: &str,
) -> Result<ParsedUnifiedExecOutput> {
let content = wait_for_raw_unified_exec_output_text(test, call_id).await?;
parse_unified_exec_output(&content)
.with_context(|| format!("failed to parse raw unified exec output for {call_id}"))
}
async fn wait_for_raw_unified_exec_output_text(test: &TestCodex, call_id: &str) -> Result<String> {
let content = wait_for_event_match(&test.codex, |event| match event {
EventMsg::RawResponseItem(raw) => match &raw.item {
ResponseItem::FunctionCallOutput {
@@ -178,8 +184,7 @@ async fn wait_for_raw_unified_exec_output(
})
.await;
parse_unified_exec_output(&content)
.with_context(|| format!("failed to parse raw unified exec output for {call_id}"))
Ok(content)
}
async fn submit_unified_exec_turn(
@@ -1453,6 +1458,67 @@ async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {
Ok(())
}
/// Checks that an `exec_command` result whose output fits within `max_output_tokens`
/// carries no `Warning: truncated output` line between the exit-status line and `Output:`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_command_omits_truncation_warning_for_untruncated_output() -> Result<()> {
// NOTE(review): these macros presumably return early with `Ok(())` when the named
// environment condition holds; their definitions are not visible here.
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
// The test drives the unified exec tool, so the feature is switched on explicitly.
let mut builder = test_codex().with_config(|config| {
config
.features
.enable(Feature::UnifiedExec)
.expect("test config should allow feature update");
});
let test = builder.build_with_remote_env(&server).await?;
let call_id = "uexec-untruncated-output";
// "short output" is far below the 100-token budget requested here, so no truncation
// is expected.
let args = serde_json::json!({
"cmd": "printf 'short output'",
"yield_time_ms": 500,
"max_output_tokens": 100,
});
// First SSE stream: the model requests the `exec_command` call.
// Second SSE stream: the model finishes the turn with a plain assistant message.
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
let request_log = mount_sse_sequence(&server, responses).await;
submit_unified_exec_turn(&test, "run short output", PermissionProfile::Disabled).await?;
// Wait for the whole turn to finish before reading the logged function-call output.
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
})
.await;
let raw_output = request_log
.function_call_output_text(call_id)
.context("function_call_output present for exec_command call")?;
// Normalize CRLF so the pattern below only has to deal with `\n`.
let raw_output = raw_output.replace("\r\n", "\n");
// `\A` … `\z` anchor the whole string: any extra header line (such as a truncation
// warning) between "Process exited with code 0" and "Output:" would fail the match.
assert_regex_match(
concat!(
r"\AChunk ID: [0-9a-f]{6}\n",
r"Wall time: \d+\.\d{4} seconds\n",
r"Process exited with code 0\n",
r"Output:\n",
r"short output\z",
),
&raw_output,
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_command_clamps_model_requested_max_output_tokens_to_policy() -> Result<()> {
skip_if_no_network!(Ok(()));
@@ -1499,12 +1565,26 @@ async fn exec_command_clamps_model_requested_max_output_tokens_to_policy() -> Re
)
.await?;
let output = wait_for_raw_unified_exec_output(&test, call_id).await?;
assert_eq!(output.original_token_count, Some(8_991));
let output_text = output.output.replace("\r\n", "\n");
let raw_output = wait_for_raw_unified_exec_output_text(&test, call_id)
.await?
.replace("\r\n", "\n");
assert_regex_match(
r"^Total output lines: 999\n\nEXEC-LINE-0001 x{20}\nEXEC-LINE-0002 x{20}\nEXEC-LINE-0003 x{13}…8941 tokens truncated…E-0997 x{20}\nEXEC-LINE-0998 x{20}\nEXEC-LINE-0999 x{20}\n$",
&output_text,
concat!(
r"\AChunk ID: [0-9a-f]{6}\n",
r"Wall time: \d+\.\d{4} seconds\n",
r"Process exited with code 0\n",
r"Warning: truncated output \(original token count: 8991\)\n",
r"Output:\n",
r"EXEC-LINE-0001 x{20}\n",
r"EXEC-LINE-0002 x{20}\n",
r"EXEC-LINE-0003 x{13}",
r"…8941 tokens truncated…",
r"E-0997 x{20}\n",
r"EXEC-LINE-0998 x{20}\n",
r"EXEC-LINE-0999 x{20}\n",
r"\z",
),
&raw_output,
);
wait_for_event(&test.codex, |event| {
@@ -1592,7 +1672,7 @@ async fn write_stdin_clamps_model_requested_max_output_tokens_to_policy() -> Res
assert_eq!(stdin_output.original_token_count, Some(9_492));
let stdin_output_text = stdin_output.output.replace("\r\n", "\n");
assert_regex_match(
r"^Total output lines: 1000\n\ngo\nSTDIN-LINE-0001 y{20}\nSTDIN-LINE-0002 y{20}\nSTDIN-LINE-0003 yyyy…9442 tokens truncated…7 y{20}\nSTDIN-LINE-0998 y{20}\nSTDIN-LINE-0999 y{20}\n$",
r"^go\nSTDIN-LINE-0001 y{20}\nSTDIN-LINE-0002 y{20}\nSTDIN-LINE-0003 yyyy…9442 tokens truncated…7 y{20}\nSTDIN-LINE-0998 y{20}\nSTDIN-LINE-0999 y{20}\n$",
&stdin_output_text,
);
@@ -2628,7 +2708,8 @@ PY
let large_output = outputs.get(call_id).expect("missing large output summary");
let output_text = large_output.output.replace("\r\n", "\n");
let truncated_pattern = r"(?s)^Total output lines: \d+\n\n(token token \n){5,}.*…\d+ tokens truncated….*(token token \n){5,}$";
let truncated_pattern =
r"(?s)^(token token \n){5,}.*…\d+ tokens truncated….*(token token \n){5,}$";
assert_regex_match(truncated_pattern, &output_text);
let original_tokens = large_output

View File

@@ -435,12 +435,11 @@ async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<(
let head = (1..=69).map(|i| format!("{i}\n")).collect::<String>();
let tail = (352..=400).map(|i| format!("{i}\n")).collect::<String>();
let truncated_body =
format!("Total output lines: 400\n\n{head}70…273 tokens truncated…351\n{tail}");
let truncated_body = format!("{head}70…273 tokens truncated…351\n{tail}");
let escaped_command = escape(&command);
let escaped_truncated_body = escape(&truncated_body);
let expected_pattern = format!(
r"(?m)\A<user_shell_command>\n<command>\n{escaped_command}\n</command>\n<result>\nExit code: 0\nDuration: [0-9]+(?:\.[0-9]+)? seconds\nOutput:\n{escaped_truncated_body}\n</result>\n</user_shell_command>\z"
r"(?m)\A<user_shell_command>\n<command>\n{escaped_command}\n</command>\n<result>\nExit code: 0\nDuration: [0-9]+\.[0-9]{{4}} seconds\nWarning: truncated output \(original token count: 373\)\nOutput:\n{escaped_truncated_body}\n</result>\n</user_shell_command>\z"
);
assert_regex_match(&expected_pattern, &command_message);
@@ -454,19 +453,19 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_model("gpt-5.4").with_config(|config| {
config.tool_output_token_limit = Some(100);
config.tool_output_token_limit = Some(250);
});
let fixture = builder.build(&server).await?;
let call_id = "user-shell-double-truncation";
let args = if cfg!(windows) {
serde_json::json!({
"command": "for ($i=1; $i -le 2000; $i++) { Write-Output $i }",
"command": "Start-Sleep -Milliseconds 200; for ($i=1; $i -le 2000; $i++) { Write-Output $i }",
"timeout_ms": 5_000,
})
} else {
serde_json::json!({
"command": "seq 1 2000",
"command": "sh -c 'sleep 0.2; seq 1 2000'",
"timeout_ms": 5_000,
})
};
@@ -500,13 +499,16 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
.single_request()
.function_call_output_text(call_id)
.context("function_call_output present for shell_command call")?;
let output = output.replace("\r\n", "\n");
let truncation_headers = output.matches("Total output lines:").count();
assert_eq!(
truncation_headers, 1,
"shell_command output should carry only one truncation header: {output}"
let head = (1..=152).map(|i| format!("{i}\n")).collect::<String>();
let tail = (1901..=2000).map(|i| format!("{i}\n")).collect::<String>();
let truncated_body = format!("{head}…1974 tokens truncated…{tail}");
let escaped_truncated_body = escape(&truncated_body);
let expected_pattern = format!(
r"\AExit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nWarning: truncated output \(original token count: 2224\)\nOutput:\n{escaped_truncated_body}\z"
);
assert_regex_match(&expected_pattern, &output);
Ok(())
}

View File

@@ -10,13 +10,20 @@ use codex_utils_string::truncate_middle_with_token_budget;
pub use codex_protocol::protocol::TruncationPolicy;
/// Truncates `content` according to `policy` and returns only the resulting text.
///
/// This is a convenience wrapper over [`truncate_text_with_original_token_count`]:
/// it forwards both arguments unchanged and discards the optional original token
/// count that function reports alongside the text.
pub fn formatted_truncate_text(content: &str, policy: TruncationPolicy) -> String {
    let (text, _original_token_count) = truncate_text_with_original_token_count(content, policy);
    text
}
pub fn truncate_text_with_original_token_count(
content: &str,
policy: TruncationPolicy,
) -> (String, Option<usize>) {
if content.len() <= policy.byte_budget() {
return content.to_string();
return (content.to_string(), None);
}
let total_lines = content.lines().count();
let result = truncate_text(content, policy);
format!("Total output lines: {total_lines}\n\n{result}")
let original_token_count = (result != content).then(|| approx_token_count(content));
(result, original_token_count)
}
pub fn truncate_text(content: &str, policy: TruncationPolicy) -> String {
@@ -54,9 +61,8 @@ pub fn formatted_truncate_text_content_items_with_policy(
return (items.to_vec(), None);
}
let mut out = vec![FunctionCallOutputContentItem::InputText {
text: formatted_truncate_text(&combined, policy),
}];
let (text, original_token_count) = truncate_text_with_original_token_count(&combined, policy);
let mut out = vec![FunctionCallOutputContentItem::InputText { text }];
out.extend(items.iter().filter_map(|item| match item {
FunctionCallOutputContentItem::InputImage { image_url, detail } => {
Some(FunctionCallOutputContentItem::InputImage {
@@ -67,25 +73,41 @@ pub fn formatted_truncate_text_content_items_with_policy(
FunctionCallOutputContentItem::InputText { .. } => None,
}));
(out, Some(approx_token_count(&combined)))
(out, original_token_count)
}
/// Truncates function-call output items according to `policy` and returns only the items.
///
/// This is a convenience wrapper over
/// [`truncate_function_output_items_with_original_token_count`]: it forwards both
/// arguments unchanged and discards the optional original token count returned with
/// the item list.
pub fn truncate_function_output_items_with_policy(
    items: &[FunctionCallOutputContentItem],
    policy: TruncationPolicy,
) -> Vec<FunctionCallOutputContentItem> {
    let (truncated_items, _original_token_count) =
        truncate_function_output_items_with_original_token_count(items, policy);
    truncated_items
}
pub fn truncate_function_output_items_with_original_token_count(
items: &[FunctionCallOutputContentItem],
policy: TruncationPolicy,
) -> (Vec<FunctionCallOutputContentItem>, Option<usize>) {
let mut out: Vec<FunctionCallOutputContentItem> = Vec::with_capacity(items.len());
let mut remaining_budget = match policy {
TruncationPolicy::Bytes(_) => policy.byte_budget(),
TruncationPolicy::Tokens(_) => policy.token_budget(),
};
let mut omitted_text_items = 0usize;
let mut truncated = false;
let original_token_count = items
.iter()
.filter_map(|item| match item {
FunctionCallOutputContentItem::InputText { text } => Some(approx_token_count(text)),
FunctionCallOutputContentItem::InputImage { .. } => None,
})
.sum::<usize>();
for item in items {
match item {
FunctionCallOutputContentItem::InputText { text } => {
if remaining_budget == 0 {
omitted_text_items += 1;
truncated = true;
continue;
}
@@ -108,6 +130,7 @@ pub fn truncate_function_output_items_with_policy(
} else {
out.push(FunctionCallOutputContentItem::InputText { text: snippet });
}
truncated = true;
remaining_budget = 0;
}
}
@@ -126,7 +149,7 @@ pub fn truncate_function_output_items_with_policy(
});
}
out
(out, truncated.then_some(original_token_count))
}
pub fn approx_tokens_from_byte_count_i64(bytes: i64) -> i64 {

View File

@@ -3,7 +3,7 @@ use crate::approx_token_count;
use crate::approx_tokens_from_byte_count_i64;
use crate::formatted_truncate_text;
use crate::formatted_truncate_text_content_items_with_policy;
use crate::truncate_function_output_items_with_policy;
use crate::truncate_function_output_items_with_original_token_count;
use crate::truncate_text;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
@@ -14,7 +14,7 @@ fn truncate_bytes_less_than_placeholder_returns_placeholder() {
let content = "example output";
assert_eq!(
"Total output lines: 1\n\n…13 chars truncated…t",
"…13 chars truncated…t",
formatted_truncate_text(content, TruncationPolicy::Bytes(1)),
);
}
@@ -24,7 +24,7 @@ fn truncate_tokens_less_than_placeholder_returns_placeholder() {
let content = "example output";
assert_eq!(
"Total output lines: 1\n\nex…3 tokens truncated…ut",
"ex…3 tokens truncated…ut",
formatted_truncate_text(content, TruncationPolicy::Tokens(1)),
);
}
@@ -54,7 +54,7 @@ fn truncate_tokens_over_limit_returns_truncated() {
let content = "this is an example of a long output that should be truncated";
assert_eq!(
"Total output lines: 1\n\nthis is an…10 tokens truncated… truncated",
"this is an…10 tokens truncated… truncated",
formatted_truncate_text(content, TruncationPolicy::Tokens(5)),
);
}
@@ -64,29 +64,29 @@ fn truncate_bytes_over_limit_returns_truncated() {
let content = "this is an example of a long output that should be truncated";
assert_eq!(
"Total output lines: 1\n\nthis is an exam…30 chars truncated…ld be truncated",
"this is an exam…30 chars truncated…ld be truncated",
formatted_truncate_text(content, TruncationPolicy::Bytes(30)),
);
}
#[test]
fn truncate_bytes_reports_original_line_count_when_truncated() {
fn truncate_bytes_over_multiple_lines_returns_truncated() {
let content =
"this is an example of a long output that should be truncated\nalso some other line";
assert_eq!(
"Total output lines: 2\n\nthis is an exam…51 chars truncated…some other line",
"this is an exam…51 chars truncated…some other line",
formatted_truncate_text(content, TruncationPolicy::Bytes(30)),
);
}
#[test]
fn truncate_tokens_reports_original_line_count_when_truncated() {
fn truncate_tokens_over_multiple_lines_returns_truncated() {
let content =
"this is an example of a long output that should be truncated\nalso some other line";
assert_eq!(
"Total output lines: 2\n\nthis is an example o…11 tokens truncated…also some other line",
"this is an example o…11 tokens truncated…also some other line",
formatted_truncate_text(content, TruncationPolicy::Tokens(10)),
);
}
@@ -109,6 +109,10 @@ fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
let t3 = chunk.repeat(10);
let t4 = chunk.to_string();
let t5 = chunk.to_string();
let total_original_token_count = [&t1, &t2, &t3, &t4, &t5]
.iter()
.map(|text| approx_token_count(text))
.sum::<usize>();
let items = vec![
FunctionCallOutputContentItem::InputText { text: t1.clone() },
@@ -122,10 +126,13 @@ fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
FunctionCallOutputContentItem::InputText { text: t5 },
];
let output =
truncate_function_output_items_with_policy(&items, TruncationPolicy::Tokens(limit));
let (output, original_token_count) = truncate_function_output_items_with_original_token_count(
&items,
TruncationPolicy::Tokens(limit),
);
assert_eq!(output.len(), 5);
assert_eq!(original_token_count, Some(total_original_token_count));
let first_text = match &output[0] {
FunctionCallOutputContentItem::InputText { text } => text,
@@ -201,7 +208,7 @@ fn formatted_truncate_text_content_items_with_policy_preserves_empty_leading_tex
assert_eq!(
output,
vec![FunctionCallOutputContentItem::InputText {
text: "Total output lines: 1\n\n…3 chars truncated…".to_string(),
text: "…3 chars truncated…".to_string(),
}]
);
assert_eq!(original_token_count, Some(1));
@@ -236,7 +243,7 @@ fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_ima
output,
vec![
FunctionCallOutputContentItem::InputText {
text: "Total output lines: 3\n\nabcd…6 chars truncated…ijkl".to_string(),
text: "abcd…6 chars truncated…ijkl".to_string(),
},
FunctionCallOutputContentItem::InputImage {
image_url: "img:one".to_string(),
@@ -268,7 +275,7 @@ fn formatted_truncate_text_content_items_with_policy_merges_all_text_for_token_b
assert_eq!(
output,
vec![FunctionCallOutputContentItem::InputText {
text: "Total output lines: 2\n\nabcd…3 tokens truncated…mnop".to_string(),
text: "abcd…3 tokens truncated…mnop".to_string(),
}]
);
assert_eq!(original_token_count, Some(5));