mirror of
https://github.com/openai/codex.git
synced 2026-05-21 19:45:26 +00:00
Compare commits
1 Commits
rreichel3/
...
pakrym/rem
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8d5fde46c7 |
@@ -8,6 +8,7 @@ pub(crate) struct UserShellCommand {
|
||||
pub(crate) exit_code: i32,
|
||||
pub(crate) duration_seconds: f64,
|
||||
pub(crate) output: String,
|
||||
pub(crate) original_token_count: Option<usize>,
|
||||
}
|
||||
|
||||
impl UserShellCommand {
|
||||
@@ -16,12 +17,14 @@ impl UserShellCommand {
|
||||
exit_code: i32,
|
||||
duration: Duration,
|
||||
output: impl Into<String>,
|
||||
original_token_count: Option<usize>,
|
||||
) -> Self {
|
||||
Self {
|
||||
command: command.into(),
|
||||
exit_code,
|
||||
duration_seconds: duration.as_secs_f64(),
|
||||
output: output.into(),
|
||||
original_token_count,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -32,9 +35,14 @@ impl ContextualUserFragment for UserShellCommand {
|
||||
const END_MARKER: &'static str = "</user_shell_command>";
|
||||
|
||||
fn body(&self) -> String {
|
||||
let truncation_warning = self
|
||||
.original_token_count
|
||||
.map(crate::tools::truncation_warning)
|
||||
.map(|warning| format!("{warning}\n"))
|
||||
.unwrap_or_default();
|
||||
format!(
|
||||
"\n<command>\n{}\n</command>\n<result>\nExit code: {}\nDuration: {:.4} seconds\nOutput:\n{}\n</result>\n",
|
||||
self.command, self.exit_code, self.duration_seconds, self.output,
|
||||
"\n<command>\n{}\n</command>\n<result>\nExit code: {}\nDuration: {:.4} seconds\n{}Output:\n{}\n</result>\n",
|
||||
self.command, self.exit_code, self.duration_seconds, truncation_warning, self.output,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ use codex_features::Feature;
|
||||
use codex_tools::ToolName;
|
||||
use codex_utils_output_truncation::TruncationPolicy;
|
||||
use codex_utils_output_truncation::formatted_truncate_text_content_items_with_policy;
|
||||
use codex_utils_output_truncation::truncate_function_output_items_with_policy;
|
||||
use codex_utils_output_truncation::truncate_function_output_items_with_original_token_count;
|
||||
|
||||
pub(crate) use execute_handler::CodeModeExecuteHandler;
|
||||
use response_adapter::into_function_call_output_content_items;
|
||||
@@ -169,15 +169,27 @@ pub(super) async fn handle_runtime_response(
|
||||
RuntimeResponse::Yielded { content_items, .. } => {
|
||||
let mut content_items = into_function_call_output_content_items(content_items);
|
||||
sanitize_runtime_image_detail(exec.turn.as_ref(), &mut content_items);
|
||||
content_items = truncate_code_mode_result(content_items, max_output_tokens);
|
||||
prepend_script_status(&mut content_items, &script_status, started_at.elapsed());
|
||||
let (mut content_items, original_token_count) =
|
||||
truncate_code_mode_result(content_items, max_output_tokens);
|
||||
prepend_script_status(
|
||||
&mut content_items,
|
||||
&script_status,
|
||||
started_at.elapsed(),
|
||||
original_token_count,
|
||||
);
|
||||
Ok(FunctionToolOutput::from_content(content_items, Some(true)))
|
||||
}
|
||||
RuntimeResponse::Terminated { content_items, .. } => {
|
||||
let mut content_items = into_function_call_output_content_items(content_items);
|
||||
sanitize_runtime_image_detail(exec.turn.as_ref(), &mut content_items);
|
||||
content_items = truncate_code_mode_result(content_items, max_output_tokens);
|
||||
prepend_script_status(&mut content_items, &script_status, started_at.elapsed());
|
||||
let (mut content_items, original_token_count) =
|
||||
truncate_code_mode_result(content_items, max_output_tokens);
|
||||
prepend_script_status(
|
||||
&mut content_items,
|
||||
&script_status,
|
||||
started_at.elapsed(),
|
||||
original_token_count,
|
||||
);
|
||||
Ok(FunctionToolOutput::from_content(content_items, Some(true)))
|
||||
}
|
||||
RuntimeResponse::Result {
|
||||
@@ -199,8 +211,14 @@ pub(super) async fn handle_runtime_response(
|
||||
text: format!("Script error:\n{error_text}"),
|
||||
});
|
||||
}
|
||||
content_items = truncate_code_mode_result(content_items, max_output_tokens);
|
||||
prepend_script_status(&mut content_items, &script_status, started_at.elapsed());
|
||||
let (mut content_items, original_token_count) =
|
||||
truncate_code_mode_result(content_items, max_output_tokens);
|
||||
prepend_script_status(
|
||||
&mut content_items,
|
||||
&script_status,
|
||||
started_at.elapsed(),
|
||||
original_token_count,
|
||||
);
|
||||
Ok(FunctionToolOutput::from_content(
|
||||
content_items,
|
||||
Some(success),
|
||||
@@ -233,28 +251,32 @@ fn prepend_script_status(
|
||||
content_items: &mut Vec<FunctionCallOutputContentItem>,
|
||||
status: &str,
|
||||
wall_time: Duration,
|
||||
original_token_count: Option<usize>,
|
||||
) {
|
||||
let wall_time_seconds = ((wall_time.as_secs_f32()) * 10.0).round() / 10.0;
|
||||
let header = format!("{status}\nWall time {wall_time_seconds:.1} seconds\nOutput:\n");
|
||||
let mut header = format!("{status}\nWall time {wall_time_seconds:.1} seconds\n");
|
||||
if let Some(original_token_count) = original_token_count {
|
||||
header.push_str(&crate::tools::truncation_warning(original_token_count));
|
||||
header.push('\n');
|
||||
}
|
||||
header.push_str("Output:\n");
|
||||
content_items.insert(0, FunctionCallOutputContentItem::InputText { text: header });
|
||||
}
|
||||
|
||||
fn truncate_code_mode_result(
|
||||
items: Vec<FunctionCallOutputContentItem>,
|
||||
max_output_tokens: Option<usize>,
|
||||
) -> Vec<FunctionCallOutputContentItem> {
|
||||
) -> (Vec<FunctionCallOutputContentItem>, Option<usize>) {
|
||||
let max_output_tokens = resolve_max_tokens(max_output_tokens);
|
||||
let policy = TruncationPolicy::Tokens(max_output_tokens);
|
||||
if items
|
||||
.iter()
|
||||
.all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }))
|
||||
{
|
||||
let (truncated_items, _) =
|
||||
formatted_truncate_text_content_items_with_policy(&items, policy);
|
||||
return truncated_items;
|
||||
return formatted_truncate_text_content_items_with_policy(&items, policy);
|
||||
}
|
||||
|
||||
truncate_function_output_items_with_policy(&items, policy)
|
||||
truncate_function_output_items_with_original_token_count(&items, policy)
|
||||
}
|
||||
|
||||
async fn call_nested_tool(
|
||||
|
||||
@@ -415,8 +415,20 @@ impl ExecCommandToolOutput {
|
||||
sections.push(format!("Process running with session ID {process_id}"));
|
||||
}
|
||||
|
||||
if let Some(original_token_count) = self.original_token_count {
|
||||
sections.push(format!("Original token count: {original_token_count}"));
|
||||
let computed_original_token_count = self.max_output_tokens.and_then(|max_tokens| {
|
||||
let text = String::from_utf8_lossy(&self.raw_output);
|
||||
let (_, original_token_count) =
|
||||
codex_utils_output_truncation::truncate_text_with_original_token_count(
|
||||
text.as_ref(),
|
||||
TruncationPolicy::Tokens(max_tokens),
|
||||
);
|
||||
original_token_count
|
||||
});
|
||||
|
||||
if let Some(original_token_count) = computed_original_token_count
|
||||
.map(|computed| self.original_token_count.unwrap_or(computed))
|
||||
{
|
||||
sections.push(crate::tools::truncation_warning(original_token_count));
|
||||
}
|
||||
|
||||
sections.push("Output:".to_string());
|
||||
|
||||
@@ -450,7 +450,7 @@ fn exec_command_tool_output_formats_truncated_response() {
|
||||
^Chunk\ ID:\ abc123
|
||||
\nWall\ time:\ \d+\.\d{4}\ seconds
|
||||
\nProcess\ exited\ with\ code\ 0
|
||||
\nOriginal\ token\ count:\ 10
|
||||
\nWarning:\ truncated\ output\ \(original\ token\ count:\ 10\)
|
||||
\nOutput:
|
||||
\n.*tokens\ truncated.*
|
||||
$"#,
|
||||
|
||||
@@ -21,8 +21,7 @@ use std::borrow::Cow;
|
||||
use codex_protocol::exec_output::ExecToolCallOutput;
|
||||
use codex_tools::ToolName;
|
||||
use codex_utils_output_truncation::TruncationPolicy;
|
||||
use codex_utils_output_truncation::formatted_truncate_text;
|
||||
use codex_utils_output_truncation::truncate_text;
|
||||
use codex_utils_output_truncation::truncate_text_with_original_token_count;
|
||||
pub use router::ToolRouter;
|
||||
|
||||
// Telemetry preview limits: keep log events smaller than model budgets.
|
||||
@@ -58,6 +57,10 @@ pub(crate) fn tool_user_shell_type(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn truncation_warning(original_token_count: usize) -> String {
|
||||
format!("Warning: truncated output (original token count: {original_token_count})")
|
||||
}
|
||||
|
||||
/// Format the combined exec output for sending back to the model.
|
||||
/// Includes exit code and duration metadata; truncates large bodies safely.
|
||||
pub fn format_exec_output_for_model(
|
||||
@@ -66,21 +69,17 @@ pub fn format_exec_output_for_model(
|
||||
) -> String {
|
||||
// round to 1 decimal place
|
||||
let duration_seconds = ((exec_output.duration.as_secs_f32()) * 10.0).round() / 10.0;
|
||||
|
||||
let content = build_content_with_timeout(exec_output);
|
||||
|
||||
let total_lines = content.lines().count();
|
||||
|
||||
let formatted_output = truncate_text(&content, truncation_policy);
|
||||
let (formatted_output, original_token_count) =
|
||||
truncate_text_with_original_token_count(&content, truncation_policy);
|
||||
|
||||
let mut sections = Vec::new();
|
||||
|
||||
sections.push(format!("Exit code: {}", exec_output.exit_code));
|
||||
sections.push(format!("Wall time: {duration_seconds} seconds"));
|
||||
if total_lines != formatted_output.lines().count() {
|
||||
sections.push(format!("Total output lines: {total_lines}"));
|
||||
if let Some(original_token_count) = original_token_count {
|
||||
sections.push(truncation_warning(original_token_count));
|
||||
}
|
||||
|
||||
sections.push("Output:".to_string());
|
||||
sections.push(formatted_output);
|
||||
|
||||
@@ -91,10 +90,17 @@ pub fn format_exec_output_str(
|
||||
exec_output: &ExecToolCallOutput,
|
||||
truncation_policy: TruncationPolicy,
|
||||
) -> String {
|
||||
format_exec_output_str_with_original_token_count(exec_output, truncation_policy).0
|
||||
}
|
||||
|
||||
pub fn format_exec_output_str_with_original_token_count(
|
||||
exec_output: &ExecToolCallOutput,
|
||||
truncation_policy: TruncationPolicy,
|
||||
) -> (String, Option<usize>) {
|
||||
let content = build_content_with_timeout(exec_output);
|
||||
|
||||
// Truncate for model consumption before serialization.
|
||||
formatted_truncate_text(&content, truncation_policy)
|
||||
truncate_text_with_original_token_count(&content, truncation_policy)
|
||||
}
|
||||
|
||||
/// Extracts exec output content and prepends a timeout message if the command timed out.
|
||||
|
||||
@@ -4,15 +4,24 @@ use codex_protocol::models::ResponseItem;
|
||||
use crate::context::ContextualUserFragment;
|
||||
use crate::context::UserShellCommand;
|
||||
use crate::session::turn_context::TurnContext;
|
||||
use crate::tools::format_exec_output_str;
|
||||
use crate::tools::format_exec_output_str_with_original_token_count;
|
||||
|
||||
fn user_shell_command_fragment(
|
||||
command: &str,
|
||||
exec_output: &ExecToolCallOutput,
|
||||
turn_context: &TurnContext,
|
||||
) -> UserShellCommand {
|
||||
let output = format_exec_output_str(exec_output, turn_context.truncation_policy);
|
||||
UserShellCommand::new(command, exec_output.exit_code, exec_output.duration, output)
|
||||
let (output, original_token_count) = format_exec_output_str_with_original_token_count(
|
||||
exec_output,
|
||||
turn_context.truncation_policy,
|
||||
);
|
||||
UserShellCommand::new(
|
||||
command,
|
||||
exec_output.exit_code,
|
||||
exec_output.duration,
|
||||
output,
|
||||
original_token_count,
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -647,7 +647,6 @@ text(JSON.stringify(results));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
@@ -655,12 +654,9 @@ async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<
|
||||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use exec to truncate the final result",
|
||||
"truncate the final code mode result",
|
||||
r#"// @exec: {"max_output_tokens": 6}
|
||||
text(JSON.stringify(await tools.exec_command({
|
||||
cmd: "printf 'token one token two token three token four token five token six token seven'",
|
||||
max_output_tokens: 100
|
||||
})));
|
||||
text("alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu");
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
@@ -671,18 +667,16 @@ text(JSON.stringify(await tools.exec_command({
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
r"Script completed\nWall time \d+\.\d seconds\n",
|
||||
r"Warning: truncated output \(original token count: 17\)\n",
|
||||
r"Output:\n\z",
|
||||
),
|
||||
text_item(&items, /*index*/ 0),
|
||||
);
|
||||
let expected_pattern = r#"(?sx)
|
||||
\A
|
||||
Total\ output\ lines:\ 1\n
|
||||
\n
|
||||
.*…\d+\ tokens\ truncated….*
|
||||
\z
|
||||
"#;
|
||||
assert_regex_match(expected_pattern, text_item(&items, /*index*/ 1));
|
||||
assert_eq!(
|
||||
text_item(&items, /*index*/ 1),
|
||||
"alpha beta g…11 tokens truncated…pa lambda mu"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -446,6 +446,7 @@ async fn shell_command_output_is_not_truncated_over_10k_bytes() -> Result<()> {
|
||||
|
||||
let expected_pattern = r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Warning: truncated output \(original token count: 2501\)
|
||||
Output:
|
||||
1*…1 chars truncated…1*$";
|
||||
assert_regex_match(expected_pattern, output);
|
||||
|
||||
@@ -179,14 +179,14 @@ async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {
|
||||
"expected truncated shell output to be plain text"
|
||||
);
|
||||
|
||||
let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 100000\nOutput:\n.*?…\d+ chars truncated….*$"#;
|
||||
let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nWarning: truncated output \(original token count: 147224\)\nOutput:\n.*?…\d+ chars truncated….*$"#;
|
||||
|
||||
assert_regex_match(truncated_pattern, &output);
|
||||
|
||||
let len = output.len();
|
||||
assert!(
|
||||
(9_900..=10_100).contains(&len),
|
||||
"expected ~10k chars after truncation, got {len}"
|
||||
(10_100..=10_200).contains(&len),
|
||||
"expected ~10k chars plus shell metadata after truncation, got {len}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
@@ -256,7 +256,7 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
||||
);
|
||||
let truncated_pattern = r#"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Total output lines: 100000
|
||||
Warning: truncated output \(original token count: 147224\)
|
||||
Output:
|
||||
1
|
||||
2
|
||||
@@ -324,11 +324,20 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
|
||||
.context("function_call_output present for shell call")?;
|
||||
|
||||
let truncation_markers = output.matches("tokens truncated").count();
|
||||
let truncation_warnings = output.matches("Warning: truncated output").count();
|
||||
|
||||
assert_eq!(
|
||||
truncation_markers, 1,
|
||||
"shell output should carry only one truncation marker: {output}"
|
||||
);
|
||||
assert_eq!(
|
||||
truncation_warnings, 1,
|
||||
"shell output should carry only one truncation warning: {output}"
|
||||
);
|
||||
assert!(
|
||||
!output.contains("Total output lines:"),
|
||||
"shell output should not carry a line-count header: {output}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -601,7 +610,7 @@ async fn token_policy_marker_reports_tokens() -> Result<()> {
|
||||
.function_call_output_text(call_id)
|
||||
.context("shell output present")?;
|
||||
|
||||
let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 150\nOutput:\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19.*tokens truncated.*129\n130\n131\n132\n133\n134\n135\n136\n137\n138\n139\n140\n141\n142\n143\n144\n145\n146\n147\n148\n149\n150\n$";
|
||||
let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nWarning: truncated output \(original token count: 123\)\nOutput:\n.*tokens truncated.*$";
|
||||
|
||||
assert_regex_match(pattern, &output);
|
||||
|
||||
@@ -652,7 +661,7 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> {
|
||||
.function_call_output_text(call_id)
|
||||
.context("shell output present")?;
|
||||
|
||||
let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 150\nOutput:\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19.*chars truncated.*129\n130\n131\n132\n133\n134\n135\n136\n137\n138\n139\n140\n141\n142\n143\n144\n145\n146\n147\n148\n149\n150\n$";
|
||||
let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nWarning: truncated output \(original token count: 123\)\nOutput:\n.*chars truncated.*$";
|
||||
|
||||
assert_regex_match(pattern, &output);
|
||||
|
||||
|
||||
@@ -69,12 +69,11 @@ fn parse_unified_exec_output(raw: &str) -> Result<ParsedUnifiedExecOutput> {
|
||||
static OUTPUT_REGEX: OnceLock<Regex> = OnceLock::new();
|
||||
let regex = OUTPUT_REGEX.get_or_init(|| {
|
||||
Regex::new(concat!(
|
||||
r#"(?s)^(?:Total output lines: \d+\n\n)?"#,
|
||||
r#"(?:Chunk ID: (?P<chunk_id>[^\n]+)\n)?"#,
|
||||
r#"(?s)^(?:Chunk ID: (?P<chunk_id>[^\n]+)\n)?"#,
|
||||
r#"Wall time: (?P<wall_time>-?\d+(?:\.\d+)?) seconds\n"#,
|
||||
r#"(?:Process exited with code (?P<exit_code>-?\d+)\n)?"#,
|
||||
r#"(?:Process running with session ID (?P<process_id>-?\d+)\n)?"#,
|
||||
r#"(?:Original token count: (?P<original_token_count>\d+)\n)?"#,
|
||||
r#"(?:Warning: truncated output \(original token count: (?P<original_token_count>\d+)\)\n)?"#,
|
||||
r#"Output:\n?(?P<output>.*)$"#,
|
||||
))
|
||||
.expect("valid unified exec output regex")
|
||||
@@ -166,6 +165,13 @@ async fn wait_for_raw_unified_exec_output(
|
||||
test: &TestCodex,
|
||||
call_id: &str,
|
||||
) -> Result<ParsedUnifiedExecOutput> {
|
||||
let content = wait_for_raw_unified_exec_output_text(test, call_id).await?;
|
||||
|
||||
parse_unified_exec_output(&content)
|
||||
.with_context(|| format!("failed to parse raw unified exec output for {call_id}"))
|
||||
}
|
||||
|
||||
async fn wait_for_raw_unified_exec_output_text(test: &TestCodex, call_id: &str) -> Result<String> {
|
||||
let content = wait_for_event_match(&test.codex, |event| match event {
|
||||
EventMsg::RawResponseItem(raw) => match &raw.item {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
@@ -178,8 +184,7 @@ async fn wait_for_raw_unified_exec_output(
|
||||
})
|
||||
.await;
|
||||
|
||||
parse_unified_exec_output(&content)
|
||||
.with_context(|| format!("failed to parse raw unified exec output for {call_id}"))
|
||||
Ok(content)
|
||||
}
|
||||
|
||||
async fn submit_unified_exec_turn(
|
||||
@@ -1453,6 +1458,67 @@ async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exec_command_omits_truncation_warning_for_untruncated_output() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
skip_if_windows!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::UnifiedExec)
|
||||
.expect("test config should allow feature update");
|
||||
});
|
||||
let test = builder.build_with_remote_env(&server).await?;
|
||||
|
||||
let call_id = "uexec-untruncated-output";
|
||||
let args = serde_json::json!({
|
||||
"cmd": "printf 'short output'",
|
||||
"yield_time_ms": 500,
|
||||
"max_output_tokens": 100,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let request_log = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_unified_exec_turn(&test, "run short output", PermissionProfile::Disabled).await?;
|
||||
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TurnComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
let raw_output = request_log
|
||||
.function_call_output_text(call_id)
|
||||
.context("function_call_output present for exec_command call")?;
|
||||
let raw_output = raw_output.replace("\r\n", "\n");
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"\AChunk ID: [0-9a-f]{6}\n",
|
||||
r"Wall time: \d+\.\d{4} seconds\n",
|
||||
r"Process exited with code 0\n",
|
||||
r"Output:\n",
|
||||
r"short output\z",
|
||||
),
|
||||
&raw_output,
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exec_command_clamps_model_requested_max_output_tokens_to_policy() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
@@ -1499,12 +1565,26 @@ async fn exec_command_clamps_model_requested_max_output_tokens_to_policy() -> Re
|
||||
)
|
||||
.await?;
|
||||
|
||||
let output = wait_for_raw_unified_exec_output(&test, call_id).await?;
|
||||
assert_eq!(output.original_token_count, Some(8_991));
|
||||
let output_text = output.output.replace("\r\n", "\n");
|
||||
let raw_output = wait_for_raw_unified_exec_output_text(&test, call_id)
|
||||
.await?
|
||||
.replace("\r\n", "\n");
|
||||
assert_regex_match(
|
||||
r"^Total output lines: 999\n\nEXEC-LINE-0001 x{20}\nEXEC-LINE-0002 x{20}\nEXEC-LINE-0003 x{13}…8941 tokens truncated…E-0997 x{20}\nEXEC-LINE-0998 x{20}\nEXEC-LINE-0999 x{20}\n$",
|
||||
&output_text,
|
||||
concat!(
|
||||
r"\AChunk ID: [0-9a-f]{6}\n",
|
||||
r"Wall time: \d+\.\d{4} seconds\n",
|
||||
r"Process exited with code 0\n",
|
||||
r"Warning: truncated output \(original token count: 8991\)\n",
|
||||
r"Output:\n",
|
||||
r"EXEC-LINE-0001 x{20}\n",
|
||||
r"EXEC-LINE-0002 x{20}\n",
|
||||
r"EXEC-LINE-0003 x{13}",
|
||||
r"…8941 tokens truncated…",
|
||||
r"E-0997 x{20}\n",
|
||||
r"EXEC-LINE-0998 x{20}\n",
|
||||
r"EXEC-LINE-0999 x{20}\n",
|
||||
r"\z",
|
||||
),
|
||||
&raw_output,
|
||||
);
|
||||
|
||||
wait_for_event(&test.codex, |event| {
|
||||
@@ -1592,7 +1672,7 @@ async fn write_stdin_clamps_model_requested_max_output_tokens_to_policy() -> Res
|
||||
assert_eq!(stdin_output.original_token_count, Some(9_492));
|
||||
let stdin_output_text = stdin_output.output.replace("\r\n", "\n");
|
||||
assert_regex_match(
|
||||
r"^Total output lines: 1000\n\ngo\nSTDIN-LINE-0001 y{20}\nSTDIN-LINE-0002 y{20}\nSTDIN-LINE-0003 yyyy…9442 tokens truncated…7 y{20}\nSTDIN-LINE-0998 y{20}\nSTDIN-LINE-0999 y{20}\n$",
|
||||
r"^go\nSTDIN-LINE-0001 y{20}\nSTDIN-LINE-0002 y{20}\nSTDIN-LINE-0003 yyyy…9442 tokens truncated…7 y{20}\nSTDIN-LINE-0998 y{20}\nSTDIN-LINE-0999 y{20}\n$",
|
||||
&stdin_output_text,
|
||||
);
|
||||
|
||||
@@ -2628,7 +2708,8 @@ PY
|
||||
let large_output = outputs.get(call_id).expect("missing large output summary");
|
||||
|
||||
let output_text = large_output.output.replace("\r\n", "\n");
|
||||
let truncated_pattern = r"(?s)^Total output lines: \d+\n\n(token token \n){5,}.*…\d+ tokens truncated….*(token token \n){5,}$";
|
||||
let truncated_pattern =
|
||||
r"(?s)^(token token \n){5,}.*…\d+ tokens truncated….*(token token \n){5,}$";
|
||||
assert_regex_match(truncated_pattern, &output_text);
|
||||
|
||||
let original_tokens = large_output
|
||||
|
||||
@@ -435,12 +435,11 @@ async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<(
|
||||
|
||||
let head = (1..=69).map(|i| format!("{i}\n")).collect::<String>();
|
||||
let tail = (352..=400).map(|i| format!("{i}\n")).collect::<String>();
|
||||
let truncated_body =
|
||||
format!("Total output lines: 400\n\n{head}70…273 tokens truncated…351\n{tail}");
|
||||
let truncated_body = format!("{head}70…273 tokens truncated…351\n{tail}");
|
||||
let escaped_command = escape(&command);
|
||||
let escaped_truncated_body = escape(&truncated_body);
|
||||
let expected_pattern = format!(
|
||||
r"(?m)\A<user_shell_command>\n<command>\n{escaped_command}\n</command>\n<result>\nExit code: 0\nDuration: [0-9]+(?:\.[0-9]+)? seconds\nOutput:\n{escaped_truncated_body}\n</result>\n</user_shell_command>\z"
|
||||
r"(?m)\A<user_shell_command>\n<command>\n{escaped_command}\n</command>\n<result>\nExit code: 0\nDuration: [0-9]+\.[0-9]{{4}} seconds\nWarning: truncated output \(original token count: 373\)\nOutput:\n{escaped_truncated_body}\n</result>\n</user_shell_command>\z"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, &command_message);
|
||||
|
||||
@@ -454,19 +453,19 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_model("gpt-5.4").with_config(|config| {
|
||||
config.tool_output_token_limit = Some(100);
|
||||
config.tool_output_token_limit = Some(250);
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
let call_id = "user-shell-double-truncation";
|
||||
let args = if cfg!(windows) {
|
||||
serde_json::json!({
|
||||
"command": "for ($i=1; $i -le 2000; $i++) { Write-Output $i }",
|
||||
"command": "Start-Sleep -Milliseconds 200; for ($i=1; $i -le 2000; $i++) { Write-Output $i }",
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
} else {
|
||||
serde_json::json!({
|
||||
"command": "seq 1 2000",
|
||||
"command": "sh -c 'sleep 0.2; seq 1 2000'",
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
};
|
||||
@@ -500,13 +499,16 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
|
||||
.single_request()
|
||||
.function_call_output_text(call_id)
|
||||
.context("function_call_output present for shell_command call")?;
|
||||
let output = output.replace("\r\n", "\n");
|
||||
|
||||
let truncation_headers = output.matches("Total output lines:").count();
|
||||
|
||||
assert_eq!(
|
||||
truncation_headers, 1,
|
||||
"shell_command output should carry only one truncation header: {output}"
|
||||
let head = (1..=152).map(|i| format!("{i}\n")).collect::<String>();
|
||||
let tail = (1901..=2000).map(|i| format!("{i}\n")).collect::<String>();
|
||||
let truncated_body = format!("{head}…1974 tokens truncated…{tail}");
|
||||
let escaped_truncated_body = escape(&truncated_body);
|
||||
let expected_pattern = format!(
|
||||
r"\AExit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nWarning: truncated output \(original token count: 2224\)\nOutput:\n{escaped_truncated_body}\z"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, &output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -10,13 +10,20 @@ use codex_utils_string::truncate_middle_with_token_budget;
|
||||
pub use codex_protocol::protocol::TruncationPolicy;
|
||||
|
||||
pub fn formatted_truncate_text(content: &str, policy: TruncationPolicy) -> String {
|
||||
truncate_text_with_original_token_count(content, policy).0
|
||||
}
|
||||
|
||||
pub fn truncate_text_with_original_token_count(
|
||||
content: &str,
|
||||
policy: TruncationPolicy,
|
||||
) -> (String, Option<usize>) {
|
||||
if content.len() <= policy.byte_budget() {
|
||||
return content.to_string();
|
||||
return (content.to_string(), None);
|
||||
}
|
||||
|
||||
let total_lines = content.lines().count();
|
||||
let result = truncate_text(content, policy);
|
||||
format!("Total output lines: {total_lines}\n\n{result}")
|
||||
let original_token_count = (result != content).then(|| approx_token_count(content));
|
||||
(result, original_token_count)
|
||||
}
|
||||
|
||||
pub fn truncate_text(content: &str, policy: TruncationPolicy) -> String {
|
||||
@@ -54,9 +61,8 @@ pub fn formatted_truncate_text_content_items_with_policy(
|
||||
return (items.to_vec(), None);
|
||||
}
|
||||
|
||||
let mut out = vec![FunctionCallOutputContentItem::InputText {
|
||||
text: formatted_truncate_text(&combined, policy),
|
||||
}];
|
||||
let (text, original_token_count) = truncate_text_with_original_token_count(&combined, policy);
|
||||
let mut out = vec![FunctionCallOutputContentItem::InputText { text }];
|
||||
out.extend(items.iter().filter_map(|item| match item {
|
||||
FunctionCallOutputContentItem::InputImage { image_url, detail } => {
|
||||
Some(FunctionCallOutputContentItem::InputImage {
|
||||
@@ -67,25 +73,41 @@ pub fn formatted_truncate_text_content_items_with_policy(
|
||||
FunctionCallOutputContentItem::InputText { .. } => None,
|
||||
}));
|
||||
|
||||
(out, Some(approx_token_count(&combined)))
|
||||
(out, original_token_count)
|
||||
}
|
||||
|
||||
pub fn truncate_function_output_items_with_policy(
|
||||
items: &[FunctionCallOutputContentItem],
|
||||
policy: TruncationPolicy,
|
||||
) -> Vec<FunctionCallOutputContentItem> {
|
||||
truncate_function_output_items_with_original_token_count(items, policy).0
|
||||
}
|
||||
|
||||
pub fn truncate_function_output_items_with_original_token_count(
|
||||
items: &[FunctionCallOutputContentItem],
|
||||
policy: TruncationPolicy,
|
||||
) -> (Vec<FunctionCallOutputContentItem>, Option<usize>) {
|
||||
let mut out: Vec<FunctionCallOutputContentItem> = Vec::with_capacity(items.len());
|
||||
let mut remaining_budget = match policy {
|
||||
TruncationPolicy::Bytes(_) => policy.byte_budget(),
|
||||
TruncationPolicy::Tokens(_) => policy.token_budget(),
|
||||
};
|
||||
let mut omitted_text_items = 0usize;
|
||||
let mut truncated = false;
|
||||
let original_token_count = items
|
||||
.iter()
|
||||
.filter_map(|item| match item {
|
||||
FunctionCallOutputContentItem::InputText { text } => Some(approx_token_count(text)),
|
||||
FunctionCallOutputContentItem::InputImage { .. } => None,
|
||||
})
|
||||
.sum::<usize>();
|
||||
|
||||
for item in items {
|
||||
match item {
|
||||
FunctionCallOutputContentItem::InputText { text } => {
|
||||
if remaining_budget == 0 {
|
||||
omitted_text_items += 1;
|
||||
truncated = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -108,6 +130,7 @@ pub fn truncate_function_output_items_with_policy(
|
||||
} else {
|
||||
out.push(FunctionCallOutputContentItem::InputText { text: snippet });
|
||||
}
|
||||
truncated = true;
|
||||
remaining_budget = 0;
|
||||
}
|
||||
}
|
||||
@@ -126,7 +149,7 @@ pub fn truncate_function_output_items_with_policy(
|
||||
});
|
||||
}
|
||||
|
||||
out
|
||||
(out, truncated.then_some(original_token_count))
|
||||
}
|
||||
|
||||
pub fn approx_tokens_from_byte_count_i64(bytes: i64) -> i64 {
|
||||
|
||||
@@ -3,7 +3,7 @@ use crate::approx_token_count;
|
||||
use crate::approx_tokens_from_byte_count_i64;
|
||||
use crate::formatted_truncate_text;
|
||||
use crate::formatted_truncate_text_content_items_with_policy;
|
||||
use crate::truncate_function_output_items_with_policy;
|
||||
use crate::truncate_function_output_items_with_original_token_count;
|
||||
use crate::truncate_text;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
@@ -14,7 +14,7 @@ fn truncate_bytes_less_than_placeholder_returns_placeholder() {
|
||||
let content = "example output";
|
||||
|
||||
assert_eq!(
|
||||
"Total output lines: 1\n\n…13 chars truncated…t",
|
||||
"…13 chars truncated…t",
|
||||
formatted_truncate_text(content, TruncationPolicy::Bytes(1)),
|
||||
);
|
||||
}
|
||||
@@ -24,7 +24,7 @@ fn truncate_tokens_less_than_placeholder_returns_placeholder() {
|
||||
let content = "example output";
|
||||
|
||||
assert_eq!(
|
||||
"Total output lines: 1\n\nex…3 tokens truncated…ut",
|
||||
"ex…3 tokens truncated…ut",
|
||||
formatted_truncate_text(content, TruncationPolicy::Tokens(1)),
|
||||
);
|
||||
}
|
||||
@@ -54,7 +54,7 @@ fn truncate_tokens_over_limit_returns_truncated() {
|
||||
let content = "this is an example of a long output that should be truncated";
|
||||
|
||||
assert_eq!(
|
||||
"Total output lines: 1\n\nthis is an…10 tokens truncated… truncated",
|
||||
"this is an…10 tokens truncated… truncated",
|
||||
formatted_truncate_text(content, TruncationPolicy::Tokens(5)),
|
||||
);
|
||||
}
|
||||
@@ -64,29 +64,29 @@ fn truncate_bytes_over_limit_returns_truncated() {
|
||||
let content = "this is an example of a long output that should be truncated";
|
||||
|
||||
assert_eq!(
|
||||
"Total output lines: 1\n\nthis is an exam…30 chars truncated…ld be truncated",
|
||||
"this is an exam…30 chars truncated…ld be truncated",
|
||||
formatted_truncate_text(content, TruncationPolicy::Bytes(30)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_bytes_reports_original_line_count_when_truncated() {
|
||||
fn truncate_bytes_over_multiple_lines_returns_truncated() {
|
||||
let content =
|
||||
"this is an example of a long output that should be truncated\nalso some other line";
|
||||
|
||||
assert_eq!(
|
||||
"Total output lines: 2\n\nthis is an exam…51 chars truncated…some other line",
|
||||
"this is an exam…51 chars truncated…some other line",
|
||||
formatted_truncate_text(content, TruncationPolicy::Bytes(30)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_tokens_reports_original_line_count_when_truncated() {
|
||||
fn truncate_tokens_over_multiple_lines_returns_truncated() {
|
||||
let content =
|
||||
"this is an example of a long output that should be truncated\nalso some other line";
|
||||
|
||||
assert_eq!(
|
||||
"Total output lines: 2\n\nthis is an example o…11 tokens truncated…also some other line",
|
||||
"this is an example o…11 tokens truncated…also some other line",
|
||||
formatted_truncate_text(content, TruncationPolicy::Tokens(10)),
|
||||
);
|
||||
}
|
||||
@@ -109,6 +109,10 @@ fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
|
||||
let t3 = chunk.repeat(10);
|
||||
let t4 = chunk.to_string();
|
||||
let t5 = chunk.to_string();
|
||||
let total_original_token_count = [&t1, &t2, &t3, &t4, &t5]
|
||||
.iter()
|
||||
.map(|text| approx_token_count(text))
|
||||
.sum::<usize>();
|
||||
|
||||
let items = vec![
|
||||
FunctionCallOutputContentItem::InputText { text: t1.clone() },
|
||||
@@ -122,10 +126,13 @@ fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
|
||||
FunctionCallOutputContentItem::InputText { text: t5 },
|
||||
];
|
||||
|
||||
let output =
|
||||
truncate_function_output_items_with_policy(&items, TruncationPolicy::Tokens(limit));
|
||||
let (output, original_token_count) = truncate_function_output_items_with_original_token_count(
|
||||
&items,
|
||||
TruncationPolicy::Tokens(limit),
|
||||
);
|
||||
|
||||
assert_eq!(output.len(), 5);
|
||||
assert_eq!(original_token_count, Some(total_original_token_count));
|
||||
|
||||
let first_text = match &output[0] {
|
||||
FunctionCallOutputContentItem::InputText { text } => text,
|
||||
@@ -201,7 +208,7 @@ fn formatted_truncate_text_content_items_with_policy_preserves_empty_leading_tex
|
||||
assert_eq!(
|
||||
output,
|
||||
vec![FunctionCallOutputContentItem::InputText {
|
||||
text: "Total output lines: 1\n\n…3 chars truncated…".to_string(),
|
||||
text: "…3 chars truncated…".to_string(),
|
||||
}]
|
||||
);
|
||||
assert_eq!(original_token_count, Some(1));
|
||||
@@ -236,7 +243,7 @@ fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_ima
|
||||
output,
|
||||
vec![
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "Total output lines: 3\n\nabcd…6 chars truncated…ijkl".to_string(),
|
||||
text: "abcd…6 chars truncated…ijkl".to_string(),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "img:one".to_string(),
|
||||
@@ -268,7 +275,7 @@ fn formatted_truncate_text_content_items_with_policy_merges_all_text_for_token_b
|
||||
assert_eq!(
|
||||
output,
|
||||
vec![FunctionCallOutputContentItem::InputText {
|
||||
text: "Total output lines: 2\n\nabcd…3 tokens truncated…mnop".to_string(),
|
||||
text: "abcd…3 tokens truncated…mnop".to_string(),
|
||||
}]
|
||||
);
|
||||
assert_eq!(original_token_count, Some(5));
|
||||
|
||||
Reference in New Issue
Block a user