mirror of
https://github.com/openai/codex.git
synced 2026-05-17 09:43:19 +00:00
## Why
Large MCP tool call outputs can make rollout JSONL files enormous. In
the session that motivated this change, the biggest JSONL records were:
- `event_msg/mcp_tool_call_end`
- `response_item/function_call_output`
both containing the same unbounded MCP payloads - just 3 MCP tool calls
that each were multi-hundred MBs 😱
This PR truncates both of those JSONL records.
## How
#### For `response_item/function_call_output`
Unified exec already bounds tool output before it is injected into
model-facing history, which also keeps the corresponding rollout
`response_item/function_call_output` records small.
MCP should follow the same pattern: truncate the model-facing tool
output at the tool-output boundary, while leaving code-mode/raw hook
consumers alone.
#### For `event_msg/mcp_tool_call_end`
`McpToolCallEnd` also needs its own bounded event copy because it is the
app-server/replay/UI event shape that backs `ThreadItem::McpToolCall`.
Unfortunately this is _not_ downstream of the `ToolOutput` trait.
## Model behavior
Model behavior is actually unchanged as a result of this PR.
Before this PR, MCP output was:
1. Converted to `FunctionCallOutput`.
2. Recorded into in-memory history.
3. Truncated by `ContextManager::record_items()` before later model
turns saw it.
After this branch, MCP output is truncated earlier, in
`McpToolOutput::response_payload()`, using the same helper. Then
`ContextManager::record_items()` sees an already-truncated output and
effectively has little/no additional work to do.
So the model should still see the same kind of truncated function-call
output. The practical difference is where truncation happens: earlier,
before rollout persistence/app-server emission can see the giant
payload.
## Verification
- `cargo test -p codex-core mcp_tool_output`
- `cargo test -p codex-core
mcp_tool_call::tests::truncate_mcp_tool_result_for_event`
- `cargo test -p codex-core
mcp_post_tool_use_payload_uses_model_tool_name_args_and_result`
- `just fmt`
- `just fix -p codex-core`
- `git diff --check`
472 lines
15 KiB
Rust
472 lines
15 KiB
Rust
use super::*;
|
|
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
|
use core_test_support::assert_regex_match;
|
|
use pretty_assertions::assert_eq;
|
|
use serde_json::json;
|
|
|
|
#[test]
|
|
fn custom_tool_calls_should_roundtrip_as_custom_outputs() {
|
|
let payload = ToolPayload::Custom {
|
|
input: "patch".to_string(),
|
|
};
|
|
let response = FunctionToolOutput::from_text("patched".to_string(), Some(true))
|
|
.to_response_item("call-42", &payload);
|
|
|
|
match response {
|
|
ResponseInputItem::CustomToolCallOutput {
|
|
call_id, output, ..
|
|
} => {
|
|
assert_eq!(call_id, "call-42");
|
|
assert_eq!(output.content_items(), None);
|
|
assert_eq!(output.body.to_text().as_deref(), Some("patched"));
|
|
assert_eq!(output.success, Some(true));
|
|
}
|
|
other => panic!("expected CustomToolCallOutput, got {other:?}"),
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn function_payloads_remain_function_outputs() {
|
|
let payload = ToolPayload::Function {
|
|
arguments: "{}".to_string(),
|
|
};
|
|
let response = FunctionToolOutput::from_text("ok".to_string(), Some(true))
|
|
.to_response_item("fn-1", &payload);
|
|
|
|
match response {
|
|
ResponseInputItem::FunctionCallOutput { call_id, output } => {
|
|
assert_eq!(call_id, "fn-1");
|
|
assert_eq!(output.content_items(), None);
|
|
assert_eq!(output.body.to_text().as_deref(), Some("ok"));
|
|
assert_eq!(output.success, Some(true));
|
|
}
|
|
other => panic!("expected FunctionCallOutput, got {other:?}"),
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn mcp_code_mode_result_serializes_full_call_tool_result() {
|
|
let output = CallToolResult {
|
|
content: vec![serde_json::json!({
|
|
"type": "text",
|
|
"text": "ignored",
|
|
})],
|
|
structured_content: Some(serde_json::json!({
|
|
"threadId": "thread_123",
|
|
"content": "done",
|
|
})),
|
|
is_error: Some(false),
|
|
meta: Some(serde_json::json!({
|
|
"source": "mcp",
|
|
})),
|
|
};
|
|
|
|
let result = output.code_mode_result(&ToolPayload::Mcp {
|
|
server: "server".to_string(),
|
|
tool: "tool".to_string(),
|
|
raw_arguments: "{}".to_string(),
|
|
});
|
|
|
|
assert_eq!(
|
|
result,
|
|
serde_json::json!({
|
|
"content": [{
|
|
"type": "text",
|
|
"text": "ignored",
|
|
}],
|
|
"structuredContent": {
|
|
"threadId": "thread_123",
|
|
"content": "done",
|
|
},
|
|
"isError": false,
|
|
"_meta": {
|
|
"source": "mcp",
|
|
},
|
|
})
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn mcp_tool_output_response_item_includes_wall_time() {
|
|
let output = McpToolOutput {
|
|
result: CallToolResult {
|
|
content: vec![serde_json::json!({
|
|
"type": "text",
|
|
"text": "done",
|
|
})],
|
|
structured_content: None,
|
|
is_error: Some(false),
|
|
meta: None,
|
|
},
|
|
tool_input: json!({}),
|
|
wall_time: std::time::Duration::from_millis(1250),
|
|
original_image_detail_supported: false,
|
|
truncation_policy: TruncationPolicy::Bytes(1024),
|
|
};
|
|
|
|
let response = output.to_response_item(
|
|
"mcp-call-1",
|
|
&ToolPayload::Mcp {
|
|
server: "server".to_string(),
|
|
tool: "tool".to_string(),
|
|
raw_arguments: "{}".to_string(),
|
|
},
|
|
);
|
|
|
|
match response {
|
|
ResponseInputItem::FunctionCallOutput { call_id, output } => {
|
|
assert_eq!(call_id, "mcp-call-1");
|
|
assert_eq!(output.success, Some(true));
|
|
let Some(text) = output.body.to_text() else {
|
|
panic!("MCP output should serialize as text");
|
|
};
|
|
let Some(payload) = text.strip_prefix("Wall time: 1.2500 seconds\nOutput:\n") else {
|
|
panic!("MCP output should include wall-time header: {text}");
|
|
};
|
|
let parsed: serde_json::Value = serde_json::from_str(payload).unwrap_or_else(|err| {
|
|
panic!("MCP output should serialize JSON content: {err}");
|
|
});
|
|
assert_eq!(
|
|
parsed,
|
|
json!([{
|
|
"type": "text",
|
|
"text": "done",
|
|
}])
|
|
);
|
|
}
|
|
other => panic!("expected FunctionCallOutput, got {other:?}"),
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn mcp_tool_output_response_item_truncates_large_structured_content() {
|
|
let output = McpToolOutput {
|
|
result: CallToolResult {
|
|
content: vec![serde_json::json!({
|
|
"type": "text",
|
|
"text": "ignored when structured content is present",
|
|
})],
|
|
structured_content: Some(serde_json::json!({
|
|
"items": "large structured value ".repeat(1_000),
|
|
})),
|
|
is_error: Some(false),
|
|
meta: None,
|
|
},
|
|
tool_input: json!({}),
|
|
wall_time: std::time::Duration::from_millis(1250),
|
|
original_image_detail_supported: false,
|
|
truncation_policy: TruncationPolicy::Bytes(128),
|
|
};
|
|
|
|
let response = output.to_response_item(
|
|
"mcp-call-large",
|
|
&ToolPayload::Mcp {
|
|
server: "server".to_string(),
|
|
tool: "tool".to_string(),
|
|
raw_arguments: "{}".to_string(),
|
|
},
|
|
);
|
|
|
|
match response {
|
|
ResponseInputItem::FunctionCallOutput { call_id, output } => {
|
|
assert_eq!(call_id, "mcp-call-large");
|
|
assert_eq!(output.success, Some(true));
|
|
let text = output
|
|
.body
|
|
.to_text()
|
|
.expect("MCP output should serialize as text");
|
|
assert!(text.starts_with("Wall time: 1.2500 seconds\nOutput:\n"));
|
|
assert!(text.contains("chars truncated"));
|
|
assert!(!text.contains("ignored when structured content is present"));
|
|
}
|
|
other => panic!("expected FunctionCallOutput, got {other:?}"),
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn mcp_tool_output_response_item_preserves_content_items() {
|
|
let image_url = "data:image/png;base64,AAA";
|
|
let output = McpToolOutput {
|
|
result: CallToolResult {
|
|
content: vec![serde_json::json!({
|
|
"type": "image",
|
|
"mimeType": "image/png",
|
|
"data": "AAA",
|
|
})],
|
|
structured_content: None,
|
|
is_error: Some(false),
|
|
meta: None,
|
|
},
|
|
tool_input: json!({}),
|
|
wall_time: std::time::Duration::from_millis(500),
|
|
original_image_detail_supported: false,
|
|
truncation_policy: TruncationPolicy::Bytes(1024),
|
|
};
|
|
|
|
let response = output.to_response_item(
|
|
"mcp-call-2",
|
|
&ToolPayload::Mcp {
|
|
server: "server".to_string(),
|
|
tool: "tool".to_string(),
|
|
raw_arguments: "{}".to_string(),
|
|
},
|
|
);
|
|
|
|
match response {
|
|
ResponseInputItem::FunctionCallOutput { output, .. } => {
|
|
assert_eq!(
|
|
output.content_items(),
|
|
Some(
|
|
vec![
|
|
FunctionCallOutputContentItem::InputText {
|
|
text: "Wall time: 0.5000 seconds\nOutput:".to_string(),
|
|
},
|
|
FunctionCallOutputContentItem::InputImage {
|
|
image_url: image_url.to_string(),
|
|
detail: Some(DEFAULT_IMAGE_DETAIL),
|
|
},
|
|
]
|
|
.as_slice()
|
|
)
|
|
);
|
|
assert_eq!(
|
|
output.body.to_text().as_deref(),
|
|
Some("Wall time: 0.5000 seconds\nOutput:")
|
|
);
|
|
}
|
|
other => panic!("expected FunctionCallOutput, got {other:?}"),
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn mcp_tool_output_code_mode_result_stays_raw_call_tool_result() {
|
|
let large_content = "large structured value ".repeat(1_000);
|
|
let output = McpToolOutput {
|
|
result: CallToolResult {
|
|
content: vec![serde_json::json!({
|
|
"type": "text",
|
|
"text": "ignored",
|
|
})],
|
|
structured_content: Some(serde_json::json!({
|
|
"content": large_content,
|
|
})),
|
|
is_error: Some(false),
|
|
meta: None,
|
|
},
|
|
tool_input: json!({}),
|
|
wall_time: std::time::Duration::from_millis(1250),
|
|
original_image_detail_supported: false,
|
|
truncation_policy: TruncationPolicy::Bytes(64),
|
|
};
|
|
|
|
let result = output.code_mode_result(&ToolPayload::Mcp {
|
|
server: "server".to_string(),
|
|
tool: "tool".to_string(),
|
|
raw_arguments: "{}".to_string(),
|
|
});
|
|
|
|
assert_eq!(
|
|
result,
|
|
serde_json::json!({
|
|
"content": [{
|
|
"type": "text",
|
|
"text": "ignored",
|
|
}],
|
|
"structuredContent": {
|
|
"content": "large structured value ".repeat(1_000),
|
|
},
|
|
"isError": false,
|
|
})
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn custom_tool_calls_can_derive_text_from_content_items() {
|
|
let payload = ToolPayload::Custom {
|
|
input: "patch".to_string(),
|
|
};
|
|
let response = FunctionToolOutput::from_content(
|
|
vec![
|
|
FunctionCallOutputContentItem::InputText {
|
|
text: "line 1".to_string(),
|
|
},
|
|
FunctionCallOutputContentItem::InputImage {
|
|
image_url: "data:image/png;base64,AAA".to_string(),
|
|
detail: Some(DEFAULT_IMAGE_DETAIL),
|
|
},
|
|
FunctionCallOutputContentItem::InputText {
|
|
text: "line 2".to_string(),
|
|
},
|
|
],
|
|
Some(true),
|
|
)
|
|
.to_response_item("call-99", &payload);
|
|
|
|
match response {
|
|
ResponseInputItem::CustomToolCallOutput {
|
|
call_id, output, ..
|
|
} => {
|
|
let expected = vec![
|
|
FunctionCallOutputContentItem::InputText {
|
|
text: "line 1".to_string(),
|
|
},
|
|
FunctionCallOutputContentItem::InputImage {
|
|
image_url: "data:image/png;base64,AAA".to_string(),
|
|
detail: Some(DEFAULT_IMAGE_DETAIL),
|
|
},
|
|
FunctionCallOutputContentItem::InputText {
|
|
text: "line 2".to_string(),
|
|
},
|
|
];
|
|
assert_eq!(call_id, "call-99");
|
|
assert_eq!(output.content_items(), Some(expected.as_slice()));
|
|
assert_eq!(output.body.to_text().as_deref(), Some("line 1\nline 2"));
|
|
assert_eq!(output.success, Some(true));
|
|
}
|
|
other => panic!("expected CustomToolCallOutput, got {other:?}"),
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn tool_search_payloads_roundtrip_as_tool_search_outputs() {
|
|
let payload = ToolPayload::ToolSearch {
|
|
arguments: SearchToolCallParams {
|
|
query: "calendar".to_string(),
|
|
limit: None,
|
|
},
|
|
};
|
|
let response = ToolSearchOutput {
|
|
tools: vec![LoadableToolSpec::Function(codex_tools::ResponsesApiTool {
|
|
name: "create_event".to_string(),
|
|
description: String::new(),
|
|
strict: false,
|
|
defer_loading: Some(true),
|
|
parameters: codex_tools::JsonSchema::object(
|
|
/*properties*/ Default::default(),
|
|
/*required*/ None,
|
|
/*additional_properties*/ None,
|
|
),
|
|
output_schema: None,
|
|
})],
|
|
}
|
|
.to_response_item("search-1", &payload);
|
|
|
|
match response {
|
|
ResponseInputItem::ToolSearchOutput {
|
|
call_id,
|
|
status,
|
|
execution,
|
|
tools,
|
|
} => {
|
|
assert_eq!(call_id, "search-1");
|
|
assert_eq!(status, "completed");
|
|
assert_eq!(execution, "client");
|
|
assert_eq!(
|
|
tools,
|
|
vec![json!({
|
|
"type": "function",
|
|
"name": "create_event",
|
|
"description": "",
|
|
"strict": false,
|
|
"defer_loading": true,
|
|
"parameters": {
|
|
"type": "object",
|
|
"properties": {}
|
|
}
|
|
})]
|
|
);
|
|
}
|
|
other => panic!("expected ToolSearchOutput, got {other:?}"),
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn log_preview_uses_content_items_when_plain_text_is_missing() {
|
|
let output = FunctionToolOutput::from_content(
|
|
vec![FunctionCallOutputContentItem::InputText {
|
|
text: "preview".to_string(),
|
|
}],
|
|
Some(true),
|
|
);
|
|
|
|
assert_eq!(output.log_preview(), "preview");
|
|
assert_eq!(
|
|
function_call_output_content_items_to_text(&output.body),
|
|
Some("preview".to_string())
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn telemetry_preview_returns_original_within_limits() {
|
|
let content = "short output";
|
|
assert_eq!(telemetry_preview(content), content);
|
|
}
|
|
|
|
#[test]
|
|
fn telemetry_preview_truncates_by_bytes() {
|
|
let content = "x".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 8);
|
|
let preview = telemetry_preview(&content);
|
|
|
|
assert!(preview.contains(TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
|
|
assert!(
|
|
preview.len()
|
|
<= TELEMETRY_PREVIEW_MAX_BYTES + TELEMETRY_PREVIEW_TRUNCATION_NOTICE.len() + 1
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn telemetry_preview_truncates_by_lines() {
|
|
let content = (0..(TELEMETRY_PREVIEW_MAX_LINES + 5))
|
|
.map(|idx| format!("line {idx}"))
|
|
.collect::<Vec<_>>()
|
|
.join("\n");
|
|
|
|
let preview = telemetry_preview(&content);
|
|
let lines: Vec<&str> = preview.lines().collect();
|
|
|
|
assert!(lines.len() <= TELEMETRY_PREVIEW_MAX_LINES + 1);
|
|
assert_eq!(lines.last(), Some(&TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
|
|
}
|
|
|
|
#[test]
|
|
fn exec_command_tool_output_formats_truncated_response() {
|
|
let payload = ToolPayload::Function {
|
|
arguments: "{}".to_string(),
|
|
};
|
|
let response = ExecCommandToolOutput {
|
|
event_call_id: "call-42".to_string(),
|
|
chunk_id: "abc123".to_string(),
|
|
wall_time: std::time::Duration::from_millis(1250),
|
|
raw_output: b"token one token two token three token four token five".to_vec(),
|
|
max_output_tokens: Some(4),
|
|
process_id: None,
|
|
exit_code: Some(0),
|
|
original_token_count: Some(10),
|
|
hook_command: None,
|
|
}
|
|
.to_response_item("call-42", &payload);
|
|
|
|
match response {
|
|
ResponseInputItem::FunctionCallOutput { call_id, output } => {
|
|
assert_eq!(call_id, "call-42");
|
|
assert_eq!(output.success, Some(true));
|
|
let text = output
|
|
.body
|
|
.to_text()
|
|
.expect("exec output should serialize as text");
|
|
assert_regex_match(
|
|
r#"(?sx)
|
|
^Chunk\ ID:\ abc123
|
|
\nWall\ time:\ \d+\.\d{4}\ seconds
|
|
\nProcess\ exited\ with\ code\ 0
|
|
\nOriginal\ token\ count:\ 10
|
|
\nOutput:
|
|
\n.*tokens\ truncated.*
|
|
$"#,
|
|
&text,
|
|
);
|
|
}
|
|
other => panic!("expected FunctionCallOutput, got {other:?}"),
|
|
}
|
|
}
|