Add model-controlled truncation for code mode results (#14258)

Summary
- document that `@openai/code_mode` exposes
`set_max_output_tokens_per_exec_call` and that `code_mode` truncates the
final Rust-side output when the budget is exceeded
- enforce the configured budget in the Rust tool runner, reusing
truncation helpers so text-only outputs follow the unified-exec wrapper
and mixed outputs still fit within the limit
- ensure the new behavior is covered by a code-mode integration test and
string spec update

Testing
- Not run (not requested)
This commit is contained in:
pakrym-oai
2026-03-10 15:57:14 -07:00
committed by GitHub
parent c7e28cffab
commit e791559029
5 changed files with 284 additions and 38 deletions

View File

@@ -14,6 +14,10 @@ use crate::tools::context::ToolPayload;
use crate::tools::js_repl::resolve_compatible_node;
use crate::tools::router::ToolCall;
use crate::tools::router::ToolCallSource;
use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text_content_items_with_policy;
use crate::truncate::truncate_function_output_items_with_policy;
use crate::unified_exec::resolve_max_tokens;
use codex_protocol::models::FunctionCallOutputContentItem;
use serde::Deserialize;
use serde::Serialize;
@@ -72,6 +76,8 @@ enum NodeToHostMessage {
},
Result {
content_items: Vec<JsonValue>,
#[serde(default)]
max_output_tokens_per_exec_call: Option<usize>,
},
}
@@ -88,6 +94,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
section.push_str("- Direct tool calls remain available while `code_mode` is enabled.\n");
section.push_str("- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n");
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
section.push_str("- Import `set_max_output_tokens_per_exec_call` from `@openai/code_mode` to set the token budget used to truncate the final Rust-side result of the current `code_mode` execution. The default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n");
section.push_str(
"- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
);
@@ -187,8 +194,14 @@ async fn execute_node(
};
write_message(&mut stdin, &response).await?;
}
NodeToHostMessage::Result { content_items } => {
final_content_items = Some(output_content_items_from_json_values(content_items)?);
NodeToHostMessage::Result {
content_items,
max_output_tokens_per_exec_call,
} => {
final_content_items = Some(truncate_code_mode_result(
output_content_items_from_json_values(content_items)?,
max_output_tokens_per_exec_call,
));
break;
}
}
@@ -261,6 +274,32 @@ fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result<String
.replace("__CODE_MODE_USER_CODE_PLACEHOLDER__", user_code))
}
/// Enforces the model-configured token budget on the final code-mode result.
///
/// For text-only results, truncation goes through the formatted text helper
/// and — when the helper reports an original token count (i.e. content was
/// truncated) — the first text item is prefixed with the unified-exec style
/// `Original token count:` / `Output:` wrapper. Results containing non-text
/// items fall back to the generic function-output truncation helper.
fn truncate_code_mode_result(
    items: Vec<FunctionCallOutputContentItem>,
    max_output_tokens_per_exec_call: Option<usize>,
) -> Vec<FunctionCallOutputContentItem> {
    // `None` resolves to the default budget via the unified-exec helper.
    let budget = resolve_max_tokens(max_output_tokens_per_exec_call);
    let policy = TruncationPolicy::Tokens(budget);

    let text_only = items
        .iter()
        .all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }));
    if !text_only {
        // Mixed content: generic truncation, no wrapper.
        return truncate_function_output_items_with_policy(&items, policy);
    }

    let (mut truncated, original_tokens) =
        formatted_truncate_text_content_items_with_policy(&items, policy);
    if let Some(original_tokens) = original_tokens
        && let Some(FunctionCallOutputContentItem::InputText { text }) = truncated.first_mut()
    {
        *text = format!("Original token count: {original_tokens}\nOutput:\n{text}");
    }
    truncated
}
async fn build_enabled_tools(exec: &ExecContext) -> Vec<EnabledTool> {
let router = build_nested_router(exec).await;
let mcp_tool_names = exec