Prefix code mode output with success or failure message and include error stack (#14272)

2026-04-30 09:26:44 +00:00 · 2026-03-10 18:33:52 -07:00
parent cec211cabc
commit 24b8d443b8
5 changed files with 211 additions and 89 deletions
--- a/codex-rs/core/src/tools/code_mode.rs
+++ b/codex-rs/core/src/tools/code_mode.rs
@@ -1,6 +1,6 @@
 use std::collections::HashMap;
-use std::process::ExitStatus;
 use std::sync::Arc;
+use std::time::Duration;

 use crate::client_common::tools::ToolSpec;
 use crate::codex::Session;
@@ -10,6 +10,7 @@ use crate::exec_env::create_env;
 use crate::features::Feature;
 use crate::function_tool::FunctionCallError;
 use crate::tools::ToolRouter;
+use crate::tools::context::FunctionToolOutput;
 use crate::tools::context::SharedTurnDiffTracker;
 use crate::tools::context::ToolPayload;
 use crate::tools::js_repl::resolve_compatible_node;
@@ -81,6 +82,8 @@ enum NodeToHostMessage {
        content_items: Vec<JsonValue>,
        stored_values: HashMap<String, JsonValue>,
        #[serde(default)]
+        error_text: Option<String>,
+        #[serde(default)]
        max_output_tokens_per_exec_call: Option<usize>,
    },
 }
@@ -105,7 +108,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
    ));
    section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
    section.push_str(&format!(
-        "- Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution; the default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n",
+        "- Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution; the default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. The returned content starts with a separate `Script completed` or `Script failed` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
    ));
    section.push_str(
        "- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
@@ -121,7 +124,7 @@ pub(crate) async fn execute(
    turn: Arc<TurnContext>,
    tracker: SharedTurnDiffTracker,
    code: String,
-) -> Result<Vec<FunctionCallOutputContentItem>, FunctionCallError> {
+) -> Result<FunctionToolOutput, FunctionCallError> {
    let exec = ExecContext {
        session,
        turn,
@@ -140,8 +143,9 @@ async fn execute_node(
    source: String,
    enabled_tools: Vec<EnabledTool>,
    stored_values: HashMap<String, JsonValue>,
-) -> Result<Vec<FunctionCallOutputContentItem>, String> {
+) -> Result<FunctionToolOutput, String> {
    let node_path = resolve_compatible_node(exec.turn.config.js_repl_node_path.as_deref()).await?;
+    let started_at = std::time::Instant::now();

    let env = create_env(&exec.turn.shell_environment_policy, None);
    let mut cmd = tokio::process::Command::new(&node_path);
@@ -190,7 +194,7 @@ async fn execute_node(
    .await?;

    let mut stdout_lines = BufReader::new(stdout).lines();
-    let mut final_content_items = None;
+    let mut pending_result = None;
    while let Some(line) = stdout_lines
        .next_line()
        .await
@@ -213,6 +217,7 @@ async fn execute_node(
            NodeToHostMessage::Result {
                content_items,
                stored_values,
+                error_text,
                max_output_tokens_per_exec_call,
            } => {
                exec.session
@@ -220,8 +225,9 @@ async fn execute_node(
                    .code_mode_store
                    .replace_stored_values(stored_values)
                    .await;
-                final_content_items = Some(truncate_code_mode_result(
+                pending_result = Some((
                    output_content_items_from_json_values(content_items)?,
+                    error_text,
                    max_output_tokens_per_exec_call,
                ));
                break;
@@ -238,20 +244,39 @@ async fn execute_node(
    let stderr = stderr_task
        .await
        .map_err(|err| format!("failed to collect {PUBLIC_TOOL_NAME} stderr: {err}"))?;
+    let wall_time = started_at.elapsed();
+    let success = status.success();

-    match final_content_items {
-        Some(content_items) if status.success() => Ok(content_items),
-        Some(_) => Err(format_runner_failure(
-            &format!("{PUBLIC_TOOL_NAME} execution failed"),
-            status,
-            &stderr,
-        )),
-        None => Err(format_runner_failure(
-            &format!("{PUBLIC_TOOL_NAME} runner exited without returning a result"),
-            status,
-            &stderr,
-        )),
+    let Some((mut content_items, error_text, max_output_tokens_per_exec_call)) = pending_result
+    else {
+        let message = if stderr.is_empty() {
+            format!("{PUBLIC_TOOL_NAME} runner exited without returning a result (status {status})")
+        } else {
+            stderr
+        };
+        return Err(message);
+    };
+
+    if !success {
+        let error_text = error_text.unwrap_or_else(|| {
+            if stderr.is_empty() {
+                format!("Process exited with status {status}")
+            } else {
+                stderr
+            }
+        });
+        content_items.push(FunctionCallOutputContentItem::InputText {
+            text: format!("Script error:\n{error_text}"),
+        });
    }
+
+    let mut content_items =
+        truncate_code_mode_result(content_items, max_output_tokens_per_exec_call);
+    prepend_script_status(&mut content_items, success, wall_time);
+    Ok(FunctionToolOutput::from_content(
+        content_items,
+        Some(success),
+    ))
 }

 async fn write_message(
@@ -274,15 +299,21 @@ async fn write_message(
        .map_err(|err| format!("failed to flush {PUBLIC_TOOL_NAME} message: {err}"))
 }

-fn append_stderr(message: String, stderr: &str) -> String {
-    if stderr.trim().is_empty() {
-        return message;
-    }
-    format!("{message}\n\nnode stderr:\n{stderr}")
-}
-
-fn format_runner_failure(message: &str, status: ExitStatus, stderr: &str) -> String {
-    append_stderr(format!("{message} (status {status})"), stderr)
+fn prepend_script_status(
+    content_items: &mut Vec<FunctionCallOutputContentItem>,
+    success: bool,
+    wall_time: Duration,
+) {
+    let wall_time_seconds = ((wall_time.as_secs_f32()) * 10.0).round() / 10.0;
+    let header = format!(
+        "{}\nWall time {wall_time_seconds:.1} seconds\nOutput:\n",
+        if success {
+            "Script completed"
+        } else {
+            "Script failed"
+        }
+    );
+    content_items.insert(0, FunctionCallOutputContentItem::InputText { text: header });
 }

 fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result<String, String> {
@@ -301,25 +332,17 @@ fn truncate_code_mode_result(
    max_output_tokens_per_exec_call: Option<usize>,
 ) -> Vec<FunctionCallOutputContentItem> {
    let max_output_tokens = resolve_max_tokens(max_output_tokens_per_exec_call);
+    let policy = TruncationPolicy::Tokens(max_output_tokens);
    if items
        .iter()
        .all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }))
    {
-        let (mut truncated_items, original_token_count) =
-            formatted_truncate_text_content_items_with_policy(
-                &items,
-                TruncationPolicy::Tokens(max_output_tokens),
-            );
-        if let Some(original_token_count) = original_token_count
-            && let Some(FunctionCallOutputContentItem::InputText { text }) =
-                truncated_items.first_mut()
-        {
-            *text = format!("Original token count: {original_token_count}\nOutput:\n{text}");
-        }
+        let (truncated_items, _) =
+            formatted_truncate_text_content_items_with_policy(&items, policy);
        return truncated_items;
    }

-    truncate_function_output_items_with_policy(&items, TruncationPolicy::Tokens(max_output_tokens))
+    truncate_function_output_items_with_policy(&items, policy)
 }

 async fn build_enabled_tools(exec: &ExecContext) -> Vec<EnabledTool> {