Prefix code mode output with success or failure message and include error stack (#14272)

2026-04-24 14:45:27 +00:00 · 2026-03-10 18:33:52 -07:00
parent cec211cabc
commit 24b8d443b8
5 changed files with 211 additions and 89 deletions
--- a/codex-rs/core/src/tools/code_mode.rs
+++ b/codex-rs/core/src/tools/code_mode.rs
@@ -1,6 +1,6 @@
 use std::collections::HashMap;
-use std::process::ExitStatus;
 use std::sync::Arc;
+use std::time::Duration;

 use crate::client_common::tools::ToolSpec;
 use crate::codex::Session;
@@ -10,6 +10,7 @@ use crate::exec_env::create_env;
 use crate::features::Feature;
 use crate::function_tool::FunctionCallError;
 use crate::tools::ToolRouter;
+use crate::tools::context::FunctionToolOutput;
 use crate::tools::context::SharedTurnDiffTracker;
 use crate::tools::context::ToolPayload;
 use crate::tools::js_repl::resolve_compatible_node;
@@ -81,6 +82,8 @@ enum NodeToHostMessage {
        content_items: Vec<JsonValue>,
        stored_values: HashMap<String, JsonValue>,
        #[serde(default)]
+        error_text: Option<String>,
+        #[serde(default)]
        max_output_tokens_per_exec_call: Option<usize>,
    },
 }
@@ -105,7 +108,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
    ));
    section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
    section.push_str(&format!(
-        "- Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution; the default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n",
+        "- Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution; the default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. The returned content starts with a separate `Script completed` or `Script failed` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
    ));
    section.push_str(
        "- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
@@ -121,7 +124,7 @@ pub(crate) async fn execute(
    turn: Arc<TurnContext>,
    tracker: SharedTurnDiffTracker,
    code: String,
-) -> Result<Vec<FunctionCallOutputContentItem>, FunctionCallError> {
+) -> Result<FunctionToolOutput, FunctionCallError> {
    let exec = ExecContext {
        session,
        turn,
@@ -140,8 +143,9 @@ async fn execute_node(
    source: String,
    enabled_tools: Vec<EnabledTool>,
    stored_values: HashMap<String, JsonValue>,
-) -> Result<Vec<FunctionCallOutputContentItem>, String> {
+) -> Result<FunctionToolOutput, String> {
    let node_path = resolve_compatible_node(exec.turn.config.js_repl_node_path.as_deref()).await?;
+    let started_at = std::time::Instant::now();

    let env = create_env(&exec.turn.shell_environment_policy, None);
    let mut cmd = tokio::process::Command::new(&node_path);
@@ -190,7 +194,7 @@ async fn execute_node(
    .await?;

    let mut stdout_lines = BufReader::new(stdout).lines();
-    let mut final_content_items = None;
+    let mut pending_result = None;
    while let Some(line) = stdout_lines
        .next_line()
        .await
@@ -213,6 +217,7 @@ async fn execute_node(
            NodeToHostMessage::Result {
                content_items,
                stored_values,
+                error_text,
                max_output_tokens_per_exec_call,
            } => {
                exec.session
@@ -220,8 +225,9 @@ async fn execute_node(
                    .code_mode_store
                    .replace_stored_values(stored_values)
                    .await;
-                final_content_items = Some(truncate_code_mode_result(
+                pending_result = Some((
                    output_content_items_from_json_values(content_items)?,
+                    error_text,
                    max_output_tokens_per_exec_call,
                ));
                break;
@@ -238,20 +244,39 @@ async fn execute_node(
    let stderr = stderr_task
        .await
        .map_err(|err| format!("failed to collect {PUBLIC_TOOL_NAME} stderr: {err}"))?;
+    let wall_time = started_at.elapsed();
+    let success = status.success();

-    match final_content_items {
-        Some(content_items) if status.success() => Ok(content_items),
-        Some(_) => Err(format_runner_failure(
-            &format!("{PUBLIC_TOOL_NAME} execution failed"),
-            status,
-            &stderr,
-        )),
-        None => Err(format_runner_failure(
-            &format!("{PUBLIC_TOOL_NAME} runner exited without returning a result"),
-            status,
-            &stderr,
-        )),
+    let Some((mut content_items, error_text, max_output_tokens_per_exec_call)) = pending_result
+    else {
+        let message = if stderr.is_empty() {
+            format!("{PUBLIC_TOOL_NAME} runner exited without returning a result (status {status})")
+        } else {
+            stderr
+        };
+        return Err(message);
+    };
+
+    if !success {
+        let error_text = error_text.unwrap_or_else(|| {
+            if stderr.is_empty() {
+                format!("Process exited with status {status}")
+            } else {
+                stderr
+            }
+        });
+        content_items.push(FunctionCallOutputContentItem::InputText {
+            text: format!("Script error:\n{error_text}"),
+        });
    }
+
+    let mut content_items =
+        truncate_code_mode_result(content_items, max_output_tokens_per_exec_call);
+    prepend_script_status(&mut content_items, success, wall_time);
+    Ok(FunctionToolOutput::from_content(
+        content_items,
+        Some(success),
+    ))
 }

 async fn write_message(
@@ -274,15 +299,21 @@ async fn write_message(
        .map_err(|err| format!("failed to flush {PUBLIC_TOOL_NAME} message: {err}"))
 }

-fn append_stderr(message: String, stderr: &str) -> String {
-    if stderr.trim().is_empty() {
-        return message;
-    }
-    format!("{message}\n\nnode stderr:\n{stderr}")
-}
-
-fn format_runner_failure(message: &str, status: ExitStatus, stderr: &str) -> String {
-    append_stderr(format!("{message} (status {status})"), stderr)
+fn prepend_script_status(
+    content_items: &mut Vec<FunctionCallOutputContentItem>,
+    success: bool,
+    wall_time: Duration,
+) {
+    let wall_time_seconds = ((wall_time.as_secs_f32()) * 10.0).round() / 10.0;
+    let header = format!(
+        "{}\nWall time {wall_time_seconds:.1} seconds\nOutput:\n",
+        if success {
+            "Script completed"
+        } else {
+            "Script failed"
+        }
+    );
+    content_items.insert(0, FunctionCallOutputContentItem::InputText { text: header });
 }

 fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result<String, String> {
@@ -301,25 +332,17 @@ fn truncate_code_mode_result(
    max_output_tokens_per_exec_call: Option<usize>,
 ) -> Vec<FunctionCallOutputContentItem> {
    let max_output_tokens = resolve_max_tokens(max_output_tokens_per_exec_call);
+    let policy = TruncationPolicy::Tokens(max_output_tokens);
    if items
        .iter()
        .all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }))
    {
-        let (mut truncated_items, original_token_count) =
-            formatted_truncate_text_content_items_with_policy(
-                &items,
-                TruncationPolicy::Tokens(max_output_tokens),
-            );
-        if let Some(original_token_count) = original_token_count
-            && let Some(FunctionCallOutputContentItem::InputText { text }) =
-                truncated_items.first_mut()
-        {
-            *text = format!("Original token count: {original_token_count}\nOutput:\n{text}");
-        }
+        let (truncated_items, _) =
+            formatted_truncate_text_content_items_with_policy(&items, policy);
        return truncated_items;
    }

-    truncate_function_output_items_with_policy(&items, TruncationPolicy::Tokens(max_output_tokens))
+    truncate_function_output_items_with_policy(&items, policy)
 }

 async fn build_enabled_tools(exec: &ExecContext) -> Vec<EnabledTool> {
--- a/codex-rs/core/src/tools/code_mode_runner.cjs
+++ b/codex-rs/core/src/tools/code_mode_runner.cjs
@@ -104,6 +104,10 @@ function readContentItems(context) {
  }
 }

+function formatErrorText(error) {
+  return String(error && error.stack ? error.stack : error);
+}
+
 function isValidIdentifier(name) {
  return /^[A-Za-z_$][0-9A-Za-z_$]*$/.test(name);
 }
@@ -378,11 +382,11 @@ async function main() {
    });
    process.exit(0);
  } catch (error) {
-    process.stderr.write(`${String(error && error.stack ? error.stack : error)}\n`);
    await protocol.send({
      type: 'result',
      content_items: readContentItems(context),
      stored_values: state.storedValues,
+      error_text: formatErrorText(error),
      max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
    });
    process.exit(1);
@@ -391,7 +395,7 @@ async function main() {

 void main().catch(async (error) => {
  try {
-    process.stderr.write(`${String(error && error.stack ? error.stack : error)}\n`);
+    process.stderr.write(`${formatErrorText(error)}\n`);
  } finally {
    process.exitCode = 1;
  }
--- a/codex-rs/core/src/tools/handlers/code_mode.rs
+++ b/codex-rs/core/src/tools/handlers/code_mode.rs
@@ -48,7 +48,6 @@ impl ToolHandler for CodeModeHandler {
            }
        };

-        let content_items = code_mode::execute(session, turn, tracker, code).await?;
-        Ok(FunctionToolOutput::from_content(content_items, Some(true)))
+        code_mode::execute(session, turn, tracker, code).await
    }
 }
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -1621,7 +1621,7 @@ source: /[\s\S]+/
        enabled_tool_names.join(", ")
    };
    let description = format!(
-        "Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution. The default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
+        "Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution. The default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. The returned content starts with a separate `Script completed` or `Script failed` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
    );

    ToolSpec::Freeform(FreeformTool {
--- a/codex-rs/core/tests/suite/code_mode.rs
+++ b/codex-rs/core/tests/suite/code_mode.rs
@@ -24,14 +24,35 @@ use std::fs;
 use std::time::Duration;
 use wiremock::MockServer;

-fn custom_tool_output_text_and_success(
+fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
+    req.custom_tool_call_output(call_id)
+        .get("output")
+        .and_then(Value::as_array)
+        .expect("custom tool output should be serialized as content items")
+        .clone()
+}
+
+fn text_item(items: &[Value], index: usize) -> &str {
+    items[index]
+        .get("text")
+        .and_then(Value::as_str)
+        .expect("content item should be input_text")
+}
+
+fn custom_tool_output_body_and_success(
    req: &ResponsesRequest,
    call_id: &str,
 ) -> (String, Option<bool>) {
-    let (output, success) = req
+    let (_, success) = req
        .custom_tool_call_output_content_and_success(call_id)
        .expect("custom tool output should be present");
-    (output.unwrap_or_default(), success)
+    let items = custom_tool_output_items(req, call_id);
+    let output = items
+        .iter()
+        .skip(1)
+        .filter_map(|item| item.get("text").and_then(Value::as_str))
+        .collect();
+    (output, success)
 }

 async fn run_code_mode_turn(
@@ -152,13 +173,16 @@ add_content(JSON.stringify(await exec_command({ cmd: "printf code_mode_exec_mark
    .await?;

    let req = second_mock.single_request();
-    let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
-    assert_ne!(
-        success,
-        Some(false),
-        "exec call failed unexpectedly: {output}"
+    let items = custom_tool_output_items(&req, "call-1");
+    assert_eq!(items.len(), 2);
+    assert_regex_match(
+        concat!(
+            r"(?s)\A",
+            r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
+        ),
+        text_item(&items, 0),
    );
-    let parsed: Value = serde_json::from_str(&output)?;
+    let parsed: Value = serde_json::from_str(text_item(&items, 1))?;
    assert!(
        parsed
            .get("chunk_id")
@@ -201,22 +225,66 @@ add_content(JSON.stringify(await exec_command({
    .await?;

    let req = second_mock.single_request();
-    let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
-    assert_ne!(
-        success,
-        Some(false),
-        "exec call failed unexpectedly: {output}"
+    let items = custom_tool_output_items(&req, "call-1");
+    assert_eq!(items.len(), 2);
+    assert_regex_match(
+        concat!(
+            r"(?s)\A",
+            r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
+        ),
+        text_item(&items, 0),
    );
    let expected_pattern = r#"(?sx)
 \A
-Original\ token\ count:\ \d+\n
-Output:\n
 Total\ output\ lines:\ 1\n
 \n
-\{"chunk_id".*…\d+\ tokens\ truncated….*
+.*…\d+\ tokens\ truncated….*
 \z
 "#;
-    assert_regex_match(expected_pattern, &output);
+    assert_regex_match(expected_pattern, text_item(&items, 1));
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn code_mode_returns_accumulated_output_when_script_fails() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = responses::start_mock_server().await;
+    let (_test, second_mock) = run_code_mode_turn(
+        &server,
+        "use code_mode to surface script failures",
+        r#"
+add_content("before crash");
+add_content("still before crash");
+throw new Error("boom");
+"#,
+        false,
+    )
+    .await?;
+
+    let req = second_mock.single_request();
+    let items = custom_tool_output_items(&req, "call-1");
+    assert_eq!(items.len(), 4);
+    assert_regex_match(
+        concat!(
+            r"(?s)\A",
+            r"Script failed\nWall time \d+\.\d seconds\nOutput:\n\z"
+        ),
+        text_item(&items, 0),
+    );
+    assert_eq!(text_item(&items, 1), "before crash");
+    assert_eq!(text_item(&items, 2), "still before crash");
+    assert_regex_match(
+        r#"(?sx)
+\A
+Script\ error:\n
+Error:\ boom\n
+(?:\s+at\ .+\n?)+
+\z
+"#,
+        text_item(&items, 3),
+    );

    Ok(())
 }
@@ -239,7 +307,7 @@ output_text({ json: true });
    .await?;

    let req = second_mock.single_request();
-    let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
+    let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
    assert_ne!(
        success,
        Some(false),
@@ -270,14 +338,25 @@ output_text(circular);
    .await?;

    let req = second_mock.single_request();
-    let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
+    let items = custom_tool_output_items(&req, "call-1");
+    let (_, success) = req
+        .custom_tool_call_output_content_and_success("call-1")
+        .expect("custom tool output should be present");
    assert_ne!(
        success,
        Some(true),
        "circular stringify unexpectedly succeeded"
    );
-    assert!(output.contains("exec execution failed"));
-    assert!(output.contains("Converting circular structure to JSON"));
+    assert_eq!(items.len(), 2);
+    assert_regex_match(
+        concat!(
+            r"(?s)\A",
+            r"Script failed\nWall time \d+\.\d seconds\nOutput:\n\z"
+        ),
+        text_item(&items, 0),
+    );
+    assert!(text_item(&items, 1).contains("Script error:"));
+    assert!(text_item(&items, 1).contains("Converting circular structure to JSON"));

    Ok(())
 }
@@ -301,28 +380,34 @@ output_image("data:image/png;base64,AAA");
    .await?;

    let req = second_mock.single_request();
-    let (_, success) = custom_tool_output_text_and_success(&req, "call-1");
+    let items = custom_tool_output_items(&req, "call-1");
+    let (_, success) = custom_tool_output_body_and_success(&req, "call-1");
    assert_ne!(
        success,
        Some(false),
        "code_mode image output failed unexpectedly"
    );
+    assert_eq!(items.len(), 3);
+    assert_regex_match(
+        concat!(
+            r"(?s)\A",
+            r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
+        ),
+        text_item(&items, 0),
+    );
    assert_eq!(
-        req.custom_tool_call_output("call-1"),
+        items[1],
        serde_json::json!({
-            "type": "custom_tool_call_output",
-            "call_id": "call-1",
-            "output": [
-                {
-                    "type": "input_image",
-                    "image_url": "https://example.com/image.jpg"
-                },
-                {
-                    "type": "input_image",
-                    "image_url": "data:image/png;base64,AAA"
-                }
-            ]
-        })
+            "type": "input_image",
+            "image_url": "https://example.com/image.jpg"
+        }),
+    );
+    assert_eq!(
+        items[2],
+        serde_json::json!({
+            "type": "input_image",
+            "image_url": "data:image/png;base64,AAA"
+        }),
    );

    Ok(())
@@ -345,11 +430,22 @@ async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
        run_code_mode_turn(&server, "use exec to run apply_patch", &code, true).await?;

    let req = second_mock.single_request();
-    let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
+    let items = custom_tool_output_items(&req, "call-1");
+    let (_, success) = req
+        .custom_tool_call_output_content_and_success("call-1")
+        .expect("custom tool output should be present");
    assert_ne!(
        success,
        Some(false),
-        "exec apply_patch call failed unexpectedly: {output}"
+        "exec apply_patch call failed unexpectedly: {items:?}"
+    );
+    assert_eq!(items.len(), 2);
+    assert_regex_match(
+        concat!(
+            r"(?s)\A",
+            r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
+        ),
+        text_item(&items, 0),
    );

    let file_path = test.cwd_path().join(file_name);
@@ -381,7 +477,7 @@ add_content(
        run_code_mode_turn_with_rmcp(&server, "use exec to run the rmcp echo tool", code).await?;

    let req = second_mock.single_request();
-    let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
+    let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
    assert_ne!(
        success,
        Some(false),
@@ -420,7 +516,7 @@ add_content(
        run_code_mode_turn_with_rmcp(&server, "use exec to run the rmcp echo tool", code).await?;

    let req = second_mock.single_request();
-    let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
+    let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
    assert_ne!(
        success,
        Some(false),
@@ -464,7 +560,7 @@ add_content(
    .await?;

    let req = second_mock.single_request();
-    let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
+    let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
    assert_ne!(
        success,
        Some(false),
@@ -505,7 +601,7 @@ add_content(
        run_code_mode_turn_with_rmcp(&server, "use exec to call rmcp echo badly", code).await?;

    let req = second_mock.single_request();
-    let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
+    let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
    assert_ne!(
        success,
        Some(false),
@@ -562,7 +658,7 @@ add_content("stored");

    let first_request = first_follow_up.single_request();
    let (first_output, first_success) =
-        custom_tool_output_text_and_success(&first_request, "call-1");
+        custom_tool_output_body_and_success(&first_request, "call-1");
    assert_ne!(
        first_success,
        Some(false),
@@ -600,7 +696,7 @@ add_content(JSON.stringify(load("nb")));

    let second_request = second_follow_up.single_request();
    let (second_output, second_success) =
-        custom_tool_output_text_and_success(&second_request, "call-2");
+        custom_tool_output_body_and_success(&second_request, "call-2");
    assert_ne!(
        second_success,
        Some(false),