mirror of
https://github.com/openai/codex.git
synced 2026-04-24 22:54:54 +00:00
Prefix code mode output with success or failure message and include error stack (#14272)
This commit is contained in:
committed by
Michael Bolin
parent
da74da6684
commit
01792a4c61
@@ -1,6 +1,6 @@
|
||||
use std::collections::HashMap;
|
||||
use std::process::ExitStatus;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::codex::Session;
|
||||
@@ -10,6 +10,7 @@ use crate::exec_env::create_env;
|
||||
use crate::features::Feature;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::ToolRouter;
|
||||
use crate::tools::context::FunctionToolOutput;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::js_repl::resolve_compatible_node;
|
||||
@@ -81,6 +82,8 @@ enum NodeToHostMessage {
|
||||
content_items: Vec<JsonValue>,
|
||||
stored_values: HashMap<String, JsonValue>,
|
||||
#[serde(default)]
|
||||
error_text: Option<String>,
|
||||
#[serde(default)]
|
||||
max_output_tokens_per_exec_call: Option<usize>,
|
||||
},
|
||||
}
|
||||
@@ -105,7 +108,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
|
||||
));
|
||||
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
|
||||
section.push_str(&format!(
|
||||
"- Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution; the default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n",
|
||||
"- Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution; the default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. The returned content starts with a separate `Script completed` or `Script failed` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
|
||||
));
|
||||
section.push_str(
|
||||
"- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
|
||||
@@ -121,7 +124,7 @@ pub(crate) async fn execute(
|
||||
turn: Arc<TurnContext>,
|
||||
tracker: SharedTurnDiffTracker,
|
||||
code: String,
|
||||
) -> Result<Vec<FunctionCallOutputContentItem>, FunctionCallError> {
|
||||
) -> Result<FunctionToolOutput, FunctionCallError> {
|
||||
let exec = ExecContext {
|
||||
session,
|
||||
turn,
|
||||
@@ -140,8 +143,9 @@ async fn execute_node(
|
||||
source: String,
|
||||
enabled_tools: Vec<EnabledTool>,
|
||||
stored_values: HashMap<String, JsonValue>,
|
||||
) -> Result<Vec<FunctionCallOutputContentItem>, String> {
|
||||
) -> Result<FunctionToolOutput, String> {
|
||||
let node_path = resolve_compatible_node(exec.turn.config.js_repl_node_path.as_deref()).await?;
|
||||
let started_at = std::time::Instant::now();
|
||||
|
||||
let env = create_env(&exec.turn.shell_environment_policy, None);
|
||||
let mut cmd = tokio::process::Command::new(&node_path);
|
||||
@@ -190,7 +194,7 @@ async fn execute_node(
|
||||
.await?;
|
||||
|
||||
let mut stdout_lines = BufReader::new(stdout).lines();
|
||||
let mut final_content_items = None;
|
||||
let mut pending_result = None;
|
||||
while let Some(line) = stdout_lines
|
||||
.next_line()
|
||||
.await
|
||||
@@ -213,6 +217,7 @@ async fn execute_node(
|
||||
NodeToHostMessage::Result {
|
||||
content_items,
|
||||
stored_values,
|
||||
error_text,
|
||||
max_output_tokens_per_exec_call,
|
||||
} => {
|
||||
exec.session
|
||||
@@ -220,8 +225,9 @@ async fn execute_node(
|
||||
.code_mode_store
|
||||
.replace_stored_values(stored_values)
|
||||
.await;
|
||||
final_content_items = Some(truncate_code_mode_result(
|
||||
pending_result = Some((
|
||||
output_content_items_from_json_values(content_items)?,
|
||||
error_text,
|
||||
max_output_tokens_per_exec_call,
|
||||
));
|
||||
break;
|
||||
@@ -238,20 +244,39 @@ async fn execute_node(
|
||||
let stderr = stderr_task
|
||||
.await
|
||||
.map_err(|err| format!("failed to collect {PUBLIC_TOOL_NAME} stderr: {err}"))?;
|
||||
let wall_time = started_at.elapsed();
|
||||
let success = status.success();
|
||||
|
||||
match final_content_items {
|
||||
Some(content_items) if status.success() => Ok(content_items),
|
||||
Some(_) => Err(format_runner_failure(
|
||||
&format!("{PUBLIC_TOOL_NAME} execution failed"),
|
||||
status,
|
||||
&stderr,
|
||||
)),
|
||||
None => Err(format_runner_failure(
|
||||
&format!("{PUBLIC_TOOL_NAME} runner exited without returning a result"),
|
||||
status,
|
||||
&stderr,
|
||||
)),
|
||||
let Some((mut content_items, error_text, max_output_tokens_per_exec_call)) = pending_result
|
||||
else {
|
||||
let message = if stderr.is_empty() {
|
||||
format!("{PUBLIC_TOOL_NAME} runner exited without returning a result (status {status})")
|
||||
} else {
|
||||
stderr
|
||||
};
|
||||
return Err(message);
|
||||
};
|
||||
|
||||
if !success {
|
||||
let error_text = error_text.unwrap_or_else(|| {
|
||||
if stderr.is_empty() {
|
||||
format!("Process exited with status {status}")
|
||||
} else {
|
||||
stderr
|
||||
}
|
||||
});
|
||||
content_items.push(FunctionCallOutputContentItem::InputText {
|
||||
text: format!("Script error:\n{error_text}"),
|
||||
});
|
||||
}
|
||||
|
||||
let mut content_items =
|
||||
truncate_code_mode_result(content_items, max_output_tokens_per_exec_call);
|
||||
prepend_script_status(&mut content_items, success, wall_time);
|
||||
Ok(FunctionToolOutput::from_content(
|
||||
content_items,
|
||||
Some(success),
|
||||
))
|
||||
}
|
||||
|
||||
async fn write_message(
|
||||
@@ -274,15 +299,21 @@ async fn write_message(
|
||||
.map_err(|err| format!("failed to flush {PUBLIC_TOOL_NAME} message: {err}"))
|
||||
}
|
||||
|
||||
/// Appends captured node stderr to `message` under a `node stderr:` heading.
///
/// When `stderr` is empty or contains only whitespace, `message` is returned unchanged.
/// Otherwise the untrimmed `stderr` text is appended after a blank line and the heading.
fn append_stderr(message: String, stderr: &str) -> String {
    match stderr.trim() {
        // Nothing useful was captured; keep the message as-is.
        "" => message,
        _ => format!("{message}\n\nnode stderr:\n{stderr}"),
    }
}
|
||||
|
||||
fn format_runner_failure(message: &str, status: ExitStatus, stderr: &str) -> String {
|
||||
append_stderr(format!("{message} (status {status})"), stderr)
|
||||
fn prepend_script_status(
|
||||
content_items: &mut Vec<FunctionCallOutputContentItem>,
|
||||
success: bool,
|
||||
wall_time: Duration,
|
||||
) {
|
||||
let wall_time_seconds = ((wall_time.as_secs_f32()) * 10.0).round() / 10.0;
|
||||
let header = format!(
|
||||
"{}\nWall time {wall_time_seconds:.1} seconds\nOutput:\n",
|
||||
if success {
|
||||
"Script completed"
|
||||
} else {
|
||||
"Script failed"
|
||||
}
|
||||
);
|
||||
content_items.insert(0, FunctionCallOutputContentItem::InputText { text: header });
|
||||
}
|
||||
|
||||
fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result<String, String> {
|
||||
@@ -301,25 +332,17 @@ fn truncate_code_mode_result(
|
||||
max_output_tokens_per_exec_call: Option<usize>,
|
||||
) -> Vec<FunctionCallOutputContentItem> {
|
||||
let max_output_tokens = resolve_max_tokens(max_output_tokens_per_exec_call);
|
||||
let policy = TruncationPolicy::Tokens(max_output_tokens);
|
||||
if items
|
||||
.iter()
|
||||
.all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }))
|
||||
{
|
||||
let (mut truncated_items, original_token_count) =
|
||||
formatted_truncate_text_content_items_with_policy(
|
||||
&items,
|
||||
TruncationPolicy::Tokens(max_output_tokens),
|
||||
);
|
||||
if let Some(original_token_count) = original_token_count
|
||||
&& let Some(FunctionCallOutputContentItem::InputText { text }) =
|
||||
truncated_items.first_mut()
|
||||
{
|
||||
*text = format!("Original token count: {original_token_count}\nOutput:\n{text}");
|
||||
}
|
||||
let (truncated_items, _) =
|
||||
formatted_truncate_text_content_items_with_policy(&items, policy);
|
||||
return truncated_items;
|
||||
}
|
||||
|
||||
truncate_function_output_items_with_policy(&items, TruncationPolicy::Tokens(max_output_tokens))
|
||||
truncate_function_output_items_with_policy(&items, policy)
|
||||
}
|
||||
|
||||
async fn build_enabled_tools(exec: &ExecContext) -> Vec<EnabledTool> {
|
||||
|
||||
@@ -104,6 +104,10 @@ function readContentItems(context) {
|
||||
}
|
||||
}
|
||||
|
||||
// Renders a thrown value as text: the value's `stack` when the value and its
// `stack` property are both truthy, otherwise the value itself, via `String(...)`.
function formatErrorText(error) {
  if (error && error.stack) {
    return String(error.stack);
  }
  return String(error);
}
|
||||
|
||||
// Reports whether `name` is made up only of ASCII letters, digits, `_`, or `$`
// and does not start with a digit. The empty string is rejected.
function isValidIdentifier(name) {
  const identifierPattern = /^[A-Za-z_$][0-9A-Za-z_$]*$/;
  return identifierPattern.test(name);
}
|
||||
@@ -378,11 +382,11 @@ async function main() {
|
||||
});
|
||||
process.exit(0);
|
||||
} catch (error) {
|
||||
process.stderr.write(`${String(error && error.stack ? error.stack : error)}\n`);
|
||||
await protocol.send({
|
||||
type: 'result',
|
||||
content_items: readContentItems(context),
|
||||
stored_values: state.storedValues,
|
||||
error_text: formatErrorText(error),
|
||||
max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
|
||||
});
|
||||
process.exit(1);
|
||||
@@ -391,7 +395,7 @@ async function main() {
|
||||
|
||||
void main().catch(async (error) => {
|
||||
try {
|
||||
process.stderr.write(`${String(error && error.stack ? error.stack : error)}\n`);
|
||||
process.stderr.write(`${formatErrorText(error)}\n`);
|
||||
} finally {
|
||||
process.exitCode = 1;
|
||||
}
|
||||
|
||||
@@ -48,7 +48,6 @@ impl ToolHandler for CodeModeHandler {
|
||||
}
|
||||
};
|
||||
|
||||
let content_items = code_mode::execute(session, turn, tracker, code).await?;
|
||||
Ok(FunctionToolOutput::from_content(content_items, Some(true)))
|
||||
code_mode::execute(session, turn, tracker, code).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1621,7 +1621,7 @@ source: /[\s\S]+/
|
||||
enabled_tool_names.join(", ")
|
||||
};
|
||||
let description = format!(
|
||||
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution. The default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. 
Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
|
||||
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution. The default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. The returned content starts with a separate `Script completed` or `Script failed` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. 
Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
|
||||
);
|
||||
|
||||
ToolSpec::Freeform(FreeformTool {
|
||||
|
||||
@@ -24,14 +24,35 @@ use std::fs;
|
||||
use std::time::Duration;
|
||||
use wiremock::MockServer;
|
||||
|
||||
fn custom_tool_output_text_and_success(
|
||||
fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
|
||||
req.custom_tool_call_output(call_id)
|
||||
.get("output")
|
||||
.and_then(Value::as_array)
|
||||
.expect("custom tool output should be serialized as content items")
|
||||
.clone()
|
||||
}
|
||||
|
||||
fn text_item(items: &[Value], index: usize) -> &str {
|
||||
items[index]
|
||||
.get("text")
|
||||
.and_then(Value::as_str)
|
||||
.expect("content item should be input_text")
|
||||
}
|
||||
|
||||
fn custom_tool_output_body_and_success(
|
||||
req: &ResponsesRequest,
|
||||
call_id: &str,
|
||||
) -> (String, Option<bool>) {
|
||||
let (output, success) = req
|
||||
let (_, success) = req
|
||||
.custom_tool_call_output_content_and_success(call_id)
|
||||
.expect("custom tool output should be present");
|
||||
(output.unwrap_or_default(), success)
|
||||
let items = custom_tool_output_items(req, call_id);
|
||||
let output = items
|
||||
.iter()
|
||||
.skip(1)
|
||||
.filter_map(|item| item.get("text").and_then(Value::as_str))
|
||||
.collect();
|
||||
(output, success)
|
||||
}
|
||||
|
||||
async fn run_code_mode_turn(
|
||||
@@ -152,13 +173,16 @@ add_content(JSON.stringify(await exec_command({ cmd: "printf code_mode_exec_mark
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"exec call failed unexpectedly: {output}"
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
assert_eq!(items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&items, 0),
|
||||
);
|
||||
let parsed: Value = serde_json::from_str(&output)?;
|
||||
let parsed: Value = serde_json::from_str(text_item(&items, 1))?;
|
||||
assert!(
|
||||
parsed
|
||||
.get("chunk_id")
|
||||
@@ -201,22 +225,66 @@ add_content(JSON.stringify(await exec_command({
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"exec call failed unexpectedly: {output}"
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
assert_eq!(items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&items, 0),
|
||||
);
|
||||
let expected_pattern = r#"(?sx)
|
||||
\A
|
||||
Original\ token\ count:\ \d+\n
|
||||
Output:\n
|
||||
Total\ output\ lines:\ 1\n
|
||||
\n
|
||||
\{"chunk_id".*…\d+\ tokens\ truncated….*
|
||||
.*…\d+\ tokens\ truncated….*
|
||||
\z
|
||||
"#;
|
||||
assert_regex_match(expected_pattern, &output);
|
||||
assert_regex_match(expected_pattern, text_item(&items, 1));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Checks that content added before a script throws is still returned, together with a
/// `Script failed` header item and a trailing `Script error:` item carrying the error stack.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_returns_accumulated_output_when_script_fails() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
// The script emits two content items and then throws, so the run ends in failure
// after output has already been accumulated.
// NOTE(review): the meaning of the trailing `false` argument is not visible here; confirm
// against `run_code_mode_turn`.
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use code_mode to surface script failures",
|
||||
r#"
|
||||
add_content("before crash");
|
||||
add_content("still before crash");
|
||||
throw new Error("boom");
|
||||
"#,
|
||||
false,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
// Expected layout: status header, the two items added by the script, then the error item.
assert_eq!(items.len(), 4);
|
||||
// Item 0 must be exactly the failure header; wall time is matched as digits with one decimal.
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script failed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&items, 0),
|
||||
);
|
||||
// Items 1 and 2 are the script's own output, in the order it was added.
assert_eq!(text_item(&items, 1), "before crash");
|
||||
assert_eq!(text_item(&items, 2), "still before crash");
|
||||
// Item 3 must start with `Script error:` and `Error: boom`, followed by one or more
// stack-frame lines beginning with `at`.
assert_regex_match(
|
||||
r#"(?sx)
|
||||
\A
|
||||
Script\ error:\n
|
||||
Error:\ boom\n
|
||||
(?:\s+at\ .+\n?)+
|
||||
\z
|
||||
"#,
|
||||
text_item(&items, 3),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -239,7 +307,7 @@ output_text({ json: true });
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
@@ -270,14 +338,25 @@ output_text(circular);
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
let (_, success) = req
|
||||
.custom_tool_call_output_content_and_success("call-1")
|
||||
.expect("custom tool output should be present");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(true),
|
||||
"circular stringify unexpectedly succeeded"
|
||||
);
|
||||
assert!(output.contains("exec execution failed"));
|
||||
assert!(output.contains("Converting circular structure to JSON"));
|
||||
assert_eq!(items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script failed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&items, 0),
|
||||
);
|
||||
assert!(text_item(&items, 1).contains("Script error:"));
|
||||
assert!(text_item(&items, 1).contains("Converting circular structure to JSON"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -301,28 +380,34 @@ output_image("data:image/png;base64,AAA");
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (_, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
let (_, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode image output failed unexpectedly"
|
||||
);
|
||||
assert_eq!(items.len(), 3);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&items, 0),
|
||||
);
|
||||
assert_eq!(
|
||||
req.custom_tool_call_output("call-1"),
|
||||
items[1],
|
||||
serde_json::json!({
|
||||
"type": "custom_tool_call_output",
|
||||
"call_id": "call-1",
|
||||
"output": [
|
||||
{
|
||||
"type": "input_image",
|
||||
"image_url": "https://example.com/image.jpg"
|
||||
},
|
||||
{
|
||||
"type": "input_image",
|
||||
"image_url": "data:image/png;base64,AAA"
|
||||
}
|
||||
]
|
||||
})
|
||||
"type": "input_image",
|
||||
"image_url": "https://example.com/image.jpg"
|
||||
}),
|
||||
);
|
||||
assert_eq!(
|
||||
items[2],
|
||||
serde_json::json!({
|
||||
"type": "input_image",
|
||||
"image_url": "data:image/png;base64,AAA"
|
||||
}),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
@@ -345,11 +430,22 @@ async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
|
||||
run_code_mode_turn(&server, "use exec to run apply_patch", &code, true).await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
let (_, success) = req
|
||||
.custom_tool_call_output_content_and_success("call-1")
|
||||
.expect("custom tool output should be present");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"exec apply_patch call failed unexpectedly: {output}"
|
||||
"exec apply_patch call failed unexpectedly: {items:?}"
|
||||
);
|
||||
assert_eq!(items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&items, 0),
|
||||
);
|
||||
|
||||
let file_path = test.cwd_path().join(file_name);
|
||||
@@ -381,7 +477,7 @@ add_content(
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to run the rmcp echo tool", code).await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
@@ -420,7 +516,7 @@ add_content(
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to run the rmcp echo tool", code).await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
@@ -464,7 +560,7 @@ add_content(
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
@@ -505,7 +601,7 @@ add_content(
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to call rmcp echo badly", code).await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
@@ -562,7 +658,7 @@ add_content("stored");
|
||||
|
||||
let first_request = first_follow_up.single_request();
|
||||
let (first_output, first_success) =
|
||||
custom_tool_output_text_and_success(&first_request, "call-1");
|
||||
custom_tool_output_body_and_success(&first_request, "call-1");
|
||||
assert_ne!(
|
||||
first_success,
|
||||
Some(false),
|
||||
@@ -600,7 +696,7 @@ add_content(JSON.stringify(load("nb")));
|
||||
|
||||
let second_request = second_follow_up.single_request();
|
||||
let (second_output, second_success) =
|
||||
custom_tool_output_text_and_success(&second_request, "call-2");
|
||||
custom_tool_output_body_and_success(&second_request, "call-2");
|
||||
assert_ne!(
|
||||
second_success,
|
||||
Some(false),
|
||||
|
||||
Reference in New Issue
Block a user