mirror of
https://github.com/openai/codex.git
synced 2026-04-26 15:45:02 +00:00
Prefix code mode output with success or failure message and include error stack (#14272)
This commit is contained in:
committed by
Michael Bolin
parent
da74da6684
commit
01792a4c61
@@ -24,14 +24,35 @@ use std::fs;
|
||||
use std::time::Duration;
|
||||
use wiremock::MockServer;
|
||||
|
||||
fn custom_tool_output_text_and_success(
|
||||
fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
|
||||
req.custom_tool_call_output(call_id)
|
||||
.get("output")
|
||||
.and_then(Value::as_array)
|
||||
.expect("custom tool output should be serialized as content items")
|
||||
.clone()
|
||||
}
|
||||
|
||||
fn text_item(items: &[Value], index: usize) -> &str {
|
||||
items[index]
|
||||
.get("text")
|
||||
.and_then(Value::as_str)
|
||||
.expect("content item should be input_text")
|
||||
}
|
||||
|
||||
fn custom_tool_output_body_and_success(
|
||||
req: &ResponsesRequest,
|
||||
call_id: &str,
|
||||
) -> (String, Option<bool>) {
|
||||
let (output, success) = req
|
||||
let (_, success) = req
|
||||
.custom_tool_call_output_content_and_success(call_id)
|
||||
.expect("custom tool output should be present");
|
||||
(output.unwrap_or_default(), success)
|
||||
let items = custom_tool_output_items(req, call_id);
|
||||
let output = items
|
||||
.iter()
|
||||
.skip(1)
|
||||
.filter_map(|item| item.get("text").and_then(Value::as_str))
|
||||
.collect();
|
||||
(output, success)
|
||||
}
|
||||
|
||||
async fn run_code_mode_turn(
|
||||
@@ -152,13 +173,16 @@ add_content(JSON.stringify(await exec_command({ cmd: "printf code_mode_exec_mark
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"exec call failed unexpectedly: {output}"
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
assert_eq!(items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&items, 0),
|
||||
);
|
||||
let parsed: Value = serde_json::from_str(&output)?;
|
||||
let parsed: Value = serde_json::from_str(text_item(&items, 1))?;
|
||||
assert!(
|
||||
parsed
|
||||
.get("chunk_id")
|
||||
@@ -201,22 +225,66 @@ add_content(JSON.stringify(await exec_command({
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"exec call failed unexpectedly: {output}"
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
assert_eq!(items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&items, 0),
|
||||
);
|
||||
let expected_pattern = r#"(?sx)
|
||||
\A
|
||||
Original\ token\ count:\ \d+\n
|
||||
Output:\n
|
||||
Total\ output\ lines:\ 1\n
|
||||
\n
|
||||
\{"chunk_id".*…\d+\ tokens\ truncated….*
|
||||
.*…\d+\ tokens\ truncated….*
|
||||
\z
|
||||
"#;
|
||||
assert_regex_match(expected_pattern, &output);
|
||||
assert_regex_match(expected_pattern, text_item(&items, 1));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_returns_accumulated_output_when_script_fails() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use code_mode to surface script failures",
|
||||
r#"
|
||||
add_content("before crash");
|
||||
add_content("still before crash");
|
||||
throw new Error("boom");
|
||||
"#,
|
||||
false,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
assert_eq!(items.len(), 4);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script failed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&items, 0),
|
||||
);
|
||||
assert_eq!(text_item(&items, 1), "before crash");
|
||||
assert_eq!(text_item(&items, 2), "still before crash");
|
||||
assert_regex_match(
|
||||
r#"(?sx)
|
||||
\A
|
||||
Script\ error:\n
|
||||
Error:\ boom\n
|
||||
(?:\s+at\ .+\n?)+
|
||||
\z
|
||||
"#,
|
||||
text_item(&items, 3),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -239,7 +307,7 @@ output_text({ json: true });
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
@@ -270,14 +338,25 @@ output_text(circular);
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
let (_, success) = req
|
||||
.custom_tool_call_output_content_and_success("call-1")
|
||||
.expect("custom tool output should be present");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(true),
|
||||
"circular stringify unexpectedly succeeded"
|
||||
);
|
||||
assert!(output.contains("exec execution failed"));
|
||||
assert!(output.contains("Converting circular structure to JSON"));
|
||||
assert_eq!(items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script failed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&items, 0),
|
||||
);
|
||||
assert!(text_item(&items, 1).contains("Script error:"));
|
||||
assert!(text_item(&items, 1).contains("Converting circular structure to JSON"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -301,28 +380,34 @@ output_image("data:image/png;base64,AAA");
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (_, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
let (_, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode image output failed unexpectedly"
|
||||
);
|
||||
assert_eq!(items.len(), 3);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&items, 0),
|
||||
);
|
||||
assert_eq!(
|
||||
req.custom_tool_call_output("call-1"),
|
||||
items[1],
|
||||
serde_json::json!({
|
||||
"type": "custom_tool_call_output",
|
||||
"call_id": "call-1",
|
||||
"output": [
|
||||
{
|
||||
"type": "input_image",
|
||||
"image_url": "https://example.com/image.jpg"
|
||||
},
|
||||
{
|
||||
"type": "input_image",
|
||||
"image_url": "data:image/png;base64,AAA"
|
||||
}
|
||||
]
|
||||
})
|
||||
"type": "input_image",
|
||||
"image_url": "https://example.com/image.jpg"
|
||||
}),
|
||||
);
|
||||
assert_eq!(
|
||||
items[2],
|
||||
serde_json::json!({
|
||||
"type": "input_image",
|
||||
"image_url": "data:image/png;base64,AAA"
|
||||
}),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
@@ -345,11 +430,22 @@ async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
|
||||
run_code_mode_turn(&server, "use exec to run apply_patch", &code, true).await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
let (_, success) = req
|
||||
.custom_tool_call_output_content_and_success("call-1")
|
||||
.expect("custom tool output should be present");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"exec apply_patch call failed unexpectedly: {output}"
|
||||
"exec apply_patch call failed unexpectedly: {items:?}"
|
||||
);
|
||||
assert_eq!(items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&items, 0),
|
||||
);
|
||||
|
||||
let file_path = test.cwd_path().join(file_name);
|
||||
@@ -381,7 +477,7 @@ add_content(
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to run the rmcp echo tool", code).await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
@@ -420,7 +516,7 @@ add_content(
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to run the rmcp echo tool", code).await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
@@ -464,7 +560,7 @@ add_content(
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
@@ -505,7 +601,7 @@ add_content(
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to call rmcp echo badly", code).await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
@@ -562,7 +658,7 @@ add_content("stored");
|
||||
|
||||
let first_request = first_follow_up.single_request();
|
||||
let (first_output, first_success) =
|
||||
custom_tool_output_text_and_success(&first_request, "call-1");
|
||||
custom_tool_output_body_and_success(&first_request, "call-1");
|
||||
assert_ne!(
|
||||
first_success,
|
||||
Some(false),
|
||||
@@ -600,7 +696,7 @@ add_content(JSON.stringify(load("nb")));
|
||||
|
||||
let second_request = second_follow_up.single_request();
|
||||
let (second_output, second_success) =
|
||||
custom_tool_output_text_and_success(&second_request, "call-2");
|
||||
custom_tool_output_body_and_success(&second_request, "call-2");
|
||||
assert_ne!(
|
||||
second_success,
|
||||
Some(false),
|
||||
|
||||
Reference in New Issue
Block a user