mirror of
https://github.com/openai/codex.git
synced 2026-04-28 16:45:54 +00:00
Support multimodal custom tool outputs (#12948)
## Summary

This changes `custom_tool_call_output` to use the same output payload shape as `function_call_output`, so freeform tools can return either plain text or structured content items.

The main goal is to let `js_repl` return image content from nested `view_image` calls in its own `custom_tool_call_output`, instead of relying on a separate injected message.

## What changed

- Changed `custom_tool_call_output.output` from `string` to `FunctionCallOutputPayload`
- Updated freeform tool plumbing to preserve structured output bodies
- Updated `js_repl` to aggregate nested tool content items and attach them to the outer `js_repl` result
- Removed the old `js_repl` special case that injected `view_image` results as a separate pending user image message
- Updated normalization/history/truncation paths to handle multimodal `custom_tool_call_output`
- Regenerated app-server protocol schema artifacts

## Behavior

Direct `view_image` calls still return a `function_call_output` with image content.

When `view_image` is called inside `js_repl`, the outer `js_repl` `custom_tool_call_output` now carries:

- an `input_text` item if the JS produced text output
- one or more `input_image` items from nested tool results

So the nested image result now stays inside the `js_repl` tool output instead of being injected as a separate message.

## Compatibility

This is intended to be backward-compatible for resumed conversations. Older histories that stored `custom_tool_call_output.output` as a plain string still deserialize correctly, and older histories that used the previous injected-image-message flow also continue to resume.

Added regression coverage for resuming a pre-change rollout containing:

- string-valued `custom_tool_call_output`
- legacy injected image message history

#### [git stack](https://github.com/magus/git-stack-cli)

- 👉 `1` https://github.com/openai/codex/pull/12948
This commit is contained in:
committed by
GitHub
parent
f90e97e414
commit
7e980d7db6
@@ -35,6 +35,10 @@ use codex_protocol::openai_models::ModelsResponse;
|
||||
use codex_protocol::openai_models::ReasoningEffort;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::Op;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::RolloutLine;
|
||||
use codex_protocol::protocol::SessionMeta;
|
||||
use codex_protocol::protocol::SessionMetaLine;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::apps_test_server::AppsTestServer;
|
||||
@@ -344,6 +348,144 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
|
||||
assert!(pos_environment < pos_new_user);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn resume_replays_legacy_js_repl_image_rollout_shapes() {
|
||||
skip_if_no_network!();
|
||||
|
||||
// Early js_repl builds persisted image tool results as two separate rollout items:
|
||||
// a string-valued custom_tool_call_output plus a standalone user input_image message.
|
||||
// Current image tests cover today's shapes; this keeps resume compatibility for that
|
||||
// legacy rollout representation.
|
||||
let legacy_custom_tool_call = ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "legacy-js-call".to_string(),
|
||||
name: "js_repl".to_string(),
|
||||
input: "console.log('legacy image flow')".to_string(),
|
||||
};
|
||||
let legacy_image_url = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==";
|
||||
let rollout = vec![
|
||||
RolloutLine {
|
||||
timestamp: "2024-01-01T00:00:00.000Z".to_string(),
|
||||
item: RolloutItem::SessionMeta(SessionMetaLine {
|
||||
meta: SessionMeta {
|
||||
id: ThreadId::default(),
|
||||
timestamp: "2024-01-01T00:00:00Z".to_string(),
|
||||
cwd: ".".into(),
|
||||
originator: "test_originator".to_string(),
|
||||
cli_version: "test_version".to_string(),
|
||||
model_provider: Some("test-provider".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
git: None,
|
||||
}),
|
||||
},
|
||||
RolloutLine {
|
||||
timestamp: "2024-01-01T00:00:01.000Z".to_string(),
|
||||
item: RolloutItem::ResponseItem(legacy_custom_tool_call),
|
||||
},
|
||||
RolloutLine {
|
||||
timestamp: "2024-01-01T00:00:02.000Z".to_string(),
|
||||
item: RolloutItem::ResponseItem(ResponseItem::CustomToolCallOutput {
|
||||
call_id: "legacy-js-call".to_string(),
|
||||
output: FunctionCallOutputPayload::from_text("legacy js_repl stdout".to_string()),
|
||||
}),
|
||||
},
|
||||
RolloutLine {
|
||||
timestamp: "2024-01-01T00:00:03.000Z".to_string(),
|
||||
item: RolloutItem::ResponseItem(ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage {
|
||||
image_url: legacy_image_url.to_string(),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
}),
|
||||
},
|
||||
];
|
||||
|
||||
let tmpdir = TempDir::new().unwrap();
|
||||
let session_path = tmpdir
|
||||
.path()
|
||||
.join("resume-legacy-js-repl-image-rollout.jsonl");
|
||||
let mut f = std::fs::File::create(&session_path).unwrap();
|
||||
for line in rollout {
|
||||
writeln!(f, "{}", serde_json::to_string(&line).unwrap()).unwrap();
|
||||
}
|
||||
|
||||
let server = MockServer::start().await;
|
||||
let resp_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![ev_response_created("resp1"), ev_completed("resp1")]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let codex_home = Arc::new(TempDir::new().unwrap());
|
||||
let mut builder = test_codex().with_model("gpt-5.1");
|
||||
let test = builder
|
||||
.resume(&server, codex_home, session_path.clone())
|
||||
.await
|
||||
.expect("resume conversation");
|
||||
test.submit_turn("after resume").await.unwrap();
|
||||
|
||||
let input = resp_mock.single_request().input();
|
||||
|
||||
let legacy_output_index = input
|
||||
.iter()
|
||||
.position(|item| {
|
||||
item.get("type").and_then(|value| value.as_str()) == Some("custom_tool_call_output")
|
||||
&& item.get("call_id").and_then(|value| value.as_str()) == Some("legacy-js-call")
|
||||
})
|
||||
.expect("legacy custom tool output should be replayed");
|
||||
assert_eq!(
|
||||
input[legacy_output_index]
|
||||
.get("output")
|
||||
.and_then(|value| value.as_str()),
|
||||
Some("legacy js_repl stdout")
|
||||
);
|
||||
|
||||
let legacy_image_index = input
|
||||
.iter()
|
||||
.position(|item| {
|
||||
item.get("type").and_then(|value| value.as_str()) == Some("message")
|
||||
&& item.get("role").and_then(|value| value.as_str()) == Some("user")
|
||||
&& item
|
||||
.get("content")
|
||||
.and_then(|value| value.as_array())
|
||||
.is_some_and(|content| {
|
||||
content.iter().any(|entry| {
|
||||
entry.get("type").and_then(|value| value.as_str())
|
||||
== Some("input_image")
|
||||
&& entry.get("image_url").and_then(|value| value.as_str())
|
||||
== Some(legacy_image_url)
|
||||
})
|
||||
})
|
||||
})
|
||||
.expect("legacy injected image message should be replayed");
|
||||
|
||||
let new_user_index = input
|
||||
.iter()
|
||||
.position(|item| {
|
||||
item.get("type").and_then(|value| value.as_str()) == Some("message")
|
||||
&& item.get("role").and_then(|value| value.as_str()) == Some("user")
|
||||
&& item
|
||||
.get("content")
|
||||
.and_then(|value| value.as_array())
|
||||
.is_some_and(|content| {
|
||||
content.iter().any(|entry| {
|
||||
entry.get("type").and_then(|value| value.as_str()) == Some("input_text")
|
||||
&& entry.get("text").and_then(|value| value.as_str())
|
||||
== Some("after resume")
|
||||
})
|
||||
})
|
||||
})
|
||||
.expect("new user message should be present");
|
||||
|
||||
assert!(legacy_output_index < new_user_index);
|
||||
assert!(legacy_image_index < new_user_index);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn includes_conversation_id_and_model_headers_in_request() {
|
||||
skip_if_no_network!();
|
||||
@@ -1565,7 +1707,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
|
||||
});
|
||||
prompt.input.push(ResponseItem::CustomToolCallOutput {
|
||||
call_id: "custom-tool-call-id".into(),
|
||||
output: "ok".into(),
|
||||
output: FunctionCallOutputPayload::from_text("ok".into()),
|
||||
});
|
||||
|
||||
let mut stream = client_session
|
||||
|
||||
@@ -358,40 +358,26 @@ console.log(out.output?.body?.text ?? "");
|
||||
.await;
|
||||
|
||||
let req = mock.single_request();
|
||||
let (js_repl_output, js_repl_success) = req
|
||||
.custom_tool_call_output_content_and_success(call_id)
|
||||
.expect("custom tool output present");
|
||||
let js_repl_output = js_repl_output.expect("custom tool output text present");
|
||||
assert_ne!(
|
||||
js_repl_success,
|
||||
Some(false),
|
||||
"js_repl call failed unexpectedly: {js_repl_output}"
|
||||
let body = req.body_json();
|
||||
assert_eq!(
|
||||
image_messages(&body).len(),
|
||||
0,
|
||||
"js_repl view_image should not inject a pending input image message"
|
||||
);
|
||||
|
||||
let body = req.body_json();
|
||||
let image_messages = image_messages(&body);
|
||||
assert_eq!(
|
||||
image_messages.len(),
|
||||
1,
|
||||
"js_repl view_image should inject exactly one pending input image message"
|
||||
);
|
||||
let image_message = image_messages
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("pending input image message not included in request");
|
||||
let image_url = image_message
|
||||
.get("content")
|
||||
let custom_output = req.custom_tool_call_output(call_id);
|
||||
let output_items = custom_output
|
||||
.get("output")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|content| {
|
||||
content.iter().find_map(|span| {
|
||||
if span.get("type").and_then(Value::as_str) == Some("input_image") {
|
||||
span.get("image_url").and_then(Value::as_str)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.expect("custom_tool_call_output should be a content item array");
|
||||
let image_url = output_items
|
||||
.iter()
|
||||
.find_map(|item| {
|
||||
(item.get("type").and_then(Value::as_str) == Some("input_image"))
|
||||
.then(|| item.get("image_url").and_then(Value::as_str))
|
||||
.flatten()
|
||||
})
|
||||
.expect("image_url present");
|
||||
.expect("image_url present in js_repl custom tool output");
|
||||
assert!(
|
||||
image_url.starts_with("data:image/png;base64,"),
|
||||
"expected png data URL, got {image_url}"
|
||||
|
||||
Reference in New Issue
Block a user