mirror of
https://github.com/openai/codex.git
synced 2026-04-29 00:55:38 +00:00
Support multimodal custom tool outputs (#12948)
## Summary

This changes `custom_tool_call_output` to use the same output payload shape as `function_call_output`, so freeform tools can return either plain text or structured content items.

The main goal is to let `js_repl` return image content from nested `view_image` calls in its own `custom_tool_call_output`, instead of relying on a separate injected message.

## What changed

- Changed `custom_tool_call_output.output` from `string` to `FunctionCallOutputPayload`
- Updated freeform tool plumbing to preserve structured output bodies
- Updated `js_repl` to aggregate nested tool content items and attach them to the outer `js_repl` result
- Removed the old `js_repl` special case that injected `view_image` results as a separate pending user image message
- Updated normalization/history/truncation paths to handle multimodal `custom_tool_call_output`
- Regenerated app-server protocol schema artifacts

## Behavior

Direct `view_image` calls still return a `function_call_output` with image content.

When `view_image` is called inside `js_repl`, the outer `js_repl` `custom_tool_call_output` now carries:

- an `input_text` item if the JS produced text output
- one or more `input_image` items from nested tool results

So the nested image result now stays inside the `js_repl` tool output instead of being injected as a separate message.

## Compatibility

This is intended to be backward-compatible for resumed conversations. Older histories that stored `custom_tool_call_output.output` as a plain string still deserialize correctly, and older histories that used the previous injected-image-message flow also continue to resume.

Added regression coverage for resuming a pre-change rollout containing:

- string-valued `custom_tool_call_output`
- legacy injected image message history

#### [git stack](https://github.com/magus/git-stack-cli)

- 👉 `1` https://github.com/openai/codex/pull/12948
This commit is contained in:
committed by
GitHub
parent
f90e97e414
commit
7e980d7db6
@@ -35,6 +35,10 @@ use codex_protocol::openai_models::ModelsResponse;
|
||||
use codex_protocol::openai_models::ReasoningEffort;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::Op;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::RolloutLine;
|
||||
use codex_protocol::protocol::SessionMeta;
|
||||
use codex_protocol::protocol::SessionMetaLine;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::apps_test_server::AppsTestServer;
|
||||
@@ -344,6 +348,144 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
|
||||
assert!(pos_environment < pos_new_user);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn resume_replays_legacy_js_repl_image_rollout_shapes() {
|
||||
skip_if_no_network!();
|
||||
|
||||
// Early js_repl builds persisted image tool results as two separate rollout items:
|
||||
// a string-valued custom_tool_call_output plus a standalone user input_image message.
|
||||
// Current image tests cover today's shapes; this keeps resume compatibility for that
|
||||
// legacy rollout representation.
|
||||
let legacy_custom_tool_call = ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "legacy-js-call".to_string(),
|
||||
name: "js_repl".to_string(),
|
||||
input: "console.log('legacy image flow')".to_string(),
|
||||
};
|
||||
let legacy_image_url = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==";
|
||||
let rollout = vec![
|
||||
RolloutLine {
|
||||
timestamp: "2024-01-01T00:00:00.000Z".to_string(),
|
||||
item: RolloutItem::SessionMeta(SessionMetaLine {
|
||||
meta: SessionMeta {
|
||||
id: ThreadId::default(),
|
||||
timestamp: "2024-01-01T00:00:00Z".to_string(),
|
||||
cwd: ".".into(),
|
||||
originator: "test_originator".to_string(),
|
||||
cli_version: "test_version".to_string(),
|
||||
model_provider: Some("test-provider".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
git: None,
|
||||
}),
|
||||
},
|
||||
RolloutLine {
|
||||
timestamp: "2024-01-01T00:00:01.000Z".to_string(),
|
||||
item: RolloutItem::ResponseItem(legacy_custom_tool_call),
|
||||
},
|
||||
RolloutLine {
|
||||
timestamp: "2024-01-01T00:00:02.000Z".to_string(),
|
||||
item: RolloutItem::ResponseItem(ResponseItem::CustomToolCallOutput {
|
||||
call_id: "legacy-js-call".to_string(),
|
||||
output: FunctionCallOutputPayload::from_text("legacy js_repl stdout".to_string()),
|
||||
}),
|
||||
},
|
||||
RolloutLine {
|
||||
timestamp: "2024-01-01T00:00:03.000Z".to_string(),
|
||||
item: RolloutItem::ResponseItem(ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage {
|
||||
image_url: legacy_image_url.to_string(),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
}),
|
||||
},
|
||||
];
|
||||
|
||||
let tmpdir = TempDir::new().unwrap();
|
||||
let session_path = tmpdir
|
||||
.path()
|
||||
.join("resume-legacy-js-repl-image-rollout.jsonl");
|
||||
let mut f = std::fs::File::create(&session_path).unwrap();
|
||||
for line in rollout {
|
||||
writeln!(f, "{}", serde_json::to_string(&line).unwrap()).unwrap();
|
||||
}
|
||||
|
||||
let server = MockServer::start().await;
|
||||
let resp_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![ev_response_created("resp1"), ev_completed("resp1")]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let codex_home = Arc::new(TempDir::new().unwrap());
|
||||
let mut builder = test_codex().with_model("gpt-5.1");
|
||||
let test = builder
|
||||
.resume(&server, codex_home, session_path.clone())
|
||||
.await
|
||||
.expect("resume conversation");
|
||||
test.submit_turn("after resume").await.unwrap();
|
||||
|
||||
let input = resp_mock.single_request().input();
|
||||
|
||||
let legacy_output_index = input
|
||||
.iter()
|
||||
.position(|item| {
|
||||
item.get("type").and_then(|value| value.as_str()) == Some("custom_tool_call_output")
|
||||
&& item.get("call_id").and_then(|value| value.as_str()) == Some("legacy-js-call")
|
||||
})
|
||||
.expect("legacy custom tool output should be replayed");
|
||||
assert_eq!(
|
||||
input[legacy_output_index]
|
||||
.get("output")
|
||||
.and_then(|value| value.as_str()),
|
||||
Some("legacy js_repl stdout")
|
||||
);
|
||||
|
||||
let legacy_image_index = input
|
||||
.iter()
|
||||
.position(|item| {
|
||||
item.get("type").and_then(|value| value.as_str()) == Some("message")
|
||||
&& item.get("role").and_then(|value| value.as_str()) == Some("user")
|
||||
&& item
|
||||
.get("content")
|
||||
.and_then(|value| value.as_array())
|
||||
.is_some_and(|content| {
|
||||
content.iter().any(|entry| {
|
||||
entry.get("type").and_then(|value| value.as_str())
|
||||
== Some("input_image")
|
||||
&& entry.get("image_url").and_then(|value| value.as_str())
|
||||
== Some(legacy_image_url)
|
||||
})
|
||||
})
|
||||
})
|
||||
.expect("legacy injected image message should be replayed");
|
||||
|
||||
let new_user_index = input
|
||||
.iter()
|
||||
.position(|item| {
|
||||
item.get("type").and_then(|value| value.as_str()) == Some("message")
|
||||
&& item.get("role").and_then(|value| value.as_str()) == Some("user")
|
||||
&& item
|
||||
.get("content")
|
||||
.and_then(|value| value.as_array())
|
||||
.is_some_and(|content| {
|
||||
content.iter().any(|entry| {
|
||||
entry.get("type").and_then(|value| value.as_str()) == Some("input_text")
|
||||
&& entry.get("text").and_then(|value| value.as_str())
|
||||
== Some("after resume")
|
||||
})
|
||||
})
|
||||
})
|
||||
.expect("new user message should be present");
|
||||
|
||||
assert!(legacy_output_index < new_user_index);
|
||||
assert!(legacy_image_index < new_user_index);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn includes_conversation_id_and_model_headers_in_request() {
|
||||
skip_if_no_network!();
|
||||
@@ -1565,7 +1707,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
|
||||
});
|
||||
prompt.input.push(ResponseItem::CustomToolCallOutput {
|
||||
call_id: "custom-tool-call-id".into(),
|
||||
output: "ok".into(),
|
||||
output: FunctionCallOutputPayload::from_text("ok".into()),
|
||||
});
|
||||
|
||||
let mut stream = client_session
|
||||
|
||||
Reference in New Issue
Block a user