update

2026-04-24 14:45:27 +00:00 · 2026-02-03 08:44:52 -08:00
parent 95cd861224
commit 806e490285
2 changed files with 43 additions and 22 deletions
--- a/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs
+++ b/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs
@@ -355,18 +355,32 @@ async fn dynamic_tool_call_round_trip_sends_content_items_to_model() -> Result<(
    .await??;

    let bodies = responses_bodies(&server).await?;
+    let output_value = bodies
+        .iter()
+        .find_map(|body| function_call_output_raw_output(body, call_id))
+        .context("expected function_call_output output in follow-up request")?;
+    assert_eq!(
+        output_value,
+        json!([
+            {
+                "type": "input_text",
+                "text": "dynamic-ok"
+            },
+            {
+                "type": "input_image",
+                "image_url": "data:image/png;base64,AAA"
+            }
+        ])
+    );
+
    let payload = bodies
        .iter()
        .find_map(|body| function_call_output_payload(body, call_id))
        .context("expected function_call_output in follow-up request")?;
-    let expected_payload = FunctionCallOutputPayload {
-        // `FunctionCallOutputPayload` deserializes item arrays by also storing
-        // a JSON string representation in `content`.
-        content: serde_json::to_string(&content_items)?,
-        content_items: Some(content_items),
-        success: None,
-    };
-    assert_eq!(payload, expected_payload);
+    assert_eq!(payload.content_items, Some(content_items.clone()));
+    assert_eq!(payload.success, None);
+    // On deserialization, `content` mirrors the items as their JSON string encoding.
+    assert_eq!(payload.content, serde_json::to_string(&content_items)?);

    Ok(())
 }
@@ -398,6 +412,11 @@ fn find_tool<'a>(body: &'a Value, name: &str) -> Option<&'a Value> {
 }

 fn function_call_output_payload(body: &Value, call_id: &str) -> Option<FunctionCallOutputPayload> {
+    let raw_output = function_call_output_raw_output(body, call_id)?;
+    serde_json::from_value(raw_output).ok() // a payload that fails to parse becomes `None`
+}
+
+fn function_call_output_raw_output(body: &Value, call_id: &str) -> Option<Value> {
    body.get("input")
        .and_then(Value::as_array)
        .and_then(|items| {
@@ -408,7 +427,6 @@ fn function_call_output_payload(body: &Value, call_id: &str) -> Option<FunctionC
        })
        .and_then(|item| item.get("output"))
        .cloned()
-        .and_then(|output| serde_json::from_value(output).ok())
 }

 fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
--- a/codex-rs/protocol/src/models.rs
+++ b/codex-rs/protocol/src/models.rs
@@ -118,11 +118,11 @@ pub enum ResponseItem {
        arguments: String,
        call_id: String,
    },
-    // NOTE: The input schema for `function_call_output` objects that clients send to the
-    // OpenAI /v1/responses endpoint is NOT the same shape as the objects the server returns on the
-    // SSE stream. When *sending* we must wrap the string output inside an object that includes a
-    // required `success` boolean. To ensure we serialize exactly the expected shape we introduce
-    // a dedicated payload struct and flatten it here.
+    // NOTE: The `output` field for `function_call_output` uses a dedicated payload type with
+    // custom serialization. On the wire it is either:
+    //   • a plain string (`content`)
+    //   • an array of structured content items (`content_items`)
+    // We keep this behavior centralized in `FunctionCallOutputPayload`.
    FunctionCallOutput {
        call_id: String,
        output: FunctionCallOutputPayload,
@@ -741,11 +741,12 @@ pub enum FunctionCallOutputContentItem {

 /// The payload we send back to OpenAI when reporting a tool call result.
 ///
-/// `content` preserves the historical plain-string payload so downstream
-/// integrations (tests, logging, etc.) can keep treating tool output as
-/// `String`. When an MCP server returns richer data we additionally populate
-/// `content_items` with the structured form that the Responses/Chat
-/// Completions APIs understand.
+/// `content` preserves a historical plain-text representation that downstream
+/// code still uses for logs/history/tests.
+///
+/// `content_items` holds structured tool output. When present, custom
+/// serialization sends these items directly on the wire as the `output` value
+/// (an array), rather than serializing `content`.
 #[derive(Debug, Default, Clone, PartialEq, JsonSchema, TS)]
 pub struct FunctionCallOutputPayload {
    pub content: String,
@@ -761,9 +762,9 @@ enum FunctionCallOutputPayloadSerde {
    Items(Vec<FunctionCallOutputContentItem>),
 }

-// The Responses API expects two *different* shapes depending on success vs failure:
-//   • success → output is a plain string (no nested object)
-//   • failure → output is an object { content, success:false }
+// `function_call_output.output` is encoded as either:
+//   • an array of structured content items, when `content_items` is present
+//   • a plain string, otherwise
 impl Serialize for FunctionCallOutputPayload {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
@@ -788,6 +789,8 @@ impl<'de> Deserialize<'de> for FunctionCallOutputPayload {
                ..Default::default()
            }),
            FunctionCallOutputPayloadSerde::Items(items) => {
+                // Mirror the items into `content` as a JSON string so legacy
+                // callers that still inspect `content` keep working.
                let content = serde_json::to_string(&items).map_err(serde::de::Error::custom)?;
                Ok(FunctionCallOutputPayload {
                    content,