[codex] remove plain image wrapper spans (#24652)

## Why Remote image submissions currently wrap native `input_image` spans in literal `<image>` and `</image>` text spans. Those extra prompt tokens add structure without providing label or routing information. ## What Changed - Serialize `UserInput::Image` directly as an `input_image` content span. - Preserve named local-image framing and legacy wrapper parsing for labeled attachments and existing histories. - Update existing request-shape expectations for drag-and-drop images, model switching, and compaction. ## Validation - `just test -p codex-protocol` - Focused `codex-core` run covering `drag_drop_image_persists_rollout_request_shape`, `model_change_from_image_to_text_strips_prior_image_content`, and `snapshot_request_shape_pre_turn_compaction_including_incoming_user_message` ## Notes - A broader `just test -p codex-core` run was attempted; the affected tests passed, while the overall run failed in unrelated CLI, MCP, and tooling tests plus a `thread_manager` timeout.
2026-05-28 15:00:16 +00:00 · 2026-05-26 15:49:37 -07:00
parent 0a6bc4e687
commit 46391f7efa
4 changed files with 16 additions and 65 deletions
--- a/codex-rs/core/tests/suite/image_rollout.rs
+++ b/codex-rs/core/tests/suite/image_rollout.rs
@@ -257,16 +257,10 @@ async fn drag_drop_image_persists_rollout_request_shape() -> anyhow::Result<()>
        id: None,
        role: "user".to_string(),
        content: vec![
-            ContentItem::InputText {
-                text: codex_protocol::models::image_open_tag_text(),
-            },
            ContentItem::InputImage {
                image_url,
                detail: Some(DEFAULT_IMAGE_DETAIL),
            },
-            ContentItem::InputText {
-                text: codex_protocol::models::image_close_tag_text(),
-            },
            ContentItem::InputText {
                text: "dropped image".to_string(),
            },
--- a/codex-rs/core/tests/suite/model_switching.rs
+++ b/codex-rs/core/tests/suite/model_switching.rs
@@ -555,19 +555,6 @@ async fn model_change_from_image_to_text_strips_prior_image_content() -> Result<
            .any(|text| text == "image content omitted because you do not support image input"),
        "second request should include the image-omitted placeholder text"
    );
-    assert!(
-        second_user_texts
-            .iter()
-            .any(|text| text == &codex_protocol::models::image_open_tag_text()),
-        "second request should preserve the image open tag text"
-    );
-    assert!(
-        second_user_texts
-            .iter()
-            .any(|text| text == &codex_protocol::models::image_close_tag_text()),
-        "second request should preserve the image close tag text"
-    );
-
    Ok(())
 }

--- a/codex-rs/core/tests/suite/snapshots/allsuitecompact__pre_turn_compaction_including_incoming_shapes.snap
+++ b/codex-rs/core/tests/suite/snapshots/allsuitecompact__pre_turn_compaction_including_incoming_shapes.snap
@@ -19,8 +19,6 @@ Scenario: Pre-turn auto-compaction with a context override emits the context dif
 02:message/user:<COMPACTION_SUMMARY>\nPRE_TURN_SUMMARY
 03:message/developer:<PERMISSIONS_INSTRUCTIONS>
 04:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
-05:message/user[4]:
-    [01] <image>
-    [02] <input_image:detail,image_url>
-    [03] </image>
-    [04] USER_THREE
+05:message/user[2]:
+    [01] <input_image:detail,image_url>
+    [02] USER_THREE
--- a/codex-rs/protocol/src/models.rs
+++ b/codex-rs/protocol/src/models.rs
@@ -1238,18 +1238,10 @@ impl From<Vec<UserInput>> for ResponseInputItem {
                    UserInput::Image { image_url, detail } => {
                        image_index += 1;
                        let detail = detail.unwrap_or(DEFAULT_IMAGE_DETAIL);
-                        vec![
-                            ContentItem::InputText {
-                                text: image_open_tag_text(),
-                            },
-                            ContentItem::InputImage {
-                                image_url,
-                                detail: Some(detail),
-                            },
-                            ContentItem::InputText {
-                                text: image_close_tag_text(),
-                            },
-                        ]
+                        vec![ContentItem::InputImage {
+                            image_url,
+                            detail: Some(detail),
+                        }]
                    }
                    UserInput::LocalImage { path, detail } => {
                        image_index += 1;
@@ -2622,7 +2614,7 @@ mod tests {
    }

    #[test]
-    fn wraps_image_user_input_with_tags() -> Result<()> {
+    fn serializes_image_user_input_without_tags() -> Result<()> {
        let image_url = "data:image/png;base64,abc".to_string();

        let item = ResponseInputItem::from(vec![UserInput::Image {
@@ -2632,18 +2624,10 @@ mod tests {

        match item {
            ResponseInputItem::Message { content, .. } => {
-                let expected = vec![
-                    ContentItem::InputText {
-                        text: image_open_tag_text(),
-                    },
-                    ContentItem::InputImage {
-                        image_url,
-                        detail: Some(DEFAULT_IMAGE_DETAIL),
-                    },
-                    ContentItem::InputText {
-                        text: image_close_tag_text(),
-                    },
-                ];
+                let expected = vec![ContentItem::InputImage {
+                    image_url,
+                    detail: Some(DEFAULT_IMAGE_DETAIL),
+                }];
                assert_eq!(content, expected);
            }
            other => panic!("expected message response but got {other:?}"),
@@ -2664,7 +2648,7 @@ mod tests {
        match item {
            ResponseInputItem::Message { content, .. } => {
                assert_eq!(
-                    content.get(1),
+                    content.first(),
                    Some(&ContentItem::InputImage {
                        image_url,
                        detail: Some(ImageDetail::Original),
@@ -2862,35 +2846,23 @@ mod tests {
            ResponseInputItem::Message { content, .. } => {
                assert_eq!(
                    content.first(),
-                    Some(&ContentItem::InputText {
-                        text: image_open_tag_text(),
-                    })
-                );
-                assert_eq!(
-                    content.get(1),
                    Some(&ContentItem::InputImage {
                        image_url,
                        detail: Some(DEFAULT_IMAGE_DETAIL),
                    })
                );
                assert_eq!(
-                    content.get(2),
-                    Some(&ContentItem::InputText {
-                        text: image_close_tag_text(),
-                    })
-                );
-                assert_eq!(
-                    content.get(3),
+                    content.get(1),
                    Some(&ContentItem::InputText {
                        text: local_image_open_tag_text(/*label_number*/ 2),
                    })
                );
                assert!(matches!(
-                    content.get(4),
+                    content.get(2),
                    Some(ContentItem::InputImage { .. })
                ));
                assert_eq!(
-                    content.get(5),
+                    content.get(3),
                    Some(&ContentItem::InputText {
                        text: image_close_tag_text(),
                    })