From 46391f7efa0aa06a8074f47bc01d2651d95889b0 Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Tue, 26 May 2026 15:49:37 -0700 Subject: [PATCH] [codex] remove plain image wrapper spans (#24652) ## Why Remote image submissions currently wrap native `input_image` spans in literal `` and `` text spans. Those extra prompt tokens add structure without providing label or routing information. ## What Changed - Serialize `UserInput::Image` directly as an `input_image` content span. - Preserve named local-image framing and legacy wrapper parsing for labeled attachments and existing histories. - Update existing request-shape expectations for drag-and-drop images, model switching, and compaction. ## Validation - `just test -p codex-protocol` - Focused `codex-core` run covering `drag_drop_image_persists_rollout_request_shape`, `model_change_from_image_to_text_strips_prior_image_content`, and `snapshot_request_shape_pre_turn_compaction_including_incoming_user_message` ## Notes - A broader `just test -p codex-core` run was attempted; the affected tests passed, while the overall run failed in unrelated CLI, MCP, and tooling tests plus a `thread_manager` timeout. --- codex-rs/core/tests/suite/image_rollout.rs | 6 --- codex-rs/core/tests/suite/model_switching.rs | 13 ----- ..._compaction_including_incoming_shapes.snap | 8 ++- codex-rs/protocol/src/models.rs | 54 +++++-------------- 4 files changed, 16 insertions(+), 65 deletions(-) diff --git a/codex-rs/core/tests/suite/image_rollout.rs b/codex-rs/core/tests/suite/image_rollout.rs index 67a79d603f..87eb03079e 100644 --- a/codex-rs/core/tests/suite/image_rollout.rs +++ b/codex-rs/core/tests/suite/image_rollout.rs @@ -257,16 +257,10 @@ async fn drag_drop_image_persists_rollout_request_shape() -> anyhow::Result<()> id: None, role: "user".to_string(), content: vec![ - ContentItem::InputText { - text: codex_protocol::models::image_open_tag_text(), - }, ContentItem::InputImage { image_url, detail: Some(DEFAULT_IMAGE_DETAIL), }, - ContentItem::InputText { - text: codex_protocol::models::image_close_tag_text(), - }, ContentItem::InputText { text: "dropped image".to_string(), }, diff --git a/codex-rs/core/tests/suite/model_switching.rs b/codex-rs/core/tests/suite/model_switching.rs index fd237a8d5c..3c4dd40b32 100644 --- a/codex-rs/core/tests/suite/model_switching.rs +++ b/codex-rs/core/tests/suite/model_switching.rs @@ -555,19 +555,6 @@ async fn model_change_from_image_to_text_strips_prior_image_content() -> Result< .any(|text| text == "image content omitted because you do not support image input"), "second request should include the image-omitted placeholder text" ); - assert!( - second_user_texts - .iter() - .any(|text| text == &codex_protocol::models::image_open_tag_text()), - "second request should preserve the image open tag text" - ); - assert!( - second_user_texts - .iter() - .any(|text| text == &codex_protocol::models::image_close_tag_text()), - "second request should preserve the image close tag text" - ); - Ok(()) } diff --git a/codex-rs/core/tests/suite/snapshots/all__suite__compact__pre_turn_compaction_including_incoming_shapes.snap b/codex-rs/core/tests/suite/snapshots/all__suite__compact__pre_turn_compaction_including_incoming_shapes.snap index 2edd63d3d2..679081353e 100644 --- a/codex-rs/core/tests/suite/snapshots/all__suite__compact__pre_turn_compaction_including_incoming_shapes.snap +++ b/codex-rs/core/tests/suite/snapshots/all__suite__compact__pre_turn_compaction_including_incoming_shapes.snap @@ -19,8 +19,6 @@ Scenario: Pre-turn auto-compaction with a context override emits the context dif 02:message/user:\nPRE_TURN_SUMMARY 03:message/developer: 04:message/user: -05:message/user[4]: - [01] - [02] - [03] - [04] USER_THREE +05:message/user[2]: + [01] + [02] USER_THREE diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index 6112ea8a6c..85c5a5d704 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -1238,18 +1238,10 @@ impl From> for ResponseInputItem { UserInput::Image { image_url, detail } => { image_index += 1; let detail = detail.unwrap_or(DEFAULT_IMAGE_DETAIL); - vec![ - ContentItem::InputText { - text: image_open_tag_text(), - }, - ContentItem::InputImage { - image_url, - detail: Some(detail), - }, - ContentItem::InputText { - text: image_close_tag_text(), - }, - ] + vec![ContentItem::InputImage { + image_url, + detail: Some(detail), + }] } UserInput::LocalImage { path, detail } => { image_index += 1; @@ -2622,7 +2614,7 @@ mod tests { } #[test] - fn wraps_image_user_input_with_tags() -> Result<()> { + fn serializes_image_user_input_without_tags() -> Result<()> { let image_url = "data:image/png;base64,abc".to_string(); let item = ResponseInputItem::from(vec![UserInput::Image { @@ -2632,18 +2624,10 @@ mod tests { match item { ResponseInputItem::Message { content, .. } => { - let expected = vec![ - ContentItem::InputText { - text: image_open_tag_text(), - }, - ContentItem::InputImage { - image_url, - detail: Some(DEFAULT_IMAGE_DETAIL), - }, - ContentItem::InputText { - text: image_close_tag_text(), - }, - ]; + let expected = vec![ContentItem::InputImage { + image_url, + detail: Some(DEFAULT_IMAGE_DETAIL), + }]; assert_eq!(content, expected); } other => panic!("expected message response but got {other:?}"), @@ -2664,7 +2648,7 @@ mod tests { match item { ResponseInputItem::Message { content, .. } => { assert_eq!( - content.get(1), + content.first(), Some(&ContentItem::InputImage { image_url, detail: Some(ImageDetail::Original), @@ -2862,35 +2846,23 @@ mod tests { ResponseInputItem::Message { content, .. } => { assert_eq!( content.first(), - Some(&ContentItem::InputText { - text: image_open_tag_text(), - }) - ); - assert_eq!( - content.get(1), Some(&ContentItem::InputImage { image_url, detail: Some(DEFAULT_IMAGE_DETAIL), }) ); assert_eq!( - content.get(2), - Some(&ContentItem::InputText { - text: image_close_tag_text(), - }) - ); - assert_eq!( - content.get(3), + content.get(1), Some(&ContentItem::InputText { text: local_image_open_tag_text(/*label_number*/ 2), }) ); assert!(matches!( - content.get(4), + content.get(2), Some(ContentItem::InputImage { .. }) )); assert_eq!( - content.get(5), + content.get(3), Some(&ContentItem::InputText { text: image_close_tag_text(), })