Wrap Image UserInput in <image> tags as well

2026-04-24 14:45:27 +00:00 · 2026-01-09 17:40:45 -08:00
parent 20e4a2cf3f
commit 1c4e08caf9
2 changed files with 97 additions and 5 deletions
--- a/codex-rs/core/src/event_mapping.rs
+++ b/codex-rs/core/src/event_mapping.rs
@@ -9,6 +9,8 @@ use codex_protocol::models::ReasoningItemContent;
 use codex_protocol::models::ReasoningItemReasoningSummary;
 use codex_protocol::models::ResponseItem;
 use codex_protocol::models::WebSearchAction;
+use codex_protocol::models::is_image_close_tag_text;
+use codex_protocol::models::is_image_open_tag_text;
 use codex_protocol::models::is_local_image_close_tag_text;
 use codex_protocol::models::is_local_image_open_tag_text;
 use codex_protocol::user_input::UserInput;
@@ -37,10 +39,10 @@ fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {
    for (idx, content_item) in message.iter().enumerate() {
        match content_item {
            ContentItem::InputText { text } => {
-                if is_local_image_open_tag_text(text)
+                if (is_local_image_open_tag_text(text) || is_image_open_tag_text(text))
                    && (matches!(message.get(idx + 1), Some(ContentItem::InputImage { .. })))
                    || (idx > 0
-                        && is_local_image_close_tag_text(text)
+                        && (is_local_image_close_tag_text(text) || is_image_close_tag_text(text))
                        && matches!(message.get(idx - 1), Some(ContentItem::InputImage { .. })))
                {
                    continue;
@@ -224,6 +226,43 @@ mod tests {
        }
    }

+    #[test]
+    fn skips_unnamed_image_label_text() {
+        let image_url = "data:image/png;base64,abc".to_string();
+        let label = codex_protocol::models::image_open_tag_text(); // bare opening tag text
+        let user_text = "Please review this image.".to_string();
+
+        let item = ResponseItem::Message {
+            id: None,
+            role: "user".to_string(),
+            content: vec![ // open tag, image, close tag, then the user's own text
+                ContentItem::InputText { text: label }, // opening tag: expected to be dropped
+                ContentItem::InputImage {
+                    image_url: image_url.clone(),
+                },
+                ContentItem::InputText { // closing tag: expected to be dropped
+                    text: codex_protocol::models::image_close_tag_text(),
+                },
+                ContentItem::InputText { // ordinary user text: expected to be kept
+                    text: user_text.clone(),
+                },
+            ],
+        };
+
+        let turn_item = parse_turn_item(&item).expect("expected user message turn item");
+
+        match turn_item {
+            TurnItem::UserMessage(user) => {
+                let expected_content = vec![ // only the image and the real text remain
+                    UserInput::Image { image_url },
+                    UserInput::Text { text: user_text },
+                ];
+                assert_eq!(user.content, expected_content);
+            }
+            other => panic!("expected TurnItem::UserMessage, got {other:?}"),
+        }
+    }
+
    #[test]
    fn skips_user_instructions_and_env() {
        let items = vec![
--- a/codex-rs/protocol/src/models.rs
+++ b/codex-rs/protocol/src/models.rs
@@ -182,9 +182,19 @@ fn local_image_error_placeholder(

 pub const VIEW_IMAGE_TOOL_NAME: &str = "view_image";

+const IMAGE_OPEN_TAG: &str = "<image>"; // opening tag with no `name` attribute
+const IMAGE_CLOSE_TAG: &str = "</image>"; // closing tag text
 const LOCAL_IMAGE_OPEN_TAG_PREFIX: &str = "<image name=";
 const LOCAL_IMAGE_OPEN_TAG_SUFFIX: &str = ">";
-const LOCAL_IMAGE_CLOSE_TAG: &str = "</image>";
+const LOCAL_IMAGE_CLOSE_TAG: &str = IMAGE_CLOSE_TAG; // same literal as before, now aliased
+
+pub fn image_open_tag_text() -> String {
+    IMAGE_OPEN_TAG.to_string() // owned copy of the "<image>" opening tag
+}
+
+pub fn image_close_tag_text() -> String {
+    IMAGE_CLOSE_TAG.to_string() // owned copy of the "</image>" closing tag
+}

 pub fn local_image_label_text(label_number: usize) -> String {
    format!("[Image #{label_number}]")
@@ -201,7 +211,15 @@ pub fn is_local_image_open_tag_text(text: &str) -> bool {
 }

 pub fn is_local_image_close_tag_text(text: &str) -> bool {
-    text == LOCAL_IMAGE_CLOSE_TAG
+    is_image_close_tag_text(text) // local and plain images share the same "</image>" close tag
+}
+
+pub fn is_image_open_tag_text(text: &str) -> bool {
+    text == IMAGE_OPEN_TAG // exact match only; a `<image name=...>` tag does not match here
+}
+
+pub fn is_image_close_tag_text(text: &str) -> bool {
+    text == IMAGE_CLOSE_TAG // exact match against "</image>"
 }

 fn invalid_image_error_placeholder(
@@ -375,7 +393,15 @@ impl From<Vec<UserInput>> for ResponseInputItem {
                .into_iter()
                .flat_map(|c| match c {
                    UserInput::Text { text } => vec![ContentItem::InputText { text }],
-                    UserInput::Image { image_url } => vec![ContentItem::InputImage { image_url }],
+                    UserInput::Image { image_url } => vec![
+                        ContentItem::InputText {
+                            text: image_open_tag_text(),
+                        },
+                        ContentItem::InputImage { image_url },
+                        ContentItem::InputText {
+                            text: image_close_tag_text(),
+                        },
+                    ],
                    UserInput::LocalImage { path } => {
                        image_index += 1;
                        local_image_content_items_with_label_number(&path, true, image_index)
@@ -820,6 +846,33 @@ mod tests {
        Ok(())
    }

+    #[test]
+    fn wraps_image_user_input_with_tags() -> Result<()> {
+        let image_url = "data:image/png;base64,abc".to_string();
+
+        let item = ResponseInputItem::from(vec![UserInput::Image { // one Image input, no text
+            image_url: image_url.clone(),
+        }]);
+
+        match item {
+            ResponseInputItem::Message { content, .. } => {
+                let expected = vec![ // the image bracketed by open and close tag texts
+                    ContentItem::InputText {
+                        text: image_open_tag_text(),
+                    },
+                    ContentItem::InputImage { image_url },
+                    ContentItem::InputText {
+                        text: image_close_tag_text(),
+                    },
+                ];
+                assert_eq!(content, expected);
+            }
+            other => panic!("expected message response but got {other:?}"),
+        }
+
+        Ok(())
+    }
+
    #[test]
    fn local_image_read_error_adds_placeholder() -> Result<()> {
        let dir = tempdir()?;