Wrap Image UserInput in <image> tags as well

This commit is contained in:
Charles Cunningham
2026-01-09 17:40:45 -08:00
parent 20e4a2cf3f
commit 1c4e08caf9
2 changed files with 97 additions and 5 deletions

View File

@@ -9,6 +9,8 @@ use codex_protocol::models::ReasoningItemContent;
use codex_protocol::models::ReasoningItemReasoningSummary;
use codex_protocol::models::ResponseItem;
use codex_protocol::models::WebSearchAction;
use codex_protocol::models::is_image_close_tag_text;
use codex_protocol::models::is_image_open_tag_text;
use codex_protocol::models::is_local_image_close_tag_text;
use codex_protocol::models::is_local_image_open_tag_text;
use codex_protocol::user_input::UserInput;
@@ -37,10 +39,10 @@ fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {
for (idx, content_item) in message.iter().enumerate() {
match content_item {
ContentItem::InputText { text } => {
if is_local_image_open_tag_text(text)
if (is_local_image_open_tag_text(text) || is_image_open_tag_text(text))
&& (matches!(message.get(idx + 1), Some(ContentItem::InputImage { .. })))
|| (idx > 0
&& is_local_image_close_tag_text(text)
&& (is_local_image_close_tag_text(text) || is_image_close_tag_text(text))
&& matches!(message.get(idx - 1), Some(ContentItem::InputImage { .. })))
{
continue;
@@ -224,6 +226,43 @@ mod tests {
}
}
#[test]
fn skips_unnamed_image_label_text() {
let image_url = "data:image/png;base64,abc".to_string();
let label = codex_protocol::models::image_open_tag_text();
let user_text = "Please review this image.".to_string();
let item = ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![
ContentItem::InputText { text: label },
ContentItem::InputImage {
image_url: image_url.clone(),
},
ContentItem::InputText {
text: codex_protocol::models::image_close_tag_text(),
},
ContentItem::InputText {
text: user_text.clone(),
},
],
};
let turn_item = parse_turn_item(&item).expect("expected user message turn item");
match turn_item {
TurnItem::UserMessage(user) => {
let expected_content = vec![
UserInput::Image { image_url },
UserInput::Text { text: user_text },
];
assert_eq!(user.content, expected_content);
}
other => panic!("expected TurnItem::UserMessage, got {other:?}"),
}
}
#[test]
fn skips_user_instructions_and_env() {
let items = vec![

View File

@@ -182,9 +182,19 @@ fn local_image_error_placeholder(
pub const VIEW_IMAGE_TOOL_NAME: &str = "view_image";
const IMAGE_OPEN_TAG: &str = "<image>";
const IMAGE_CLOSE_TAG: &str = "</image>";
const LOCAL_IMAGE_OPEN_TAG_PREFIX: &str = "<image name=";
const LOCAL_IMAGE_OPEN_TAG_SUFFIX: &str = ">";
const LOCAL_IMAGE_CLOSE_TAG: &str = "</image>";
const LOCAL_IMAGE_CLOSE_TAG: &str = IMAGE_CLOSE_TAG;
pub fn image_open_tag_text() -> String {
IMAGE_OPEN_TAG.to_string()
}
pub fn image_close_tag_text() -> String {
IMAGE_CLOSE_TAG.to_string()
}
pub fn local_image_label_text(label_number: usize) -> String {
format!("[Image #{label_number}]")
@@ -201,7 +211,15 @@ pub fn is_local_image_open_tag_text(text: &str) -> bool {
}
pub fn is_local_image_close_tag_text(text: &str) -> bool {
text == LOCAL_IMAGE_CLOSE_TAG
is_image_close_tag_text(text)
}
pub fn is_image_open_tag_text(text: &str) -> bool {
text == IMAGE_OPEN_TAG
}
pub fn is_image_close_tag_text(text: &str) -> bool {
text == IMAGE_CLOSE_TAG
}
fn invalid_image_error_placeholder(
@@ -375,7 +393,15 @@ impl From<Vec<UserInput>> for ResponseInputItem {
.into_iter()
.flat_map(|c| match c {
UserInput::Text { text } => vec![ContentItem::InputText { text }],
UserInput::Image { image_url } => vec![ContentItem::InputImage { image_url }],
UserInput::Image { image_url } => vec![
ContentItem::InputText {
text: image_open_tag_text(),
},
ContentItem::InputImage { image_url },
ContentItem::InputText {
text: image_close_tag_text(),
},
],
UserInput::LocalImage { path } => {
image_index += 1;
local_image_content_items_with_label_number(&path, true, image_index)
@@ -820,6 +846,33 @@ mod tests {
Ok(())
}
#[test]
fn wraps_image_user_input_with_tags() -> Result<()> {
let image_url = "data:image/png;base64,abc".to_string();
let item = ResponseInputItem::from(vec![UserInput::Image {
image_url: image_url.clone(),
}]);
match item {
ResponseInputItem::Message { content, .. } => {
let expected = vec![
ContentItem::InputText {
text: image_open_tag_text(),
},
ContentItem::InputImage { image_url },
ContentItem::InputText {
text: image_close_tag_text(),
},
];
assert_eq!(content, expected);
}
other => panic!("expected message response but got {other:?}"),
}
Ok(())
}
#[test]
fn local_image_read_error_adds_placeholder() -> Result<()> {
let dir = tempdir()?;