[codex] remove plain image wrapper spans (#24652)

## Why

Remote image submissions currently wrap native `input_image` spans in
literal `<image>` and `</image>` text spans. Those extra prompt tokens
add structure without providing label or routing information.

## What Changed

- Serialize `UserInput::Image` directly as an `input_image` content
span.
- Preserve named local-image framing and legacy wrapper parsing for
labeled attachments and existing histories.
- Update existing request-shape expectations for drag-and-drop images,
model switching, and compaction.

## Validation

- `just test -p codex-protocol`
- Focused `codex-core` run covering
`drag_drop_image_persists_rollout_request_shape`,
`model_change_from_image_to_text_strips_prior_image_content`, and
`snapshot_request_shape_pre_turn_compaction_including_incoming_user_message`

## Notes

- A broader `just test -p codex-core` run was attempted; the affected
tests passed, while the overall run failed in unrelated CLI, MCP, and
tooling tests plus a `thread_manager` timeout.
This commit is contained in:
pakrym-oai
2026-05-26 15:49:37 -07:00
committed by GitHub
parent 0a6bc4e687
commit 46391f7efa
4 changed files with 16 additions and 65 deletions

View File

@@ -257,16 +257,10 @@ async fn drag_drop_image_persists_rollout_request_shape() -> anyhow::Result<()>
id: None,
role: "user".to_string(),
content: vec![
ContentItem::InputText {
text: codex_protocol::models::image_open_tag_text(),
},
ContentItem::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputText {
text: codex_protocol::models::image_close_tag_text(),
},
ContentItem::InputText {
text: "dropped image".to_string(),
},

View File

@@ -555,19 +555,6 @@ async fn model_change_from_image_to_text_strips_prior_image_content() -> Result<
.any(|text| text == "image content omitted because you do not support image input"),
"second request should include the image-omitted placeholder text"
);
assert!(
second_user_texts
.iter()
.any(|text| text == &codex_protocol::models::image_open_tag_text()),
"second request should preserve the image open tag text"
);
assert!(
second_user_texts
.iter()
.any(|text| text == &codex_protocol::models::image_close_tag_text()),
"second request should preserve the image close tag text"
);
Ok(())
}

View File

@@ -19,8 +19,6 @@ Scenario: Pre-turn auto-compaction with a context override emits the context dif
02:message/user:<COMPACTION_SUMMARY>\nPRE_TURN_SUMMARY
03:message/developer:<PERMISSIONS_INSTRUCTIONS>
04:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
05:message/user[4]:
[01] <image>
[02] <input_image:detail,image_url>
[03] </image>
[04] USER_THREE
05:message/user[2]:
[01] <input_image:detail,image_url>
[02] USER_THREE

View File

@@ -1238,18 +1238,10 @@ impl From<Vec<UserInput>> for ResponseInputItem {
UserInput::Image { image_url, detail } => {
image_index += 1;
let detail = detail.unwrap_or(DEFAULT_IMAGE_DETAIL);
vec![
ContentItem::InputText {
text: image_open_tag_text(),
},
ContentItem::InputImage {
image_url,
detail: Some(detail),
},
ContentItem::InputText {
text: image_close_tag_text(),
},
]
vec![ContentItem::InputImage {
image_url,
detail: Some(detail),
}]
}
UserInput::LocalImage { path, detail } => {
image_index += 1;
@@ -2622,7 +2614,7 @@ mod tests {
}
#[test]
fn wraps_image_user_input_with_tags() -> Result<()> {
fn serializes_image_user_input_without_tags() -> Result<()> {
let image_url = "data:image/png;base64,abc".to_string();
let item = ResponseInputItem::from(vec![UserInput::Image {
@@ -2632,18 +2624,10 @@ mod tests {
match item {
ResponseInputItem::Message { content, .. } => {
let expected = vec![
ContentItem::InputText {
text: image_open_tag_text(),
},
ContentItem::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputText {
text: image_close_tag_text(),
},
];
let expected = vec![ContentItem::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
}];
assert_eq!(content, expected);
}
other => panic!("expected message response but got {other:?}"),
@@ -2664,7 +2648,7 @@ mod tests {
match item {
ResponseInputItem::Message { content, .. } => {
assert_eq!(
content.get(1),
content.first(),
Some(&ContentItem::InputImage {
image_url,
detail: Some(ImageDetail::Original),
@@ -2862,35 +2846,23 @@ mod tests {
ResponseInputItem::Message { content, .. } => {
assert_eq!(
content.first(),
Some(&ContentItem::InputText {
text: image_open_tag_text(),
})
);
assert_eq!(
content.get(1),
Some(&ContentItem::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
})
);
assert_eq!(
content.get(2),
Some(&ContentItem::InputText {
text: image_close_tag_text(),
})
);
assert_eq!(
content.get(3),
content.get(1),
Some(&ContentItem::InputText {
text: local_image_open_tag_text(/*label_number*/ 2),
})
);
assert!(matches!(
content.get(4),
content.get(2),
Some(ContentItem::InputImage { .. })
));
assert_eq!(
content.get(5),
content.get(3),
Some(&ContentItem::InputText {
text: image_close_tag_text(),
})