mirror of
https://github.com/openai/codex.git
synced 2026-05-28 15:00:16 +00:00
[codex] remove plain image wrapper spans (#24652)
## Why Remote image submissions currently wrap native `input_image` spans in literal `<image>` and `</image>` text spans. Those extra prompt tokens add structure without providing label or routing information. ## What Changed - Serialize `UserInput::Image` directly as an `input_image` content span. - Preserve named local-image framing and legacy wrapper parsing for labeled attachments and existing histories. - Update existing request-shape expectations for drag-and-drop images, model switching, and compaction. ## Validation - `just test -p codex-protocol` - Focused `codex-core` run covering `drag_drop_image_persists_rollout_request_shape`, `model_change_from_image_to_text_strips_prior_image_content`, and `snapshot_request_shape_pre_turn_compaction_including_incoming_user_message` ## Notes - A broader `just test -p codex-core` run was attempted; the affected tests passed, while the overall run failed in unrelated CLI, MCP, and tooling tests plus a `thread_manager` timeout.
This commit is contained in:
@@ -257,16 +257,10 @@ async fn drag_drop_image_persists_rollout_request_shape() -> anyhow::Result<()>
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![
|
||||
ContentItem::InputText {
|
||||
text: codex_protocol::models::image_open_tag_text(),
|
||||
},
|
||||
ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
ContentItem::InputText {
|
||||
text: codex_protocol::models::image_close_tag_text(),
|
||||
},
|
||||
ContentItem::InputText {
|
||||
text: "dropped image".to_string(),
|
||||
},
|
||||
|
||||
@@ -555,19 +555,6 @@ async fn model_change_from_image_to_text_strips_prior_image_content() -> Result<
|
||||
.any(|text| text == "image content omitted because you do not support image input"),
|
||||
"second request should include the image-omitted placeholder text"
|
||||
);
|
||||
assert!(
|
||||
second_user_texts
|
||||
.iter()
|
||||
.any(|text| text == &codex_protocol::models::image_open_tag_text()),
|
||||
"second request should preserve the image open tag text"
|
||||
);
|
||||
assert!(
|
||||
second_user_texts
|
||||
.iter()
|
||||
.any(|text| text == &codex_protocol::models::image_close_tag_text()),
|
||||
"second request should preserve the image close tag text"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -19,8 +19,6 @@ Scenario: Pre-turn auto-compaction with a context override emits the context dif
|
||||
02:message/user:<COMPACTION_SUMMARY>\nPRE_TURN_SUMMARY
|
||||
03:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
04:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
|
||||
05:message/user[4]:
|
||||
[01] <image>
|
||||
[02] <input_image:detail,image_url>
|
||||
[03] </image>
|
||||
[04] USER_THREE
|
||||
05:message/user[2]:
|
||||
[01] <input_image:detail,image_url>
|
||||
[02] USER_THREE
|
||||
|
||||
@@ -1238,18 +1238,10 @@ impl From<Vec<UserInput>> for ResponseInputItem {
|
||||
UserInput::Image { image_url, detail } => {
|
||||
image_index += 1;
|
||||
let detail = detail.unwrap_or(DEFAULT_IMAGE_DETAIL);
|
||||
vec![
|
||||
ContentItem::InputText {
|
||||
text: image_open_tag_text(),
|
||||
},
|
||||
ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(detail),
|
||||
},
|
||||
ContentItem::InputText {
|
||||
text: image_close_tag_text(),
|
||||
},
|
||||
]
|
||||
vec![ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(detail),
|
||||
}]
|
||||
}
|
||||
UserInput::LocalImage { path, detail } => {
|
||||
image_index += 1;
|
||||
@@ -2622,7 +2614,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn wraps_image_user_input_with_tags() -> Result<()> {
|
||||
fn serializes_image_user_input_without_tags() -> Result<()> {
|
||||
let image_url = "data:image/png;base64,abc".to_string();
|
||||
|
||||
let item = ResponseInputItem::from(vec![UserInput::Image {
|
||||
@@ -2632,18 +2624,10 @@ mod tests {
|
||||
|
||||
match item {
|
||||
ResponseInputItem::Message { content, .. } => {
|
||||
let expected = vec![
|
||||
ContentItem::InputText {
|
||||
text: image_open_tag_text(),
|
||||
},
|
||||
ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
ContentItem::InputText {
|
||||
text: image_close_tag_text(),
|
||||
},
|
||||
];
|
||||
let expected = vec![ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}];
|
||||
assert_eq!(content, expected);
|
||||
}
|
||||
other => panic!("expected message response but got {other:?}"),
|
||||
@@ -2664,7 +2648,7 @@ mod tests {
|
||||
match item {
|
||||
ResponseInputItem::Message { content, .. } => {
|
||||
assert_eq!(
|
||||
content.get(1),
|
||||
content.first(),
|
||||
Some(&ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(ImageDetail::Original),
|
||||
@@ -2862,35 +2846,23 @@ mod tests {
|
||||
ResponseInputItem::Message { content, .. } => {
|
||||
assert_eq!(
|
||||
content.first(),
|
||||
Some(&ContentItem::InputText {
|
||||
text: image_open_tag_text(),
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
content.get(1),
|
||||
Some(&ContentItem::InputImage {
|
||||
image_url,
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
content.get(2),
|
||||
Some(&ContentItem::InputText {
|
||||
text: image_close_tag_text(),
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
content.get(3),
|
||||
content.get(1),
|
||||
Some(&ContentItem::InputText {
|
||||
text: local_image_open_tag_text(/*label_number*/ 2),
|
||||
})
|
||||
);
|
||||
assert!(matches!(
|
||||
content.get(4),
|
||||
content.get(2),
|
||||
Some(ContentItem::InputImage { .. })
|
||||
));
|
||||
assert_eq!(
|
||||
content.get(5),
|
||||
content.get(3),
|
||||
Some(&ContentItem::InputText {
|
||||
text: image_close_tag_text(),
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user