Update image outputs to default to high detail (#18386)

Do not assume an implicit default for `detail`; set it explicitly on image outputs, using high detail when none is specified.
This commit is contained in:
pakrym-oai
2026-04-18 11:01:12 -07:00
committed by GitHub
parent e3c2acb9cd
commit 53b1570367
35 changed files with 245 additions and 93 deletions

View File

@@ -1,6 +1,7 @@
use super::*;
use codex_model_provider_info::ModelProviderInfo;
use codex_model_provider_info::WireApi;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use pretty_assertions::assert_eq;
async fn process_compacted_history_with_test_session(
@@ -45,6 +46,7 @@ fn content_items_to_text_joins_non_empty_segments() {
fn content_items_to_text_ignores_image_only_content() {
let items = vec![ContentItem::InputImage {
image_url: "file://image.png".to_string(),
detail: Some(DEFAULT_IMAGE_DETAIL),
}];
let joined = content_items_to_text(&items);

View File

@@ -649,8 +649,8 @@ fn image_data_url_estimate_adjustment(item: &ResponseItem) -> (i64, i64) {
match item {
ResponseItem::Message { content, .. } => {
for content_item in content {
if let ContentItem::InputImage { image_url } = content_item {
accumulate(image_url, None);
if let ContentItem::InputImage { image_url, detail } = content_item {
accumulate(image_url, *detail);
}
}
}

View File

@@ -6,6 +6,7 @@ use codex_protocol::AgentPath;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
@@ -382,6 +383,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
},
ContentItem::InputImage {
image_url: "https://example.com/img.png".to_string(),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputText {
text: "caption".to_string(),
@@ -405,7 +407,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
},
FunctionCallOutputContentItem::InputImage {
image_url: "https://example.com/result.png".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
},
@@ -425,7 +427,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
},
FunctionCallOutputContentItem::InputImage {
image_url: "https://example.com/js-repl-result.png".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
},
@@ -506,6 +508,7 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
},
ContentItem::InputImage {
image_url: "https://example.com/img.png".to_string(),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
],
end_turn: None,
@@ -715,7 +718,7 @@ fn replace_last_turn_images_replaces_tool_output_images() {
body: FunctionCallOutputBody::ContentItems(vec![
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
success: Some(true),
@@ -752,6 +755,7 @@ fn replace_last_turn_images_does_not_touch_user_images() {
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: Some(DEFAULT_IMAGE_DETAIL),
}],
end_turn: None,
phase: None,
@@ -1680,7 +1684,10 @@ fn image_data_url_payload_does_not_dominate_message_estimate() {
ContentItem::InputText {
text: "Here is the screenshot".to_string(),
},
ContentItem::InputImage { image_url },
ContentItem::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
],
end_turn: None,
phase: None,
@@ -1717,7 +1724,7 @@ fn image_data_url_payload_does_not_dominate_function_call_output_estimate() {
},
FunctionCallOutputContentItem::InputImage {
image_url,
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -1743,7 +1750,7 @@ fn image_data_url_payload_does_not_dominate_custom_tool_call_output_estimate() {
},
FunctionCallOutputContentItem::InputImage {
image_url,
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -1763,6 +1770,7 @@ fn non_base64_image_urls_are_unchanged() {
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: "https://example.com/foo.png".to_string(),
detail: Some(DEFAULT_IMAGE_DETAIL),
}],
end_turn: None,
phase: None,
@@ -1772,7 +1780,7 @@ fn non_base64_image_urls_are_unchanged() {
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: "file:///tmp/foo.png".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -1794,6 +1802,7 @@ fn data_url_without_base64_marker_is_unchanged() {
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: "data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg'/>".to_string(),
detail: Some(DEFAULT_IMAGE_DETAIL),
}],
end_turn: None,
phase: None,
@@ -1814,7 +1823,7 @@ fn non_image_base64_data_url_is_unchanged() {
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url,
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -1832,7 +1841,10 @@ fn mixed_case_data_url_markers_are_adjusted() {
let item = ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputImage { image_url }],
content: vec![ContentItem::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
}],
end_turn: None,
phase: None,
};
@@ -1859,9 +1871,11 @@ fn multiple_inline_images_apply_multiple_fixed_costs() {
},
ContentItem::InputImage {
image_url: image_url_one,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputImage {
image_url: image_url_two,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
],
end_turn: None,

View File

@@ -90,7 +90,7 @@ fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {
text_elements: Vec::new(),
});
}
ContentItem::InputImage { image_url } => {
ContentItem::InputImage { image_url, .. } => {
content.push(UserInput::Image {
image_url: image_url.clone(),
});

View File

@@ -5,6 +5,7 @@ use codex_protocol::items::TurnItem;
use codex_protocol::items::WebSearchItem;
use codex_protocol::items::build_hook_prompt_message;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::ReasoningItemContent;
use codex_protocol::models::ReasoningItemReasoningSummary;
use codex_protocol::models::ResponseItem;
@@ -26,9 +27,11 @@ fn parses_user_message_with_text_and_two_images() {
},
ContentItem::InputImage {
image_url: img1.clone(),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputImage {
image_url: img2.clone(),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
],
end_turn: None,
@@ -66,6 +69,7 @@ fn skips_local_image_label_text() {
ContentItem::InputText { text: label },
ContentItem::InputImage {
image_url: image_url.clone(),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputText {
text: "</image>".to_string(),
@@ -145,6 +149,7 @@ fn skips_unnamed_image_label_text() {
ContentItem::InputText { text: label },
ContentItem::InputImage {
image_url: image_url.clone(),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
ContentItem::InputText {
text: codex_protocol::models::image_close_tag_text(),

View File

@@ -1,4 +1,5 @@
use codex_code_mode::ImageDetail as CodeModeImageDetail;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::ImageDetail;
@@ -36,7 +37,9 @@ impl IntoProtocol<FunctionCallOutputContentItem>
codex_code_mode::FunctionCallOutputContentItem::InputImage { image_url, detail } => {
FunctionCallOutputContentItem::InputImage {
image_url,
detail: detail.map(IntoProtocol::into_protocol),
detail: detail
.map(IntoProtocol::into_protocol)
.or(Some(DEFAULT_IMAGE_DETAIL)),
}
}
}

View File

@@ -7,6 +7,7 @@ use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
use crate::turn_diff_tracker::TurnDiffTracker;
use crate::unified_exec::resolve_max_tokens;
use codex_protocol::mcp::CallToolResult;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
@@ -463,10 +464,10 @@ pub(crate) fn response_input_to_code_mode_result(response: ResponseInputItem) ->
| codex_protocol::models::ContentItem::OutputText { text } => {
FunctionCallOutputContentItem::InputText { text }
}
codex_protocol::models::ContentItem::InputImage { image_url } => {
codex_protocol::models::ContentItem::InputImage { image_url, detail } => {
FunctionCallOutputContentItem::InputImage {
image_url,
detail: None,
detail: detail.or(Some(DEFAULT_IMAGE_DETAIL)),
}
}
})

View File

@@ -1,4 +1,5 @@
use super::*;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use core_test_support::assert_regex_match;
use pretty_assertions::assert_eq;
use serde_json::json;
@@ -173,7 +174,7 @@ fn mcp_tool_output_response_item_preserves_content_items() {
},
FunctionCallOutputContentItem::InputImage {
image_url: image_url.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]
.as_slice()
@@ -239,7 +240,7 @@ fn custom_tool_calls_can_derive_text_from_content_items() {
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputText {
text: "line 2".to_string(),
@@ -259,7 +260,7 @@ fn custom_tool_calls_can_derive_text_from_content_items() {
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputText {
text: "line 2".to_string(),

View File

@@ -1,3 +1,4 @@
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
@@ -133,7 +134,11 @@ impl ToolHandler for ViewImageHandler {
} else {
PromptImageMode::ResizeToFit
};
let image_detail = use_original_detail.then_some(ImageDetail::Original);
let image_detail = Some(if use_original_detail {
ImageDetail::Original
} else {
DEFAULT_IMAGE_DETAIL
});
let image =
load_for_prompt_bytes(abs_path.as_path(), file_bytes, image_mode).map_err(|error| {
@@ -210,7 +215,7 @@ mod tests {
fn code_mode_result_returns_image_url_object() {
let output = ViewImageOutput {
image_url: "data:image/png;base64,AAA".to_string(),
image_detail: None,
image_detail: Some(DEFAULT_IMAGE_DETAIL),
};
let result = output.code_mode_result(&ToolPayload::Function {
@@ -221,7 +226,7 @@ mod tests {
result,
json!({
"image_url": "data:image/png;base64,AAA",
"detail": null,
"detail": "high",
})
);
}

View File

@@ -1225,9 +1225,9 @@ function parseImageDetail(detail) {
if (typeof detail !== "string" || !detail) {
throw new Error("codex.emitImage expected detail to be a non-empty string");
}
if (detail !== "original") {
if (!["auto", "low", "high", "original"].includes(detail)) {
throw new Error(
'codex.emitImage only supports detail "original"; omit detail for default behavior',
'codex.emitImage expected detail to be one of "auto", "low", "high", or "original"',
);
}
return detail;
@@ -1331,10 +1331,17 @@ function normalizeMcpImageData(data, mimeType) {
}
function parseMcpImageDetail(meta) {
if (!isPlainObject(meta) || meta["codex/imageDetail"] !== "original") {
if (!isPlainObject(meta)) {
return undefined;
}
return "original";
const detail = meta["codex/imageDetail"];
if (
typeof detail !== "string" ||
!["auto", "low", "high", "original"].includes(detail)
) {
return undefined;
}
return detail;
}
function parseMcpToolResult(result) {

View File

@@ -10,6 +10,7 @@ use std::time::Duration;
use codex_protocol::ThreadId;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ImageDetail;
@@ -1750,7 +1751,8 @@ fn emitted_image_content_item(
) -> FunctionCallOutputContentItem {
FunctionCallOutputContentItem::InputImage {
image_url,
detail: normalize_output_image_detail(&turn.model_info, detail),
detail: normalize_output_image_detail(&turn.model_info, detail)
.or(Some(DEFAULT_IMAGE_DETAIL)),
}
}

View File

@@ -5,6 +5,7 @@ use crate::turn_diff_tracker::TurnDiffTracker;
use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem;
use codex_protocol::dynamic_tools::DynamicToolResponse;
use codex_protocol::dynamic_tools::DynamicToolSpec;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ImageDetail;
@@ -253,7 +254,7 @@ fn summarize_tool_call_response_for_multimodal_function_output() {
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,abcd".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -277,7 +278,7 @@ fn summarize_tool_call_response_for_multimodal_function_output() {
}
#[tokio::test]
async fn emitted_image_content_item_drops_unsupported_explicit_detail() {
async fn emitted_image_content_item_preserves_explicit_non_original_detail() {
let (_session, turn) = make_session_and_context().await;
let content_item = emitted_image_content_item(
&turn,
@@ -288,7 +289,7 @@ async fn emitted_image_content_item_drops_unsupported_explicit_detail() {
content_item,
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(ImageDetail::Low),
}
);
}
@@ -314,7 +315,7 @@ async fn emitted_image_content_item_allows_explicit_original_detail_when_support
}
#[tokio::test]
async fn emitted_image_content_item_drops_explicit_original_detail_when_unsupported() {
async fn emitted_image_content_item_defaults_to_high_for_unsupported_original_detail() {
let (_session, turn) = make_session_and_context().await;
let content_item = emitted_image_content_item(
@@ -327,7 +328,7 @@ async fn emitted_image_content_item_drops_explicit_original_detail_when_unsuppor
content_item,
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}
);
}
@@ -356,7 +357,7 @@ fn summarize_tool_call_response_for_multimodal_custom_output() {
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,abcd".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -1213,7 +1214,7 @@ console.log(out.type);
image_url:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);
@@ -1268,7 +1269,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png" });
image_url:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);
@@ -1325,13 +1326,13 @@ await codex.emitImage(
image_url:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputImage {
image_url:
"data:image/gif;base64,R0lGODdhAQABAIAAAP///////ywAAAAAAQABAAACAkQBADs="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]
.as_slice()
@@ -1387,7 +1388,7 @@ console.log("cell-complete");
image_url:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);
@@ -1465,11 +1466,11 @@ console.log("helpers-ran");
vec![
FunctionCallOutputContentItem::InputImage {
image_url: data_url.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputImage {
image_url: data_url.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]
);
@@ -1701,7 +1702,7 @@ await codex.emitImage("DATA:image/png;base64,AAA");
result.content_items.as_slice(),
[FunctionCallOutputContentItem::InputImage {
image_url: "DATA:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);
@@ -1751,10 +1752,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png", detail: "ultra" });
)
.await
.expect_err("invalid detail should fail");
assert!(
err.to_string()
.contains("only supports detail \"original\"")
);
assert!(err.to_string().contains("expected detail to be one of"));
assert!(session.get_pending_input().await.is_empty());
Ok(())
@@ -1804,7 +1802,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png", detail: null });
result.content_items.as_slice(),
[FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);