Update image outputs to default to high detail (#18386)

Do not rely on an implicit default `detail`: when an image output has no explicit `detail`, set it to `DEFAULT_IMAGE_DETAIL` (high).
This commit is contained in:
pakrym-oai
2026-04-18 11:01:12 -07:00
committed by GitHub
parent e3c2acb9cd
commit 53b1570367
35 changed files with 245 additions and 93 deletions

View File

@@ -1,4 +1,5 @@
use codex_code_mode::ImageDetail as CodeModeImageDetail;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::ImageDetail;
@@ -36,7 +37,9 @@ impl IntoProtocol<FunctionCallOutputContentItem>
codex_code_mode::FunctionCallOutputContentItem::InputImage { image_url, detail } => {
FunctionCallOutputContentItem::InputImage {
image_url,
detail: detail.map(IntoProtocol::into_protocol),
detail: detail
.map(IntoProtocol::into_protocol)
.or(Some(DEFAULT_IMAGE_DETAIL)),
}
}
}

View File

@@ -7,6 +7,7 @@ use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
use crate::turn_diff_tracker::TurnDiffTracker;
use crate::unified_exec::resolve_max_tokens;
use codex_protocol::mcp::CallToolResult;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
@@ -463,10 +464,10 @@ pub(crate) fn response_input_to_code_mode_result(response: ResponseInputItem) ->
| codex_protocol::models::ContentItem::OutputText { text } => {
FunctionCallOutputContentItem::InputText { text }
}
codex_protocol::models::ContentItem::InputImage { image_url } => {
codex_protocol::models::ContentItem::InputImage { image_url, detail } => {
FunctionCallOutputContentItem::InputImage {
image_url,
detail: None,
detail: detail.or(Some(DEFAULT_IMAGE_DETAIL)),
}
}
})

View File

@@ -1,4 +1,5 @@
use super::*;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use core_test_support::assert_regex_match;
use pretty_assertions::assert_eq;
use serde_json::json;
@@ -173,7 +174,7 @@ fn mcp_tool_output_response_item_preserves_content_items() {
},
FunctionCallOutputContentItem::InputImage {
image_url: image_url.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]
.as_slice()
@@ -239,7 +240,7 @@ fn custom_tool_calls_can_derive_text_from_content_items() {
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputText {
text: "line 2".to_string(),
@@ -259,7 +260,7 @@ fn custom_tool_calls_can_derive_text_from_content_items() {
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputText {
text: "line 2".to_string(),

View File

@@ -1,3 +1,4 @@
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
@@ -133,7 +134,11 @@ impl ToolHandler for ViewImageHandler {
} else {
PromptImageMode::ResizeToFit
};
let image_detail = use_original_detail.then_some(ImageDetail::Original);
let image_detail = Some(if use_original_detail {
ImageDetail::Original
} else {
DEFAULT_IMAGE_DETAIL
});
let image =
load_for_prompt_bytes(abs_path.as_path(), file_bytes, image_mode).map_err(|error| {
@@ -210,7 +215,7 @@ mod tests {
fn code_mode_result_returns_image_url_object() {
let output = ViewImageOutput {
image_url: "data:image/png;base64,AAA".to_string(),
image_detail: None,
image_detail: Some(DEFAULT_IMAGE_DETAIL),
};
let result = output.code_mode_result(&ToolPayload::Function {
@@ -221,7 +226,7 @@ mod tests {
result,
json!({
"image_url": "data:image/png;base64,AAA",
"detail": null,
"detail": "high",
})
);
}

View File

@@ -1225,9 +1225,9 @@ function parseImageDetail(detail) {
if (typeof detail !== "string" || !detail) {
throw new Error("codex.emitImage expected detail to be a non-empty string");
}
if (detail !== "original") {
if (!["auto", "low", "high", "original"].includes(detail)) {
throw new Error(
'codex.emitImage only supports detail "original"; omit detail for default behavior',
'codex.emitImage expected detail to be one of "auto", "low", "high", or "original"',
);
}
return detail;
@@ -1331,10 +1331,17 @@ function normalizeMcpImageData(data, mimeType) {
}
function parseMcpImageDetail(meta) {
if (!isPlainObject(meta) || meta["codex/imageDetail"] !== "original") {
if (!isPlainObject(meta)) {
return undefined;
}
return "original";
const detail = meta["codex/imageDetail"];
if (
typeof detail !== "string" ||
!["auto", "low", "high", "original"].includes(detail)
) {
return undefined;
}
return detail;
}
function parseMcpToolResult(result) {

View File

@@ -10,6 +10,7 @@ use std::time::Duration;
use codex_protocol::ThreadId;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ImageDetail;
@@ -1750,7 +1751,8 @@ fn emitted_image_content_item(
) -> FunctionCallOutputContentItem {
FunctionCallOutputContentItem::InputImage {
image_url,
detail: normalize_output_image_detail(&turn.model_info, detail),
detail: normalize_output_image_detail(&turn.model_info, detail)
.or(Some(DEFAULT_IMAGE_DETAIL)),
}
}

View File

@@ -5,6 +5,7 @@ use crate::turn_diff_tracker::TurnDiffTracker;
use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem;
use codex_protocol::dynamic_tools::DynamicToolResponse;
use codex_protocol::dynamic_tools::DynamicToolSpec;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ImageDetail;
@@ -253,7 +254,7 @@ fn summarize_tool_call_response_for_multimodal_function_output() {
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,abcd".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -277,7 +278,7 @@ fn summarize_tool_call_response_for_multimodal_function_output() {
}
#[tokio::test]
async fn emitted_image_content_item_drops_unsupported_explicit_detail() {
async fn emitted_image_content_item_preserves_explicit_non_original_detail() {
let (_session, turn) = make_session_and_context().await;
let content_item = emitted_image_content_item(
&turn,
@@ -288,7 +289,7 @@ async fn emitted_image_content_item_drops_unsupported_explicit_detail() {
content_item,
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(ImageDetail::Low),
}
);
}
@@ -314,7 +315,7 @@ async fn emitted_image_content_item_allows_explicit_original_detail_when_support
}
#[tokio::test]
async fn emitted_image_content_item_drops_explicit_original_detail_when_unsupported() {
async fn emitted_image_content_item_defaults_to_high_for_unsupported_original_detail() {
let (_session, turn) = make_session_and_context().await;
let content_item = emitted_image_content_item(
@@ -327,7 +328,7 @@ async fn emitted_image_content_item_drops_explicit_original_detail_when_unsuppor
content_item,
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}
);
}
@@ -356,7 +357,7 @@ fn summarize_tool_call_response_for_multimodal_custom_output() {
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,abcd".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]),
};
@@ -1213,7 +1214,7 @@ console.log(out.type);
image_url:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);
@@ -1268,7 +1269,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png" });
image_url:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);
@@ -1325,13 +1326,13 @@ await codex.emitImage(
image_url:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputImage {
image_url:
"data:image/gif;base64,R0lGODdhAQABAIAAAP///////ywAAAAAAQABAAACAkQBADs="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]
.as_slice()
@@ -1387,7 +1388,7 @@ console.log("cell-complete");
image_url:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);
@@ -1465,11 +1466,11 @@ console.log("helpers-ran");
vec![
FunctionCallOutputContentItem::InputImage {
image_url: data_url.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputImage {
image_url: data_url.to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
},
]
);
@@ -1701,7 +1702,7 @@ await codex.emitImage("DATA:image/png;base64,AAA");
result.content_items.as_slice(),
[FunctionCallOutputContentItem::InputImage {
image_url: "DATA:image/png;base64,AAA".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);
@@ -1751,10 +1752,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png", detail: "ultra" });
)
.await
.expect_err("invalid detail should fail");
assert!(
err.to_string()
.contains("only supports detail \"original\"")
);
assert!(err.to_string().contains("expected detail to be one of"));
assert!(session.get_pending_input().await.is_empty());
Ok(())
@@ -1804,7 +1802,7 @@ await codex.emitImage({ bytes: png, mimeType: "image/png", detail: null });
result.content_items.as_slice(),
[FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==".to_string(),
detail: None,
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
.as_slice()
);