Add under-development original-resolution view_image support (#13050)

## Summary

Add original-resolution support for `view_image` behind the
under-development `view_image_original_resolution` feature flag.

When the flag is enabled and the target model is `gpt-5.3-codex` or
newer, `view_image` now preserves original PNG/JPEG/WebP bytes and sends
`detail: "original"` to the Responses API instead of using the legacy
resize/compress path.

## What changed

- Added `view_image_original_resolution` as an under-development feature
flag.
- Added `ImageDetail` to the protocol models and support for serializing
`detail: "original"` on tool-returned images.
- Added `PromptImageMode::Original` to `codex-utils-image`.
  - Preserves original PNG/JPEG/WebP bytes.
  - Keeps legacy behavior for the resize path.
- Updated `view_image` to:
- use the shared `local_image_content_items_with_label_number(...)`
helper in both code paths
  - select original-resolution mode only when:
    - the feature flag is enabled, and
    - the model slug parses as `gpt-5.3-codex` or newer
- Kept local user image attachments on the existing resize path; this
change is specific to `view_image`.
- Updated history/image accounting so only `detail: "original"` images
use the docs-based GPT-5 image cost calculation; legacy images still use
the old fixed estimate.
- Added JS REPL guidance, gated on the same feature flag, to prefer JPEG
at 85% quality unless lossless is required, while still allowing other
formats when explicitly requested.
- Updated tests and helper code that construct
`FunctionCallOutputContentItem::InputImage` to carry the new `detail`
field.

## Behavior

### Feature off
- `view_image` keeps the existing resize/re-encode behavior.
- History estimation keeps the existing fixed-cost heuristic.

### Feature on + `gpt-5.3-codex+`
- `view_image` sends original-resolution images with `detail:
"original"`.
- PNG/JPEG/WebP source bytes are preserved when possible.
- History estimation uses the GPT-5 docs-based image-cost calculation
for those `detail: "original"` images.


#### [git stack](https://github.com/magus/git-stack-cli)
- 👉 `1` https://github.com/openai/codex/pull/13050
-  `2` https://github.com/openai/codex/pull/13331
-  `3` https://github.com/openai/codex/pull/13049
This commit is contained in:
Curtis 'Fjord' Hawthorne
2026-03-03 15:56:54 -08:00
committed by GitHub
parent 935754baa3
commit b92146d48b
37 changed files with 794 additions and 107 deletions

View File

@@ -278,6 +278,7 @@ async fn model_change_from_image_to_text_strips_prior_image_content() -> Result<
apply_patch_tool_type: None,
truncation_policy: TruncationPolicyConfig::bytes(10_000),
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(272_000),
auto_compact_token_limit: None,
effective_context_window_percent: 95,
@@ -439,6 +440,7 @@ async fn model_switch_to_smaller_model_updates_token_context_window() -> Result<
apply_patch_tool_type: None,
truncation_policy: TruncationPolicyConfig::bytes(10_000),
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(large_context_window),
auto_compact_token_limit: None,
effective_context_window_percent,

View File

@@ -344,6 +344,7 @@ fn test_remote_model(slug: &str, priority: i32) -> ModelInfo {
apply_patch_tool_type: None,
truncation_policy: TruncationPolicyConfig::bytes(10_000),
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(272_000),
auto_compact_token_limit: None,
effective_context_window_percent: 95,

View File

@@ -619,6 +619,7 @@ async fn remote_model_friendly_personality_instructions_with_feature() -> anyhow
apply_patch_tool_type: None,
truncation_policy: TruncationPolicyConfig::bytes(10_000),
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(128_000),
auto_compact_token_limit: None,
effective_context_window_percent: 95,
@@ -729,6 +730,7 @@ async fn user_turn_personality_remote_model_template_includes_update_message() -
apply_patch_tool_type: None,
truncation_policy: TruncationPolicyConfig::bytes(10_000),
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(128_000),
auto_compact_token_limit: None,
effective_context_window_percent: 95,

View File

@@ -303,6 +303,7 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> {
apply_patch_tool_type: None,
truncation_policy: TruncationPolicyConfig::bytes(10_000),
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(272_000),
auto_compact_token_limit: None,
effective_context_window_percent: 95,
@@ -543,6 +544,7 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> {
apply_patch_tool_type: None,
truncation_policy: TruncationPolicyConfig::bytes(10_000),
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(272_000),
auto_compact_token_limit: None,
effective_context_window_percent: 95,
@@ -1007,6 +1009,7 @@ fn test_remote_model_with_policy(
apply_patch_tool_type: None,
truncation_policy,
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(272_000),
auto_compact_token_limit: None,
effective_context_window_percent: 95,

View File

@@ -409,6 +409,7 @@ async fn stdio_image_responses_are_sanitized_for_text_only_model() -> anyhow::Re
apply_patch_tool_type: None,
truncation_policy: TruncationPolicyConfig::bytes(10_000),
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(272_000),
auto_compact_token_limit: None,
effective_context_window_percent: 95,

View File

@@ -291,6 +291,208 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_can_preserve_original_resolution_on_gpt5_3_codex() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex()
.with_model("gpt-5.3-codex")
.with_config(|config| {
config.features.enable(Feature::ImageDetailOriginal);
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let rel_path = "assets/original-example.png";
let abs_path = cwd.path().join(rel_path);
if let Some(parent) = abs_path.parent() {
std::fs::create_dir_all(parent)?;
}
let original_width = 2304;
let original_height = 864;
let image = ImageBuffer::from_pixel(original_width, original_height, Rgba([0u8, 80, 255, 255]));
image.save(&abs_path)?;
let call_id = "view-image-original";
let arguments = serde_json::json!({ "path": rel_path }).to_string();
let first_response = sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "view_image", &arguments),
ev_completed("resp-1"),
]);
responses::mount_sse_once(&server, first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]);
let mock = responses::mount_sse_once(&server, second_response).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "please add the original screenshot".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
service_tier: None,
summary: None,
collaboration_mode: None,
personality: None,
})
.await?;
wait_for_event_with_timeout(
&codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
Duration::from_secs(10),
)
.await;
let req = mock.single_request();
let function_output = req.function_call_output(call_id);
let output_items = function_output
.get("output")
.and_then(Value::as_array)
.expect("function_call_output should be a content item array");
assert_eq!(output_items.len(), 1);
assert_eq!(
output_items[0].get("detail").and_then(Value::as_str),
Some("original")
);
let image_url = output_items[0]
.get("image_url")
.and_then(Value::as_str)
.expect("image_url present");
let (_, encoded) = image_url
.split_once(',')
.expect("image url contains data prefix");
let decoded = BASE64_STANDARD
.decode(encoded)
.expect("image data decodes from base64 for request");
let preserved = load_from_memory(&decoded).expect("load preserved image");
let (width, height) = preserved.dimensions();
assert_eq!(width, original_width);
assert_eq!(height, original_height);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_keeps_legacy_behavior_below_gpt5_3_codex() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_model("gpt-5.2").with_config(|config| {
config.features.enable(Feature::ImageDetailOriginal);
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let rel_path = "assets/original-example-lower-model.png";
let abs_path = cwd.path().join(rel_path);
if let Some(parent) = abs_path.parent() {
std::fs::create_dir_all(parent)?;
}
let original_width = 2304;
let original_height = 864;
let image = ImageBuffer::from_pixel(original_width, original_height, Rgba([0u8, 80, 255, 255]));
image.save(&abs_path)?;
let call_id = "view-image-original-lower-model";
let arguments = serde_json::json!({ "path": rel_path }).to_string();
let first_response = sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "view_image", &arguments),
ev_completed("resp-1"),
]);
responses::mount_sse_once(&server, first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]);
let mock = responses::mount_sse_once(&server, second_response).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "please add the screenshot".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
service_tier: None,
summary: None,
collaboration_mode: None,
personality: None,
})
.await?;
wait_for_event_with_timeout(
&codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
Duration::from_secs(10),
)
.await;
let req = mock.single_request();
let function_output = req.function_call_output(call_id);
let output_items = function_output
.get("output")
.and_then(Value::as_array)
.expect("function_call_output should be a content item array");
assert_eq!(output_items.len(), 1);
assert_eq!(output_items[0].get("detail"), None);
let image_url = output_items[0]
.get("image_url")
.and_then(Value::as_str)
.expect("image_url present");
let (prefix, encoded) = image_url
.split_once(',')
.expect("image url contains data prefix");
assert_eq!(prefix, "data:image/png;base64");
let decoded = BASE64_STANDARD
.decode(encoded)
.expect("image data decodes from base64 for request");
let resized = load_from_memory(&decoded).expect("load resized image");
let (resized_width, resized_height) = resized.dimensions();
assert!(resized_width <= 2048);
assert!(resized_height <= 768);
assert!(resized_width < original_width);
assert!(resized_height < original_height);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn js_repl_view_image_tool_attaches_local_image() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
@@ -674,6 +876,7 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an
apply_patch_tool_type: None,
truncation_policy: TruncationPolicyConfig::bytes(10_000),
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(272_000),
auto_compact_token_limit: None,
effective_context_window_percent: 95,