Add under-development original-resolution view_image support (#13050)
## Summary
Add original-resolution support for `view_image` behind the
under-development `view_image_original_resolution` feature flag.
When the flag is enabled and the target model is `gpt-5.3-codex` or
newer, `view_image` now preserves original PNG/JPEG/WebP bytes and sends
`detail: "original"` to the Responses API instead of using the legacy
resize/compress path.
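For concreteness, here is a minimal sketch of that gating, with hypothetical helper names (the real flag and model plumbing in codex differs):

```rust
/// Minimal sketch of the gating described above; the function and parser
/// names are illustrative, not the actual codex internals.
fn use_original_resolution(flag_enabled: bool, model_slug: &str) -> bool {
    // Both conditions must hold: the under-development feature flag is on,
    // and the slug parses as gpt-5.3-codex or newer.
    flag_enabled && parse_codex_version(model_slug).is_some_and(|v| v >= (5, 3))
}

/// Illustrative parser for "gpt-<major>.<minor>-codex" slugs.
fn parse_codex_version(slug: &str) -> Option<(u32, u32)> {
    let version = slug.strip_prefix("gpt-")?.strip_suffix("-codex")?;
    let (major, minor) = version.split_once('.')?;
    Some((major.parse().ok()?, minor.parse().ok()?))
}
```

With that shape, `gpt-5.3-codex` and `gpt-6.0-codex` select the original-resolution path, while `gpt-5.2` (no `-codex` suffix) and `gpt-5.2-codex` fall back to resize/compress, matching the tests below.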
## What changed
- Added `view_image_original_resolution` as an under-development feature
flag.
- Added `ImageDetail` to the protocol models and support for serializing
`detail: "original"` on tool-returned images (a serde sketch follows this
list).
- Added `PromptImageMode::Original` to `codex-utils-image`.
- Preserves original PNG/JPEG/WebP bytes.
- Keeps legacy behavior for the resize path.
- Updated `view_image` to:
- use the shared `local_image_content_items_with_label_number(...)`
helper in both code paths
- select original-resolution mode only when:
- the feature flag is enabled, and
- the model slug parses as `gpt-5.3-codex` or newer
- Kept local user image attachments on the existing resize path; this
change is specific to `view_image`.
- Updated history/image accounting so only `detail: "original"` images
use the docs-based GPT-5 image cost calculation; legacy images still use
the old fixed estimate.
- Added JS REPL guidance, gated on the same feature flag, to prefer JPEG
at 85% quality unless lossless is required, while still allowing other
formats when explicitly requested.
- Updated tests and helper code that construct
`FunctionCallOutputContentItem::InputImage` to carry the new `detail`
field.
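A minimal serde sketch of how a `detail: "original"` field can be carried on tool-returned images while leaving legacy payloads untouched (the real `ImageDetail` and content-item types in the protocol crate may be shaped differently):

```rust
use serde::Serialize;

/// Sketch only: the actual protocol enum may carry more variants.
#[derive(Serialize)]
#[serde(rename_all = "lowercase")]
enum ImageDetail {
    Original,
}

#[derive(Serialize)]
struct InputImage {
    #[serde(rename = "type")]
    kind: &'static str, // always "input_image"
    image_url: String,
    // `None` is skipped entirely, so legacy-path payloads stay byte-identical.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<ImageDetail>,
}
```

Serializing with `detail: Some(ImageDetail::Original)` yields `"detail":"original"`, which is exactly what the original-resolution test below asserts on the wire; the legacy test asserts the field is absent.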
## Behavior
### Feature off
- `view_image` keeps the existing resize/re-encode behavior.
- History estimation keeps the existing fixed-cost heuristic.
### Feature on + `gpt-5.3-codex+`
- `view_image` sends original-resolution images with `detail:
"original"`.
- PNG/JPEG/WebP source bytes are preserved when possible.
- History estimation uses the GPT-5 docs-based image-cost calculation
for those `detail: "original"` images.
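As a rough illustration of a docs-style estimate: OpenAI's vision-cost docs describe a patch-based scheme (32 px × 32 px patches, capped at 1536) for newer models. Whether those exact constants, and the per-model multiplier, apply to `gpt-5.3-codex` is an assumption here, so treat this as a sketch rather than the shipped calculation:

```rust
/// Sketch of a patch-based image-token estimate (assumed constants; the
/// shipped calculation and any model multiplier may differ).
fn estimate_image_tokens(width: u32, height: u32) -> u32 {
    const PATCH: u32 = 32; // patch edge in pixels
    const MAX_PATCHES: u32 = 1536; // cap before the image is scaled down

    let patches = width.div_ceil(PATCH) * height.div_ceil(PATCH);
    // Oversized images are billed as if scaled down to fit under the cap.
    patches.min(MAX_PATCHES)
}
```

For the 2304×864 test image used in the diff below, that is 72 × 27 = 1944 patches, capped to 1536; legacy-path images instead keep the old fixed per-image estimate.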
#### [git stack](https://github.com/magus/git-stack-cli)
- 👉 `1` https://github.com/openai/codex/pull/13050
- ⏳ `2` https://github.com/openai/codex/pull/13331
- ⏳ `3` https://github.com/openai/codex/pull/13049
Commit b92146d48b (parent 935754baa3), committed via GitHub.
@@ -291,6 +291,208 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
```rust
    Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_can_preserve_original_resolution_on_gpt5_3_codex() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
    let mut builder = test_codex()
        .with_model("gpt-5.3-codex")
        .with_config(|config| {
            config.features.enable(Feature::ImageDetailOriginal);
        });
    let TestCodex {
        codex,
        cwd,
        session_configured,
        ..
    } = builder.build(&server).await?;

    // Write a PNG larger than the legacy resize bounds into the workspace.
    let rel_path = "assets/original-example.png";
    let abs_path = cwd.path().join(rel_path);
    if let Some(parent) = abs_path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    let original_width = 2304;
    let original_height = 864;
    let image = ImageBuffer::from_pixel(original_width, original_height, Rgba([0u8, 80, 255, 255]));
    image.save(&abs_path)?;

    let call_id = "view-image-original";
    let arguments = serde_json::json!({ "path": rel_path }).to_string();

    // First mocked turn: the model calls view_image on that path.
    let first_response = sse(vec![
        ev_response_created("resp-1"),
        ev_function_call(call_id, "view_image", &arguments),
        ev_completed("resp-1"),
    ]);
    responses::mount_sse_once(&server, first_response).await;

    // Second mocked turn: captures the request carrying the tool output.
    let second_response = sse(vec![
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-2"),
    ]);
    let mock = responses::mount_sse_once(&server, second_response).await;

    let session_model = session_configured.model.clone();

    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "please add the original screenshot".into(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: cwd.path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: session_model,
            effort: None,
            service_tier: None,
            summary: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;

    wait_for_event_with_timeout(
        &codex,
        |event| matches!(event, EventMsg::TurnComplete(_)),
        Duration::from_secs(10),
    )
    .await;

    // The tool output must be a single input_image with detail: "original",
    // and the payload must decode back to the original dimensions.
    let req = mock.single_request();
    let function_output = req.function_call_output(call_id);
    let output_items = function_output
        .get("output")
        .and_then(Value::as_array)
        .expect("function_call_output should be a content item array");
    assert_eq!(output_items.len(), 1);
    assert_eq!(
        output_items[0].get("detail").and_then(Value::as_str),
        Some("original")
    );
    let image_url = output_items[0]
        .get("image_url")
        .and_then(Value::as_str)
        .expect("image_url present");

    let (_, encoded) = image_url
        .split_once(',')
        .expect("image url contains data prefix");
    let decoded = BASE64_STANDARD
        .decode(encoded)
        .expect("image data decodes from base64 for request");
    let preserved = load_from_memory(&decoded).expect("load preserved image");
    let (width, height) = preserved.dimensions();
    assert_eq!(width, original_width);
    assert_eq!(height, original_height);

    Ok(())
}
```
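The dimension checks above confirm the bytes survived, but the passthrough itself hinges on recognizing the source format. A hypothetical sketch of that decision (the real logic lives in `codex-utils-image` behind `PromptImageMode::Original` and may differ):

```rust
/// Illustrative magic-byte sniffing for the three passthrough formats.
fn passthrough_mime(bytes: &[u8]) -> Option<&'static str> {
    if bytes.starts_with(b"\x89PNG\r\n\x1a\n") {
        Some("image/png")
    } else if bytes.starts_with(&[0xFF, 0xD8, 0xFF]) {
        Some("image/jpeg")
    } else if bytes.len() >= 12 && &bytes[0..4] == b"RIFF" && &bytes[8..12] == b"WEBP" {
        Some("image/webp")
    } else {
        None // any other format falls back to the legacy resize/re-encode path
    }
}
```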
```rust
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_keeps_legacy_behavior_below_gpt5_3_codex() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
    let mut builder = test_codex().with_model("gpt-5.2").with_config(|config| {
        config.features.enable(Feature::ImageDetailOriginal);
    });
    let TestCodex {
        codex,
        cwd,
        session_configured,
        ..
    } = builder.build(&server).await?;

    let rel_path = "assets/original-example-lower-model.png";
    let abs_path = cwd.path().join(rel_path);
    if let Some(parent) = abs_path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    let original_width = 2304;
    let original_height = 864;
    let image = ImageBuffer::from_pixel(original_width, original_height, Rgba([0u8, 80, 255, 255]));
    image.save(&abs_path)?;

    let call_id = "view-image-original-lower-model";
    let arguments = serde_json::json!({ "path": rel_path }).to_string();

    let first_response = sse(vec![
        ev_response_created("resp-1"),
        ev_function_call(call_id, "view_image", &arguments),
        ev_completed("resp-1"),
    ]);
    responses::mount_sse_once(&server, first_response).await;

    let second_response = sse(vec![
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-2"),
    ]);
    let mock = responses::mount_sse_once(&server, second_response).await;

    let session_model = session_configured.model.clone();

    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "please add the screenshot".into(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: cwd.path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: session_model,
            effort: None,
            service_tier: None,
            summary: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;

    wait_for_event_with_timeout(
        &codex,
        |event| matches!(event, EventMsg::TurnComplete(_)),
        Duration::from_secs(10),
    )
    .await;

    // Even with the feature flag on, a pre-gpt-5.3 model must stay on the
    // legacy path: no `detail` field, and a resized PNG payload.
    let req = mock.single_request();
    let function_output = req.function_call_output(call_id);
    let output_items = function_output
        .get("output")
        .and_then(Value::as_array)
        .expect("function_call_output should be a content item array");
    assert_eq!(output_items.len(), 1);
    assert_eq!(output_items[0].get("detail"), None);

    let image_url = output_items[0]
        .get("image_url")
        .and_then(Value::as_str)
        .expect("image_url present");

    let (prefix, encoded) = image_url
        .split_once(',')
        .expect("image url contains data prefix");
    assert_eq!(prefix, "data:image/png;base64");

    let decoded = BASE64_STANDARD
        .decode(encoded)
        .expect("image data decodes from base64 for request");
    let resized = load_from_memory(&decoded).expect("load resized image");
    let (resized_width, resized_height) = resized.dimensions();
    assert!(resized_width <= 2048);
    assert!(resized_height <= 768);
    assert!(resized_width < original_width);
    assert!(resized_height < original_height);

    Ok(())
}
```
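The `2048`/`768` bounds asserted above match a fit-both-caps resize rule: scale so the longest side is at most 2048 px and the shortest at most 768 px, never upscaling. A sketch under that assumption (the actual policy lives in `codex-utils-image`):

```rust
/// Sketch of the legacy target-size rule the assertions above imply
/// (assumed caps; not necessarily the shipped resize policy).
fn legacy_target_size(width: u32, height: u32) -> (u32, u32) {
    const MAX_LONG: f64 = 2048.0;
    const MAX_SHORT: f64 = 768.0;

    let (w, h) = (width as f64, height as f64);
    // One scale factor must satisfy both caps; min(..., 1.0) avoids upscaling.
    let scale = (MAX_LONG / w.max(h)).min(MAX_SHORT / w.min(h)).min(1.0);
    ((w * scale).round() as u32, (h * scale).round() as u32)
}
```

For the 2304×864 test image this yields exactly 2048×768, which is why the assertions accept equality on the caps but require strict shrinkage relative to the original.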
```rust
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn js_repl_view_image_tool_attaches_local_image() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));
```
@@ -674,6 +876,7 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an
```rust
        apply_patch_tool_type: None,
        truncation_policy: TruncationPolicyConfig::bytes(10_000),
        supports_parallel_tool_calls: false,
        supports_image_detail_original: false,
        context_window: Some(272_000),
        auto_compact_token_limit: None,
        effective_context_window_percent: 95,
```