Add under-development original-resolution view_image support (#13050)
## Summary
Add original-resolution support for `view_image` behind the
under-development `view_image_original_resolution` feature flag.
When the flag is enabled and the target model is `gpt-5.3-codex` or
newer, `view_image` now preserves original PNG/JPEG/WebP bytes and sends
`detail: "original"` to the Responses API instead of using the legacy
resize/compress path.
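For concreteness, here is a minimal sketch of that gating, with hypothetical helper names (the real flag and model plumbing in codex differs):

```rust
/// Minimal sketch of the gating described above; the function and parser
/// names are illustrative, not the actual codex internals.
fn use_original_resolution(flag_enabled: bool, model_slug: &str) -> bool {
    // Both conditions must hold: the under-development feature flag is on,
    // and the slug parses as gpt-5.3-codex or newer.
    flag_enabled && parse_codex_version(model_slug).is_some_and(|v| v >= (5, 3))
}

/// Illustrative parser for "gpt-<major>.<minor>-codex" slugs.
fn parse_codex_version(slug: &str) -> Option<(u32, u32)> {
    let version = slug.strip_prefix("gpt-")?.strip_suffix("-codex")?;
    let (major, minor) = version.split_once('.')?;
    Some((major.parse().ok()?, minor.parse().ok()?))
}
```

With that shape, `gpt-5.3-codex` and `gpt-6.0-codex` select the original-resolution path, while `gpt-5.2` (no `-codex` suffix) and `gpt-5.2-codex` fall back to resize/compress, matching the tests below.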
## What changed
- Added `view_image_original_resolution` as an under-development feature
flag.
- Added `ImageDetail` to the protocol models and support for serializing
`detail: "original"` on tool-returned images (a serde sketch follows this
list).
- Added `PromptImageMode::Original` to `codex-utils-image`.
- Preserves original PNG/JPEG/WebP bytes.
- Keeps legacy behavior for the resize path.
- Updated `view_image` to:
- use the shared `local_image_content_items_with_label_number(...)`
helper in both code paths
- select original-resolution mode only when:
- the feature flag is enabled, and
- the model slug parses as `gpt-5.3-codex` or newer
- Kept local user image attachments on the existing resize path; this
change is specific to `view_image`.
- Updated history/image accounting so only `detail: "original"` images
use the docs-based GPT-5 image cost calculation; legacy images still use
the old fixed estimate.
- Added JS REPL guidance, gated on the same feature flag, to prefer JPEG
at 85% quality unless lossless is required, while still allowing other
formats when explicitly requested.
- Updated tests and helper code that construct
`FunctionCallOutputContentItem::InputImage` to carry the new `detail`
field.
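A minimal serde sketch of how a `detail: "original"` field can be carried on tool-returned images while leaving legacy payloads untouched (the real `ImageDetail` and content-item types in the protocol crate may be shaped differently):

```rust
use serde::Serialize;

/// Sketch only: the actual protocol enum may carry more variants.
#[derive(Serialize)]
#[serde(rename_all = "lowercase")]
enum ImageDetail {
    Original,
}

#[derive(Serialize)]
struct InputImage {
    #[serde(rename = "type")]
    kind: &'static str, // always "input_image"
    image_url: String,
    // `None` is skipped entirely, so legacy-path payloads stay byte-identical.
    #[serde(skip_serializing_if = "Option::is_none")]
    detail: Option<ImageDetail>,
}
```

Serializing with `detail: Some(ImageDetail::Original)` yields `"detail":"original"`, which is exactly what the original-resolution test below asserts on the wire; the legacy test asserts the field is absent.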
## Behavior
### Feature off
- `view_image` keeps the existing resize/re-encode behavior.
- History estimation keeps the existing fixed-cost heuristic.
### Feature on + `gpt-5.3-codex+`
- `view_image` sends original-resolution images with `detail:
"original"`.
- PNG/JPEG/WebP source bytes are preserved when possible.
- History estimation uses the GPT-5 docs-based image-cost calculation
for those `detail: "original"` images.
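As a rough illustration of a docs-style estimate: OpenAI's vision-cost docs describe a patch-based scheme (32 px × 32 px patches, capped at 1536) for newer models. Whether those exact constants, and the per-model multiplier, apply to `gpt-5.3-codex` is an assumption here, so treat this as a sketch rather than the shipped calculation:

```rust
/// Sketch of a patch-based image-token estimate (assumed constants; the
/// shipped calculation and any model multiplier may differ).
fn estimate_image_tokens(width: u32, height: u32) -> u32 {
    const PATCH: u32 = 32; // patch edge in pixels
    const MAX_PATCHES: u32 = 1536; // cap before the image is scaled down

    let patches = width.div_ceil(PATCH) * height.div_ceil(PATCH);
    // Oversized images are billed as if scaled down to fit under the cap.
    patches.min(MAX_PATCHES)
}
```

For the 2304×864 test image used in the diff below, that is 72 × 27 = 1944 patches, capped to 1536; legacy-path images instead keep the old fixed per-image estimate.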
#### [git stack](https://github.com/magus/git-stack-cli)
- 👉 `1` https://github.com/openai/codex/pull/13050
- ⏳ `2` https://github.com/openai/codex/pull/13331
- ⏳ `3` https://github.com/openai/codex/pull/13049
Commit b92146d48b (parent 935754baa3), committed via GitHub.
@@ -291,6 +291,208 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
```rust
    Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_can_preserve_original_resolution_on_gpt5_3_codex() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
    let mut builder = test_codex()
        .with_model("gpt-5.3-codex")
        .with_config(|config| {
            config.features.enable(Feature::ImageDetailOriginal);
        });
    let TestCodex {
        codex,
        cwd,
        session_configured,
        ..
    } = builder.build(&server).await?;

    // Write a PNG larger than the legacy resize bounds into the workspace.
    let rel_path = "assets/original-example.png";
    let abs_path = cwd.path().join(rel_path);
    if let Some(parent) = abs_path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    let original_width = 2304;
    let original_height = 864;
    let image = ImageBuffer::from_pixel(original_width, original_height, Rgba([0u8, 80, 255, 255]));
    image.save(&abs_path)?;

    let call_id = "view-image-original";
    let arguments = serde_json::json!({ "path": rel_path }).to_string();

    // First mocked turn: the model calls view_image on that path.
    let first_response = sse(vec![
        ev_response_created("resp-1"),
        ev_function_call(call_id, "view_image", &arguments),
        ev_completed("resp-1"),
    ]);
    responses::mount_sse_once(&server, first_response).await;

    // Second mocked turn: captures the request carrying the tool output.
    let second_response = sse(vec![
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-2"),
    ]);
    let mock = responses::mount_sse_once(&server, second_response).await;

    let session_model = session_configured.model.clone();

    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "please add the original screenshot".into(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: cwd.path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: session_model,
            effort: None,
            service_tier: None,
            summary: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;

    wait_for_event_with_timeout(
        &codex,
        |event| matches!(event, EventMsg::TurnComplete(_)),
        Duration::from_secs(10),
    )
    .await;

    // The tool output must be a single input_image with detail: "original",
    // and the payload must decode back to the original dimensions.
    let req = mock.single_request();
    let function_output = req.function_call_output(call_id);
    let output_items = function_output
        .get("output")
        .and_then(Value::as_array)
        .expect("function_call_output should be a content item array");
    assert_eq!(output_items.len(), 1);
    assert_eq!(
        output_items[0].get("detail").and_then(Value::as_str),
        Some("original")
    );
    let image_url = output_items[0]
        .get("image_url")
        .and_then(Value::as_str)
        .expect("image_url present");

    let (_, encoded) = image_url
        .split_once(',')
        .expect("image url contains data prefix");
    let decoded = BASE64_STANDARD
        .decode(encoded)
        .expect("image data decodes from base64 for request");
    let preserved = load_from_memory(&decoded).expect("load preserved image");
    let (width, height) = preserved.dimensions();
    assert_eq!(width, original_width);
    assert_eq!(height, original_height);

    Ok(())
}
```
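The dimension checks above confirm the bytes survived, but the passthrough itself hinges on recognizing the source format. A hypothetical sketch of that decision (the real logic lives in `codex-utils-image` behind `PromptImageMode::Original` and may differ):

```rust
/// Illustrative magic-byte sniffing for the three passthrough formats.
fn passthrough_mime(bytes: &[u8]) -> Option<&'static str> {
    if bytes.starts_with(b"\x89PNG\r\n\x1a\n") {
        Some("image/png")
    } else if bytes.starts_with(&[0xFF, 0xD8, 0xFF]) {
        Some("image/jpeg")
    } else if bytes.len() >= 12 && &bytes[0..4] == b"RIFF" && &bytes[8..12] == b"WEBP" {
        Some("image/webp")
    } else {
        None // any other format falls back to the legacy resize/re-encode path
    }
}
```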
```rust
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_keeps_legacy_behavior_below_gpt5_3_codex() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
    let mut builder = test_codex().with_model("gpt-5.2").with_config(|config| {
        config.features.enable(Feature::ImageDetailOriginal);
    });
    let TestCodex {
        codex,
        cwd,
        session_configured,
        ..
    } = builder.build(&server).await?;

    let rel_path = "assets/original-example-lower-model.png";
    let abs_path = cwd.path().join(rel_path);
    if let Some(parent) = abs_path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    let original_width = 2304;
    let original_height = 864;
    let image = ImageBuffer::from_pixel(original_width, original_height, Rgba([0u8, 80, 255, 255]));
    image.save(&abs_path)?;

    let call_id = "view-image-original-lower-model";
    let arguments = serde_json::json!({ "path": rel_path }).to_string();

    let first_response = sse(vec![
        ev_response_created("resp-1"),
        ev_function_call(call_id, "view_image", &arguments),
        ev_completed("resp-1"),
    ]);
    responses::mount_sse_once(&server, first_response).await;

    let second_response = sse(vec![
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-2"),
    ]);
    let mock = responses::mount_sse_once(&server, second_response).await;

    let session_model = session_configured.model.clone();

    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "please add the screenshot".into(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: cwd.path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: session_model,
            effort: None,
            service_tier: None,
            summary: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;

    wait_for_event_with_timeout(
        &codex,
        |event| matches!(event, EventMsg::TurnComplete(_)),
        Duration::from_secs(10),
    )
    .await;

    // Even with the feature flag on, a pre-gpt-5.3 model must stay on the
    // legacy path: no `detail` field, and a resized PNG payload.
    let req = mock.single_request();
    let function_output = req.function_call_output(call_id);
    let output_items = function_output
        .get("output")
        .and_then(Value::as_array)
        .expect("function_call_output should be a content item array");
    assert_eq!(output_items.len(), 1);
    assert_eq!(output_items[0].get("detail"), None);

    let image_url = output_items[0]
        .get("image_url")
        .and_then(Value::as_str)
        .expect("image_url present");

    let (prefix, encoded) = image_url
        .split_once(',')
        .expect("image url contains data prefix");
    assert_eq!(prefix, "data:image/png;base64");

    let decoded = BASE64_STANDARD
        .decode(encoded)
        .expect("image data decodes from base64 for request");
    let resized = load_from_memory(&decoded).expect("load resized image");
    let (resized_width, resized_height) = resized.dimensions();
    assert!(resized_width <= 2048);
    assert!(resized_height <= 768);
    assert!(resized_width < original_width);
    assert!(resized_height < original_height);

    Ok(())
}
```
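The `2048`/`768` bounds asserted above match a fit-both-caps resize rule: scale so the longest side is at most 2048 px and the shortest at most 768 px, never upscaling. A sketch under that assumption (the actual policy lives in `codex-utils-image`):

```rust
/// Sketch of the legacy target-size rule the assertions above imply
/// (assumed caps; not necessarily the shipped resize policy).
fn legacy_target_size(width: u32, height: u32) -> (u32, u32) {
    const MAX_LONG: f64 = 2048.0;
    const MAX_SHORT: f64 = 768.0;

    let (w, h) = (width as f64, height as f64);
    // One scale factor must satisfy both caps; min(..., 1.0) avoids upscaling.
    let scale = (MAX_LONG / w.max(h)).min(MAX_SHORT / w.min(h)).min(1.0);
    ((w * scale).round() as u32, (h * scale).round() as u32)
}
```

For the 2304×864 test image this yields exactly 2048×768, which is why the assertions accept equality on the caps but require strict shrinkage relative to the original.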
```rust
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn js_repl_view_image_tool_attaches_local_image() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));
```
@@ -674,6 +876,7 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an
```rust
        apply_patch_tool_type: None,
        truncation_policy: TruncationPolicyConfig::bytes(10_000),
        supports_parallel_tool_calls: false,
        supports_image_detail_original: false,
        context_window: Some(272_000),
        auto_compact_token_limit: None,
        effective_context_window_percent: 95,
```