mirror of
https://github.com/openai/codex.git
synced 2026-05-02 18:37:01 +00:00
Add under-development original-resolution view_image support (#13050)
## Summary
Add original-resolution support for `view_image` behind the
under-development `view_image_original_resolution` feature flag.
When the flag is enabled and the target model is `gpt-5.3-codex` or
newer, `view_image` now preserves original PNG/JPEG/WebP bytes and sends
`detail: "original"` to the Responses API instead of using the legacy
resize/compress path.
## What changed
- Added `view_image_original_resolution` as an under-development feature
flag.
- Added `ImageDetail` to the protocol models and support for serializing
`detail: "original"` on tool-returned images.
- Added `PromptImageMode::Original` to `codex-utils-image`.
- Preserves original PNG/JPEG/WebP bytes.
- Keeps legacy behavior for the resize path.
- Updated `view_image` to:
- use the shared `local_image_content_items_with_label_number(...)`
helper in both code paths
- select original-resolution mode only when:
- the feature flag is enabled, and
- the model slug parses as `gpt-5.3-codex` or newer
- Kept local user image attachments on the existing resize path; this
change is specific to `view_image`.
- Updated history/image accounting so only `detail: "original"` images
use the docs-based GPT-5 image cost calculation; legacy images still use
the old fixed estimate.
- Added JS REPL guidance, gated on the same feature flag, to prefer JPEG
at 85% quality unless lossless is required, while still allowing other
formats when explicitly requested.
- Updated tests and helper code that construct
`FunctionCallOutputContentItem::InputImage` to carry the new `detail`
field.
## Behavior
### Feature off
- `view_image` keeps the existing resize/re-encode behavior.
- History estimation keeps the existing fixed-cost heuristic.
### Feature on + `gpt-5.3-codex+`
- `view_image` sends original-resolution images with `detail:
"original"`.
- PNG/JPEG/WebP source bytes are preserved when possible.
- History estimation uses the GPT-5 docs-based image-cost calculation
for those `detail: "original"` images.
#### [git stack](https://github.com/magus/git-stack-cli)
- 👉 `1` https://github.com/openai/codex/pull/13050
- ⏳ `2` https://github.com/openai/codex/pull/13331
- ⏳ `3` https://github.com/openai/codex/pull/13049
This commit is contained in:
committed by
GitHub
parent
935754baa3
commit
b92146d48b
@@ -14,6 +14,7 @@ use image::ImageEncoder;
|
||||
use image::ImageFormat;
|
||||
use image::codecs::jpeg::JpegEncoder;
|
||||
use image::codecs::png::PngEncoder;
|
||||
use image::codecs::webp::WebPEncoder;
|
||||
use image::imageops::FilterType;
|
||||
/// Maximum width used when resizing images before uploading.
|
||||
pub const MAX_WIDTH: u32 = 2048;
|
||||
@@ -33,24 +34,48 @@ pub struct EncodedImage {
|
||||
impl EncodedImage {
|
||||
pub fn into_data_url(self) -> String {
|
||||
let encoded = BASE64_STANDARD.encode(&self.bytes);
|
||||
format!("data:{};base64,{}", self.mime, encoded)
|
||||
format!("data:{};base64,{encoded}", self.mime)
|
||||
}
|
||||
}
|
||||
|
||||
static IMAGE_CACHE: LazyLock<BlockingLruCache<[u8; 20], EncodedImage>> =
|
||||
/// How a source image should be prepared before attaching it to a prompt.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PromptImageMode {
    /// Legacy path: downscale/re-encode so the image fits within
    /// `MAX_WIDTH` x `MAX_HEIGHT` before upload.
    ResizeToFit,
    /// Preserve the original resolution; source PNG/JPEG/WebP bytes are
    /// passed through unmodified (used with `detail: "original"` per the
    /// `view_image_original_resolution` feature flag).
    Original,
}
|
||||
|
||||
/// Key for the encoded-image LRU cache.
///
/// The preparation mode is part of the key so that the same file processed
/// under `ResizeToFit` and `Original` produces two distinct cache entries
/// instead of one mode's output shadowing the other's.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct ImageCacheKey {
    // SHA-1 digest of the raw file bytes (computed via `sha1_digest`).
    digest: [u8; 20],
    // Mode the cached `EncodedImage` was produced under.
    mode: PromptImageMode,
}
|
||||
|
||||
static IMAGE_CACHE: LazyLock<BlockingLruCache<ImageCacheKey, EncodedImage>> =
|
||||
LazyLock::new(|| BlockingLruCache::new(NonZeroUsize::new(32).unwrap_or(NonZeroUsize::MIN)));
|
||||
|
||||
pub fn load_and_resize_to_fit(path: &Path) -> Result<EncodedImage, ImageProcessingError> {
|
||||
load_for_prompt(path, PromptImageMode::ResizeToFit)
|
||||
}
|
||||
|
||||
pub fn load_for_prompt(
|
||||
path: &Path,
|
||||
mode: PromptImageMode,
|
||||
) -> Result<EncodedImage, ImageProcessingError> {
|
||||
let path_buf = path.to_path_buf();
|
||||
|
||||
let file_bytes = read_file_bytes(path, &path_buf)?;
|
||||
|
||||
let key = sha1_digest(&file_bytes);
|
||||
let key = ImageCacheKey {
|
||||
digest: sha1_digest(&file_bytes),
|
||||
mode,
|
||||
};
|
||||
|
||||
IMAGE_CACHE.get_or_try_insert_with(key, move || {
|
||||
let format = match image::guess_format(&file_bytes) {
|
||||
Ok(ImageFormat::Png) => Some(ImageFormat::Png),
|
||||
Ok(ImageFormat::Jpeg) => Some(ImageFormat::Jpeg),
|
||||
Ok(ImageFormat::Gif) => Some(ImageFormat::Gif),
|
||||
Ok(ImageFormat::WebP) => Some(ImageFormat::WebP),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
@@ -63,42 +88,54 @@ pub fn load_and_resize_to_fit(path: &Path) -> Result<EncodedImage, ImageProcessi
|
||||
|
||||
let (width, height) = dynamic.dimensions();
|
||||
|
||||
let encoded = if width <= MAX_WIDTH && height <= MAX_HEIGHT {
|
||||
if let Some(format) = format {
|
||||
let mime = format_to_mime(format);
|
||||
EncodedImage {
|
||||
bytes: file_bytes,
|
||||
mime,
|
||||
width,
|
||||
height,
|
||||
let encoded =
|
||||
if mode == PromptImageMode::Original || (width <= MAX_WIDTH && height <= MAX_HEIGHT) {
|
||||
if let Some(format) = format.filter(|format| can_preserve_source_bytes(*format)) {
|
||||
let mime = format_to_mime(format);
|
||||
EncodedImage {
|
||||
bytes: file_bytes,
|
||||
mime,
|
||||
width,
|
||||
height,
|
||||
}
|
||||
} else {
|
||||
let (bytes, output_format) = encode_image(&dynamic, ImageFormat::Png)?;
|
||||
let mime = format_to_mime(output_format);
|
||||
EncodedImage {
|
||||
bytes,
|
||||
mime,
|
||||
width,
|
||||
height,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let (bytes, output_format) = encode_image(&dynamic, ImageFormat::Png)?;
|
||||
let resized = dynamic.resize(MAX_WIDTH, MAX_HEIGHT, FilterType::Triangle);
|
||||
let target_format = format
|
||||
.filter(|format| can_preserve_source_bytes(*format))
|
||||
.unwrap_or(ImageFormat::Png);
|
||||
let (bytes, output_format) = encode_image(&resized, target_format)?;
|
||||
let mime = format_to_mime(output_format);
|
||||
EncodedImage {
|
||||
bytes,
|
||||
mime,
|
||||
width,
|
||||
height,
|
||||
width: resized.width(),
|
||||
height: resized.height(),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let resized = dynamic.resize(MAX_WIDTH, MAX_HEIGHT, FilterType::Triangle);
|
||||
let target_format = format.unwrap_or(ImageFormat::Png);
|
||||
let (bytes, output_format) = encode_image(&resized, target_format)?;
|
||||
let mime = format_to_mime(output_format);
|
||||
EncodedImage {
|
||||
bytes,
|
||||
mime,
|
||||
width: resized.width(),
|
||||
height: resized.height(),
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
Ok(encoded)
|
||||
})
|
||||
}
|
||||
|
||||
fn can_preserve_source_bytes(format: ImageFormat) -> bool {
|
||||
// Public API docs explicitly call out non-animated GIF support only.
|
||||
// Preserve byte-for-byte only for formats we can safely pass through.
|
||||
matches!(
|
||||
format,
|
||||
ImageFormat::Png | ImageFormat::Jpeg | ImageFormat::WebP
|
||||
)
|
||||
}
|
||||
|
||||
fn read_file_bytes(path: &Path, path_for_error: &Path) -> Result<Vec<u8>, ImageProcessingError> {
|
||||
match tokio::runtime::Handle::try_current() {
|
||||
// If we're inside a Tokio runtime, avoid block_on (it panics on worker threads).
|
||||
@@ -123,6 +160,7 @@ fn encode_image(
|
||||
) -> Result<(Vec<u8>, ImageFormat), ImageProcessingError> {
|
||||
let target_format = match preferred_format {
|
||||
ImageFormat::Jpeg => ImageFormat::Jpeg,
|
||||
ImageFormat::WebP => ImageFormat::WebP,
|
||||
_ => ImageFormat::Png,
|
||||
};
|
||||
|
||||
@@ -153,6 +191,21 @@ fn encode_image(
|
||||
source,
|
||||
})?;
|
||||
}
|
||||
ImageFormat::WebP => {
|
||||
let rgba = image.to_rgba8();
|
||||
let encoder = WebPEncoder::new_lossless(&mut buffer);
|
||||
encoder
|
||||
.write_image(
|
||||
rgba.as_raw(),
|
||||
image.width(),
|
||||
image.height(),
|
||||
ColorType::Rgba8.into(),
|
||||
)
|
||||
.map_err(|source| ImageProcessingError::Encode {
|
||||
format: target_format,
|
||||
source,
|
||||
})?;
|
||||
}
|
||||
_ => unreachable!("unsupported target_format should have been handled earlier"),
|
||||
}
|
||||
|
||||
@@ -162,6 +215,8 @@ fn encode_image(
|
||||
fn format_to_mime(format: ImageFormat) -> String {
|
||||
match format {
|
||||
ImageFormat::Jpeg => "image/jpeg".to_string(),
|
||||
ImageFormat::Gif => "image/gif".to_string(),
|
||||
ImageFormat::WebP => "image/webp".to_string(),
|
||||
_ => "image/png".to_string(),
|
||||
}
|
||||
}
|
||||
@@ -176,38 +231,70 @@ mod tests {
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn returns_original_image_when_within_bounds() {
|
||||
for (format, mime) in [
|
||||
(ImageFormat::Png, "image/png"),
|
||||
(ImageFormat::WebP, "image/webp"),
|
||||
] {
|
||||
let temp_file = NamedTempFile::new().expect("temp file");
|
||||
let image = ImageBuffer::from_pixel(64, 32, Rgba([10u8, 20, 30, 255]));
|
||||
image
|
||||
.save_with_format(temp_file.path(), format)
|
||||
.expect("write image to temp file");
|
||||
|
||||
let original_bytes = std::fs::read(temp_file.path()).expect("read written image");
|
||||
let encoded = load_and_resize_to_fit(temp_file.path()).expect("process image");
|
||||
|
||||
assert_eq!(encoded.width, 64);
|
||||
assert_eq!(encoded.height, 32);
|
||||
assert_eq!(encoded.mime, mime);
|
||||
assert_eq!(encoded.bytes, original_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn downscales_large_image() {
|
||||
for (format, mime) in [
|
||||
(ImageFormat::Png, "image/png"),
|
||||
(ImageFormat::WebP, "image/webp"),
|
||||
] {
|
||||
let temp_file = NamedTempFile::new().expect("temp file");
|
||||
let image = ImageBuffer::from_pixel(4096, 2048, Rgba([200u8, 10, 10, 255]));
|
||||
image
|
||||
.save_with_format(temp_file.path(), format)
|
||||
.expect("write image to temp file");
|
||||
|
||||
let processed = load_and_resize_to_fit(temp_file.path()).expect("process image");
|
||||
|
||||
assert!(processed.width <= MAX_WIDTH);
|
||||
assert!(processed.height <= MAX_HEIGHT);
|
||||
assert_eq!(processed.mime, mime);
|
||||
|
||||
let detected_format =
|
||||
image::guess_format(&processed.bytes).expect("detect resized output format");
|
||||
assert_eq!(detected_format, format);
|
||||
|
||||
let loaded = image::load_from_memory(&processed.bytes)
|
||||
.expect("read resized bytes back into image");
|
||||
assert_eq!(loaded.dimensions(), (processed.width, processed.height));
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn preserves_large_image_in_original_mode() {
|
||||
let temp_file = NamedTempFile::new().expect("temp file");
|
||||
let image = ImageBuffer::from_pixel(64, 32, Rgba([10u8, 20, 30, 255]));
|
||||
let image = ImageBuffer::from_pixel(4096, 2048, Rgba([180u8, 30, 30, 255]));
|
||||
image
|
||||
.save_with_format(temp_file.path(), ImageFormat::Png)
|
||||
.expect("write png to temp file");
|
||||
|
||||
let original_bytes = std::fs::read(temp_file.path()).expect("read written image");
|
||||
let processed =
|
||||
load_for_prompt(temp_file.path(), PromptImageMode::Original).expect("process image");
|
||||
|
||||
let encoded = load_and_resize_to_fit(temp_file.path()).expect("process image");
|
||||
|
||||
assert_eq!(encoded.width, 64);
|
||||
assert_eq!(encoded.height, 32);
|
||||
assert_eq!(encoded.mime, "image/png");
|
||||
assert_eq!(encoded.bytes, original_bytes);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn downscales_large_image() {
|
||||
let temp_file = NamedTempFile::new().expect("temp file");
|
||||
let image = ImageBuffer::from_pixel(4096, 2048, Rgba([200u8, 10, 10, 255]));
|
||||
image
|
||||
.save_with_format(temp_file.path(), ImageFormat::Png)
|
||||
.expect("write png to temp file");
|
||||
|
||||
let processed = load_and_resize_to_fit(temp_file.path()).expect("process image");
|
||||
|
||||
assert!(processed.width <= MAX_WIDTH);
|
||||
assert!(processed.height <= MAX_HEIGHT);
|
||||
|
||||
let loaded =
|
||||
image::load_from_memory(&processed.bytes).expect("read resized bytes back into image");
|
||||
assert_eq!(loaded.dimensions(), (processed.width, processed.height));
|
||||
assert_eq!(processed.width, 4096);
|
||||
assert_eq!(processed.height, 2048);
|
||||
assert_eq!(processed.mime, "image/png");
|
||||
assert_eq!(processed.bytes, original_bytes);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
|
||||
Reference in New Issue
Block a user