mirror of
https://github.com/openai/codex.git
synced 2026-05-02 18:37:01 +00:00
Add under-development original-resolution view_image support (#13050)
## Summary
Add original-resolution support for `view_image` behind the
under-development `view_image_original_resolution` feature flag.
When the flag is enabled and the target model is `gpt-5.3-codex` or
newer, `view_image` now preserves original PNG/JPEG/WebP bytes and sends
`detail: "original"` to the Responses API instead of using the legacy
resize/compress path.
## What changed
- Added `view_image_original_resolution` as an under-development feature
flag.
- Added `ImageDetail` to the protocol models and support for serializing
`detail: "original"` on tool-returned images.
- Added `PromptImageMode::Original` to `codex-utils-image`.
- Preserves original PNG/JPEG/WebP bytes.
- Keeps legacy behavior for the resize path.
- Updated `view_image` to:
- use the shared `local_image_content_items_with_label_number(...)`
helper in both code paths
- select original-resolution mode only when:
- the feature flag is enabled, and
- the model slug parses as `gpt-5.3-codex` or newer
- Kept local user image attachments on the existing resize path; this
change is specific to `view_image`.
- Updated history/image accounting so only `detail: "original"` images
use the docs-based GPT-5 image cost calculation; legacy images still use
the old fixed estimate.
- Added JS REPL guidance, gated on the same feature flag, to prefer JPEG
at 85% quality unless lossless is required, while still allowing other
formats when explicitly requested.
- Updated tests and helper code that construct
`FunctionCallOutputContentItem::InputImage` to carry the new `detail`
field.
## Behavior
### Feature off
- `view_image` keeps the existing resize/re-encode behavior.
- History estimation keeps the existing fixed-cost heuristic.
### Feature on + `gpt-5.3-codex+`
- `view_image` sends original-resolution images with `detail:
"original"`.
- PNG/JPEG/WebP source bytes are preserved when possible.
- History estimation uses the GPT-5 docs-based image-cost calculation
for those `detail: "original"` images.
#### [git stack](https://github.com/magus/git-stack-cli)
- 👉 `1` https://github.com/openai/codex/pull/13050
- ⏳ `2` https://github.com/openai/codex/pull/13331
- ⏳ `3` https://github.com/openai/codex/pull/13049
This commit is contained in:
committed by
GitHub
parent
935754baa3
commit
b92146d48b
@@ -14,6 +14,7 @@ use image::ImageEncoder;
|
||||
use image::ImageFormat;
|
||||
use image::codecs::jpeg::JpegEncoder;
|
||||
use image::codecs::png::PngEncoder;
|
||||
use image::codecs::webp::WebPEncoder;
|
||||
use image::imageops::FilterType;
|
||||
/// Maximum width used when resizing images before uploading.
|
||||
pub const MAX_WIDTH: u32 = 2048;
|
||||
@@ -33,24 +34,48 @@ pub struct EncodedImage {
|
||||
impl EncodedImage {
|
||||
pub fn into_data_url(self) -> String {
|
||||
let encoded = BASE64_STANDARD.encode(&self.bytes);
|
||||
format!("data:{};base64,{}", self.mime, encoded)
|
||||
format!("data:{};base64,{encoded}", self.mime)
|
||||
}
|
||||
}
|
||||
|
||||
static IMAGE_CACHE: LazyLock<BlockingLruCache<[u8; 20], EncodedImage>> =
|
||||
/// How a source image should be prepared before attaching it to a prompt.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PromptImageMode {
    /// Legacy path: downscale/re-encode so the image fits within
    /// `MAX_WIDTH` x `MAX_HEIGHT` before upload.
    ResizeToFit,
    /// Preserve the original resolution; source PNG/JPEG/WebP bytes are
    /// passed through unmodified (used with `detail: "original"` per the
    /// `view_image_original_resolution` feature flag).
    Original,
}
|
||||
|
||||
/// Key for the encoded-image LRU cache.
///
/// The preparation mode is part of the key so that the same file processed
/// under `ResizeToFit` and `Original` produces two distinct cache entries
/// instead of one mode's output shadowing the other's.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct ImageCacheKey {
    // SHA-1 digest of the raw file bytes (computed via `sha1_digest`).
    digest: [u8; 20],
    // Mode the cached `EncodedImage` was produced under.
    mode: PromptImageMode,
}
|
||||
|
||||
static IMAGE_CACHE: LazyLock<BlockingLruCache<ImageCacheKey, EncodedImage>> =
|
||||
LazyLock::new(|| BlockingLruCache::new(NonZeroUsize::new(32).unwrap_or(NonZeroUsize::MIN)));
|
||||
|
||||
pub fn load_and_resize_to_fit(path: &Path) -> Result<EncodedImage, ImageProcessingError> {
|
||||
load_for_prompt(path, PromptImageMode::ResizeToFit)
|
||||
}
|
||||
|
||||
pub fn load_for_prompt(
|
||||
path: &Path,
|
||||
mode: PromptImageMode,
|
||||
) -> Result<EncodedImage, ImageProcessingError> {
|
||||
let path_buf = path.to_path_buf();
|
||||
|
||||
let file_bytes = read_file_bytes(path, &path_buf)?;
|
||||
|
||||
let key = sha1_digest(&file_bytes);
|
||||
let key = ImageCacheKey {
|
||||
digest: sha1_digest(&file_bytes),
|
||||
mode,
|
||||
};
|
||||
|
||||
IMAGE_CACHE.get_or_try_insert_with(key, move || {
|
||||
let format = match image::guess_format(&file_bytes) {
|
||||
Ok(ImageFormat::Png) => Some(ImageFormat::Png),
|
||||
Ok(ImageFormat::Jpeg) => Some(ImageFormat::Jpeg),
|
||||
Ok(ImageFormat::Gif) => Some(ImageFormat::Gif),
|
||||
Ok(ImageFormat::WebP) => Some(ImageFormat::WebP),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
@@ -63,42 +88,54 @@ pub fn load_and_resize_to_fit(path: &Path) -> Result<EncodedImage, ImageProcessi
|
||||
|
||||
let (width, height) = dynamic.dimensions();
|
||||
|
||||
let encoded = if width <= MAX_WIDTH && height <= MAX_HEIGHT {
|
||||
if let Some(format) = format {
|
||||
let mime = format_to_mime(format);
|
||||
EncodedImage {
|
||||
bytes: file_bytes,
|
||||
mime,
|
||||
width,
|
||||
height,
|
||||
let encoded =
|
||||
if mode == PromptImageMode::Original || (width <= MAX_WIDTH && height <= MAX_HEIGHT) {
|
||||
if let Some(format) = format.filter(|format| can_preserve_source_bytes(*format)) {
|
||||
let mime = format_to_mime(format);
|
||||
EncodedImage {
|
||||
bytes: file_bytes,
|
||||
mime,
|
||||
width,
|
||||
height,
|
||||
}
|
||||
} else {
|
||||
let (bytes, output_format) = encode_image(&dynamic, ImageFormat::Png)?;
|
||||
let mime = format_to_mime(output_format);
|
||||
EncodedImage {
|
||||
bytes,
|
||||
mime,
|
||||
width,
|
||||
height,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let (bytes, output_format) = encode_image(&dynamic, ImageFormat::Png)?;
|
||||
let resized = dynamic.resize(MAX_WIDTH, MAX_HEIGHT, FilterType::Triangle);
|
||||
let target_format = format
|
||||
.filter(|format| can_preserve_source_bytes(*format))
|
||||
.unwrap_or(ImageFormat::Png);
|
||||
let (bytes, output_format) = encode_image(&resized, target_format)?;
|
||||
let mime = format_to_mime(output_format);
|
||||
EncodedImage {
|
||||
bytes,
|
||||
mime,
|
||||
width,
|
||||
height,
|
||||
width: resized.width(),
|
||||
height: resized.height(),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let resized = dynamic.resize(MAX_WIDTH, MAX_HEIGHT, FilterType::Triangle);
|
||||
let target_format = format.unwrap_or(ImageFormat::Png);
|
||||
let (bytes, output_format) = encode_image(&resized, target_format)?;
|
||||
let mime = format_to_mime(output_format);
|
||||
EncodedImage {
|
||||
bytes,
|
||||
mime,
|
||||
width: resized.width(),
|
||||
height: resized.height(),
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
Ok(encoded)
|
||||
})
|
||||
}
|
||||
|
||||
fn can_preserve_source_bytes(format: ImageFormat) -> bool {
|
||||
// Public API docs explicitly call out non-animated GIF support only.
|
||||
// Preserve byte-for-byte only for formats we can safely pass through.
|
||||
matches!(
|
||||
format,
|
||||
ImageFormat::Png | ImageFormat::Jpeg | ImageFormat::WebP
|
||||
)
|
||||
}
|
||||
|
||||
fn read_file_bytes(path: &Path, path_for_error: &Path) -> Result<Vec<u8>, ImageProcessingError> {
|
||||
match tokio::runtime::Handle::try_current() {
|
||||
// If we're inside a Tokio runtime, avoid block_on (it panics on worker threads).
|
||||
@@ -123,6 +160,7 @@ fn encode_image(
|
||||
) -> Result<(Vec<u8>, ImageFormat), ImageProcessingError> {
|
||||
let target_format = match preferred_format {
|
||||
ImageFormat::Jpeg => ImageFormat::Jpeg,
|
||||
ImageFormat::WebP => ImageFormat::WebP,
|
||||
_ => ImageFormat::Png,
|
||||
};
|
||||
|
||||
@@ -153,6 +191,21 @@ fn encode_image(
|
||||
source,
|
||||
})?;
|
||||
}
|
||||
ImageFormat::WebP => {
|
||||
let rgba = image.to_rgba8();
|
||||
let encoder = WebPEncoder::new_lossless(&mut buffer);
|
||||
encoder
|
||||
.write_image(
|
||||
rgba.as_raw(),
|
||||
image.width(),
|
||||
image.height(),
|
||||
ColorType::Rgba8.into(),
|
||||
)
|
||||
.map_err(|source| ImageProcessingError::Encode {
|
||||
format: target_format,
|
||||
source,
|
||||
})?;
|
||||
}
|
||||
_ => unreachable!("unsupported target_format should have been handled earlier"),
|
||||
}
|
||||
|
||||
@@ -162,6 +215,8 @@ fn encode_image(
|
||||
fn format_to_mime(format: ImageFormat) -> String {
|
||||
match format {
|
||||
ImageFormat::Jpeg => "image/jpeg".to_string(),
|
||||
ImageFormat::Gif => "image/gif".to_string(),
|
||||
ImageFormat::WebP => "image/webp".to_string(),
|
||||
_ => "image/png".to_string(),
|
||||
}
|
||||
}
|
||||
@@ -176,38 +231,70 @@ mod tests {
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn returns_original_image_when_within_bounds() {
|
||||
for (format, mime) in [
|
||||
(ImageFormat::Png, "image/png"),
|
||||
(ImageFormat::WebP, "image/webp"),
|
||||
] {
|
||||
let temp_file = NamedTempFile::new().expect("temp file");
|
||||
let image = ImageBuffer::from_pixel(64, 32, Rgba([10u8, 20, 30, 255]));
|
||||
image
|
||||
.save_with_format(temp_file.path(), format)
|
||||
.expect("write image to temp file");
|
||||
|
||||
let original_bytes = std::fs::read(temp_file.path()).expect("read written image");
|
||||
let encoded = load_and_resize_to_fit(temp_file.path()).expect("process image");
|
||||
|
||||
assert_eq!(encoded.width, 64);
|
||||
assert_eq!(encoded.height, 32);
|
||||
assert_eq!(encoded.mime, mime);
|
||||
assert_eq!(encoded.bytes, original_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn downscales_large_image() {
|
||||
for (format, mime) in [
|
||||
(ImageFormat::Png, "image/png"),
|
||||
(ImageFormat::WebP, "image/webp"),
|
||||
] {
|
||||
let temp_file = NamedTempFile::new().expect("temp file");
|
||||
let image = ImageBuffer::from_pixel(4096, 2048, Rgba([200u8, 10, 10, 255]));
|
||||
image
|
||||
.save_with_format(temp_file.path(), format)
|
||||
.expect("write image to temp file");
|
||||
|
||||
let processed = load_and_resize_to_fit(temp_file.path()).expect("process image");
|
||||
|
||||
assert!(processed.width <= MAX_WIDTH);
|
||||
assert!(processed.height <= MAX_HEIGHT);
|
||||
assert_eq!(processed.mime, mime);
|
||||
|
||||
let detected_format =
|
||||
image::guess_format(&processed.bytes).expect("detect resized output format");
|
||||
assert_eq!(detected_format, format);
|
||||
|
||||
let loaded = image::load_from_memory(&processed.bytes)
|
||||
.expect("read resized bytes back into image");
|
||||
assert_eq!(loaded.dimensions(), (processed.width, processed.height));
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn preserves_large_image_in_original_mode() {
|
||||
let temp_file = NamedTempFile::new().expect("temp file");
|
||||
let image = ImageBuffer::from_pixel(64, 32, Rgba([10u8, 20, 30, 255]));
|
||||
let image = ImageBuffer::from_pixel(4096, 2048, Rgba([180u8, 30, 30, 255]));
|
||||
image
|
||||
.save_with_format(temp_file.path(), ImageFormat::Png)
|
||||
.expect("write png to temp file");
|
||||
|
||||
let original_bytes = std::fs::read(temp_file.path()).expect("read written image");
|
||||
let processed =
|
||||
load_for_prompt(temp_file.path(), PromptImageMode::Original).expect("process image");
|
||||
|
||||
let encoded = load_and_resize_to_fit(temp_file.path()).expect("process image");
|
||||
|
||||
assert_eq!(encoded.width, 64);
|
||||
assert_eq!(encoded.height, 32);
|
||||
assert_eq!(encoded.mime, "image/png");
|
||||
assert_eq!(encoded.bytes, original_bytes);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn downscales_large_image() {
|
||||
let temp_file = NamedTempFile::new().expect("temp file");
|
||||
let image = ImageBuffer::from_pixel(4096, 2048, Rgba([200u8, 10, 10, 255]));
|
||||
image
|
||||
.save_with_format(temp_file.path(), ImageFormat::Png)
|
||||
.expect("write png to temp file");
|
||||
|
||||
let processed = load_and_resize_to_fit(temp_file.path()).expect("process image");
|
||||
|
||||
assert!(processed.width <= MAX_WIDTH);
|
||||
assert!(processed.height <= MAX_HEIGHT);
|
||||
|
||||
let loaded =
|
||||
image::load_from_memory(&processed.bytes).expect("read resized bytes back into image");
|
||||
assert_eq!(loaded.dimensions(), (processed.width, processed.height));
|
||||
assert_eq!(processed.width, 4096);
|
||||
assert_eq!(processed.height, 2048);
|
||||
assert_eq!(processed.mime, "image/png");
|
||||
assert_eq!(processed.bytes, original_bytes);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
|
||||
Reference in New Issue
Block a user