Compare commits

...

3 Commits

Author SHA1 Message Date
jif-oai
ddd46d8f99 better encoding 2026-01-09 12:06:06 +00:00
jif-oai
7bc77bdd0c shear 2026-01-09 11:31:33 +00:00
jif-oai
6d467a306d fix: encoding of image URL 2026-01-09 11:07:43 +00:00
3 changed files with 223 additions and 20 deletions

View File

@@ -1,5 +1,8 @@
use std::collections::HashMap;
use std::path::PathBuf;
use codex_utils_image::decode_data_url;
use codex_utils_image::load_and_resize_bytes;
use codex_utils_image::load_and_resize_to_fit;
use mcp_types::CallToolResult;
use mcp_types::ContentBlock;
@@ -203,6 +206,24 @@ fn unsupported_image_error_placeholder(path: &std::path::Path, mime: &str) -> Co
}
}
/// Builds the text item that stands in for an inline image whose data could
/// not be read, embedding `error`'s `Display` output in the message.
fn inline_image_error_placeholder(error: impl std::fmt::Display) -> ContentItem {
    let text = format!("Codex could not read the provided image data: {error}");
    ContentItem::InputText { text }
}
/// Builds the text item that stands in for inline image data that was
/// rejected as invalid, embedding `error`'s `Display` output in the message.
fn invalid_inline_image_error_placeholder(error: impl std::fmt::Display) -> ContentItem {
    let text = format!("Provided image data is invalid: {error}");
    ContentItem::InputText { text }
}
/// Builds the text item that stands in for inline data whose declared MIME
/// type (`mime`) is not an accepted image format.
fn unsupported_inline_image_error_placeholder(mime: &str) -> ContentItem {
    let text = format!("Codex cannot attach image data: unsupported image format `{mime}`.");
    ContentItem::InputText { text }
}
impl From<ResponseInputItem> for ResponseItem {
fn from(item: ResponseInputItem) -> Self {
match item {
@@ -302,7 +323,37 @@ impl From<Vec<UserInput>> for ResponseInputItem {
.into_iter()
.filter_map(|c| match c {
UserInput::Text { text } => Some(ContentItem::InputText { text }),
UserInput::Image { image_url } => Some(ContentItem::InputImage { image_url }),
UserInput::Image { image_url } => {
if image_url.starts_with("data:") {
let inline = match decode_data_url(&image_url) {
Ok(inline) => inline,
Err(err) => return Some(inline_image_error_placeholder(err)),
};
if let Some(mime) = inline.mime.as_deref()
&& !mime.starts_with("image/")
&& mime != "application/octet-stream"
{
return Some(unsupported_inline_image_error_placeholder(mime));
}
match load_and_resize_bytes(
inline.bytes,
PathBuf::from("<inline image>"),
) {
Ok(image) => Some(ContentItem::InputImage {
image_url: image.into_data_url(),
}),
Err(err) => {
if err.is_invalid_image() {
Some(invalid_inline_image_error_placeholder(err))
} else {
Some(inline_image_error_placeholder(err))
}
}
}
} else {
Some(ContentItem::InputImage { image_url })
}
}
UserInput::LocalImage { path } => match load_and_resize_to_fit(&path) {
Ok(image) => Some(ContentItem::InputImage {
image_url: image.into_data_url(),
@@ -556,6 +607,89 @@ mod tests {
use pretty_assertions::assert_eq;
use tempfile::tempdir;
/// An inline `data:image/png` URL must be decoded and re-encoded locally,
/// producing an `InputImage` whose URL is again an image data URL.
#[test]
fn data_url_images_are_processed_locally() {
    // Same small PNG payload the sibling data-URL tests use. An empty URL would
    // skip the `data:` branch entirely and could never satisfy the assertion.
    let payload = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMB/ee9bQAAAABJRU5ErkJggg==";
    let data_url = format!("data:image/png;base64,{payload}");
    let item = ResponseInputItem::from(vec![UserInput::Image {
        image_url: data_url,
    }]);
    let ResponseInputItem::Message { content, .. } = item else {
        panic!("expected message response input");
    };
    match content.as_slice() {
        [ContentItem::InputImage { image_url }] => {
            assert!(image_url.starts_with("data:image/"));
        }
        [ContentItem::InputText { text }] => {
            panic!("expected input image, got placeholder: {text}");
        }
        _ => panic!("expected single input content item"),
    }
}
/// A data URL declared as `application/octet-stream` is still treated as an
/// image candidate and must come back as an image data URL.
#[test]
fn data_url_with_generic_mime_is_processed_locally() {
    let png_base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMB/ee9bQAAAABJRU5ErkJggg==";
    let image_url = format!("data:application/octet-stream;base64,{png_base64}");
    let converted = ResponseInputItem::from(vec![UserInput::Image { image_url }]);
    let ResponseInputItem::Message { content, .. } = converted else {
        panic!("expected message response input");
    };
    match content.as_slice() {
        [ContentItem::InputText { text }] => {
            panic!("expected input image, got placeholder: {text}");
        }
        [ContentItem::InputImage { image_url }] => {
            assert!(image_url.starts_with("data:image/"));
        }
        _ => panic!("expected single input content item"),
    }
}
/// A data URL with no MIME type at all (`data:;base64,...`) is still processed
/// as an image and must come back as an image data URL.
#[test]
fn data_url_with_missing_mime_is_processed_locally() {
    let png_base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMB/ee9bQAAAABJRU5ErkJggg==";
    let image_url = format!("data:;base64,{png_base64}");
    let converted = ResponseInputItem::from(vec![UserInput::Image { image_url }]);
    let ResponseInputItem::Message { content, .. } = converted else {
        panic!("expected message response input");
    };
    match content.as_slice() {
        [ContentItem::InputText { text }] => {
            panic!("expected input image, got placeholder: {text}");
        }
        [ContentItem::InputImage { image_url }] => {
            assert!(image_url.starts_with("data:image/"));
        }
        _ => panic!("expected single input content item"),
    }
}
/// A data URL whose MIME type is neither `image/*` nor
/// `application/octet-stream` is replaced by the "unsupported format" text.
#[test]
fn data_url_with_non_image_mime_renders_placeholder() {
    let image_url = String::from("data:text/plain;base64,SGVsbG8=");
    let converted = ResponseInputItem::from(vec![UserInput::Image { image_url }]);
    let ResponseInputItem::Message { content, .. } = converted else {
        panic!("expected message response input");
    };
    let [ContentItem::InputText { text }] = content.as_slice() else {
        panic!("expected single input text content item");
    };
    let expected = "Codex cannot attach image data: unsupported image format `text/plain`.";
    assert_eq!(text, expected);
}
#[test]
fn serializes_success_as_plain_string() -> Result<()> {
let item = ResponseInputItem::FunctionCallOutput {

View File

@@ -23,6 +23,8 @@ pub enum ImageProcessingError {
#[source]
source: image::ImageError,
},
#[error("invalid data URL: {message}")]
DataUrl { message: String },
}
impl ImageProcessingError {

View File

@@ -1,5 +1,6 @@
use std::num::NonZeroUsize;
use std::path::Path;
use std::path::PathBuf;
use std::sync::LazyLock;
use crate::error::ImageProcessingError;
@@ -30,6 +31,12 @@ pub struct EncodedImage {
pub height: u32,
}
/// Payload extracted from a base64 `data:` URL by `decode_data_url`.
#[derive(Debug, Clone)]
pub struct InlineImageData {
/// First `;`-separated segment of the data URL header, or `None` when that
/// segment is empty (e.g. `data:;base64,...`).
pub mime: Option<String>,
/// Bytes obtained by base64-decoding the part of the URL after the first `,`.
pub bytes: Vec<u8>,
}
impl EncodedImage {
pub fn into_data_url(self) -> String {
let encoded = BASE64_STANDARD.encode(&self.bytes);
@@ -45,6 +52,84 @@ pub fn load_and_resize_to_fit(path: &Path) -> Result<EncodedImage, ImageProcessi
let file_bytes = read_file_bytes(path, &path_buf)?;
process_image_bytes(file_bytes, path_buf)
}
/// Processes image bytes that are already in memory by handing them to
/// `process_image_bytes`, the same helper `load_and_resize_to_fit` calls after
/// reading a file.
///
/// `path_for_error` is passed straight through to `process_image_bytes`, which
/// uses it to label decode errors; callers with no real file can pass a
/// placeholder path.
pub fn load_and_resize_bytes(
bytes: Vec<u8>,
path_for_error: PathBuf,
) -> Result<EncodedImage, ImageProcessingError> {
process_image_bytes(bytes, path_for_error)
}
/// Parses a base64 `data:` URL into its declared MIME type and decoded bytes.
///
/// The accepted shape is `data:[<mime>][;<param>...];base64,<payload>`. The
/// first `;`-separated header segment becomes [`InlineImageData::mime`] (or
/// `None` when it is empty); the remaining segments are only scanned for the
/// `base64` marker. Surrounding whitespace on the payload is ignored.
///
/// # Errors
///
/// Returns [`ImageProcessingError::DataUrl`] when the `data:` prefix is
/// missing, when there is no `,` separating header from payload, when the
/// header carries no `base64` marker, or when the payload is not valid base64.
pub fn decode_data_url(image_url: &str) -> Result<InlineImageData, ImageProcessingError> {
    let data_url_error = |message: &str| ImageProcessingError::DataUrl {
        message: message.to_string(),
    };

    let rest = image_url
        .strip_prefix("data:")
        .ok_or_else(|| data_url_error("missing data URL prefix"))?;
    let (header, data) = rest
        .split_once(',')
        .ok_or_else(|| data_url_error("missing data URL header separator"))?;

    let mut parts = header.split(';');
    let mime = parts
        .next()
        .filter(|first| !first.is_empty())
        .map(str::to_string);

    // The `base64` marker is matched ASCII case-insensitively and may be
    // padded with spaces, so `;BASE64` and `; base64` are accepted as well.
    let is_base64 = parts.any(|part| part.trim().eq_ignore_ascii_case("base64"));
    if !is_base64 {
        return Err(data_url_error("data URL is not base64 encoded"));
    }

    let bytes = BASE64_STANDARD
        .decode(data.trim())
        .map_err(|err| ImageProcessingError::DataUrl {
            message: format!("invalid base64 image data: {err}"),
        })?;

    Ok(InlineImageData { mime, bytes })
}
/// Reads the whole file at `path` into memory.
///
/// `path_for_error` is the path recorded in the returned
/// [`ImageProcessingError::Read`] when the read fails.
///
/// The read is always a blocking `std::fs::read`. On a Tokio runtime that is
/// not `current_thread`, it is wrapped in `tokio::task::block_in_place`.
/// On a `current_thread` runtime, or outside any runtime, it runs directly.
fn read_file_bytes(path: &Path, path_for_error: &Path) -> Result<Vec<u8>, ImageProcessingError> {
    // `block_in_place` panics on a `current_thread` runtime, so it is only used
    // when the ambient runtime is some other flavor.
    let can_block_in_place = tokio::runtime::Handle::try_current()
        .map(|handle| {
            !matches!(
                handle.runtime_flavor(),
                tokio::runtime::RuntimeFlavor::CurrentThread
            )
        })
        .unwrap_or(false);

    let read_result = if can_block_in_place {
        tokio::task::block_in_place(|| std::fs::read(path))
    } else {
        std::fs::read(path)
    };

    read_result.map_err(|source| ImageProcessingError::Read {
        path: path_for_error.to_path_buf(),
        source,
    })
}
fn process_image_bytes(
file_bytes: Vec<u8>,
path_for_error: PathBuf,
) -> Result<EncodedImage, ImageProcessingError> {
let key = sha1_digest(&file_bytes);
IMAGE_CACHE.get_or_try_insert_with(key, move || {
@@ -56,7 +141,7 @@ pub fn load_and_resize_to_fit(path: &Path) -> Result<EncodedImage, ImageProcessi
let dynamic = image::load_from_memory(&file_bytes).map_err(|source| {
ImageProcessingError::Decode {
path: path_buf.clone(),
path: path_for_error.clone(),
source,
}
})?;
@@ -99,24 +184,6 @@ pub fn load_and_resize_to_fit(path: &Path) -> Result<EncodedImage, ImageProcessi
})
}
/// Reads the whole file at `path` into memory with a blocking `std::fs::read`.
///
/// `path_for_error` is the path recorded in the returned
/// `ImageProcessingError::Read` when the read fails.
fn read_file_bytes(path: &Path, path_for_error: &Path) -> Result<Vec<u8>, ImageProcessingError> {
match tokio::runtime::Handle::try_current() {
// If we're inside a Tokio runtime, avoid block_on (it panics on worker threads).
// Use block_in_place and do a standard blocking read safely.
// NOTE(review): Tokio documents that block_in_place itself panics on a
// current_thread runtime — confirm callers always run on a multi-threaded one.
Ok(_) => tokio::task::block_in_place(|| std::fs::read(path)).map_err(|source| {
ImageProcessingError::Read {
path: path_for_error.to_path_buf(),
source,
}
}),
// Outside a runtime, just read synchronously.
Err(_) => std::fs::read(path).map_err(|source| ImageProcessingError::Read {
path: path_for_error.to_path_buf(),
source,
}),
}
}
fn encode_image(
image: &DynamicImage,
preferred_format: ImageFormat,