Compare commits

...

1 Commits

Author SHA1 Message Date
Roy Han
b88e4ec7bc reject empty base64 image inputs 2026-05-24 20:17:37 -07:00
7 changed files with 238 additions and 24 deletions

View File

@@ -5,6 +5,7 @@ pub(crate) const METHOD_NOT_FOUND_ERROR_CODE: i64 = -32601;
pub const INVALID_PARAMS_ERROR_CODE: i64 = -32602;
pub(crate) const INTERNAL_ERROR_CODE: i64 = -32603;
pub(crate) const OVERLOADED_ERROR_CODE: i64 = -32001;
/// String identifier reported as `input_error_code` in the error data when a turn input
/// contains an image whose base64 payload is empty.
pub(crate) const EMPTY_BASE64_IMAGE_INPUT_ERROR_CODE: &str = "empty_base64_image_input";
pub const INPUT_TOO_LARGE_ERROR_CODE: &str = "input_too_large";
pub(crate) fn invalid_request(message: impl Into<String>) -> JSONRPCErrorError {

View File

@@ -3,6 +3,7 @@ use crate::bespoke_event_handling::maybe_emit_hook_prompt_item_completed;
use crate::command_exec::CommandExecManager;
use crate::command_exec::StartCommandExecParams;
use crate::config_manager::ConfigManager;
use crate::error_code::EMPTY_BASE64_IMAGE_INPUT_ERROR_CODE;
use crate::error_code::INPUT_TOO_LARGE_ERROR_CODE;
use crate::error_code::invalid_params;
use crate::models::supported_models;

View File

@@ -46,6 +46,14 @@ struct ThreadSettingsBuildParams {
personality: Option<Personality>,
}
/// Returns `true` when `item` is an image input whose URL is a base64 image data URL with an
/// empty payload; every other kind of input yields `false`.
fn is_empty_base64_image_input(item: &V2UserInput) -> bool {
    match item {
        V2UserInput::Image { url, .. } => {
            codex_protocol::models::is_empty_base64_image_data_url(url)
        }
        _ => false,
    }
}
impl TurnRequestProcessor {
#[allow(clippy::too_many_arguments)]
pub(crate) fn new(
@@ -344,6 +352,14 @@ impl TurnRequestProcessor {
error
}
/// Builds the invalid-params error returned when a turn input carries an empty base64 image.
///
/// The error's `data` holds an `input_error_code` field set to
/// `EMPTY_BASE64_IMAGE_INPUT_ERROR_CODE`, alongside the human-readable message.
fn empty_base64_image_input_error() -> JSONRPCErrorError {
    let structured_data = serde_json::json!({
        "input_error_code": EMPTY_BASE64_IMAGE_INPUT_ERROR_CODE,
    });
    let mut rejection = invalid_params("Could not attach image: empty image data.");
    rejection.data = Some(structured_data);
    rejection
}
fn validate_v2_input_limit(items: &[V2UserInput]) -> Result<(), JSONRPCErrorError> {
let actual_chars: usize = items.iter().map(V2UserInput::text_char_count).sum();
if actual_chars > MAX_USER_INPUT_TEXT_CHARS {
@@ -367,6 +383,11 @@ impl TurnRequestProcessor {
);
return Err(error);
}
if params.input.iter().any(is_empty_base64_image_input) {
let error = Self::empty_base64_image_input_error();
self.track_error_response(&request_id, &error, /*error_type*/ None);
return Err(error);
}
let (thread_id, thread) =
self.load_thread(&params.thread_id)
.await
@@ -740,6 +761,11 @@ impl TurnRequestProcessor {
);
return Err(error);
}
if params.input.iter().any(is_empty_base64_image_input) {
let error = Self::empty_base64_image_input_error();
self.track_error_response(request_id, &error, /*error_type*/ None);
return Err(error);
}
let mapped_items: Vec<CoreInputItem> = params
.input

View File

@@ -954,6 +954,69 @@ async fn turn_start_rejects_combined_oversized_text_input() -> Result<()> {
Ok(())
}
/// A `turn/start` request whose only input is an image data URL with an empty base64 payload
/// must be rejected with an invalid-params error, and no `turn/started` notification may follow.
#[tokio::test]
async fn turn_start_rejects_empty_base64_image_before_starting_turn() -> Result<()> {
// Set up an isolated home directory with a config, then start and initialize the process.
let codex_home = TempDir::new()?;
create_config_toml(
codex_home.path(),
"http://localhost/unused",
"never",
&BTreeMap::from([(Feature::Personality, true)]),
)?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
// Start a thread first so the turn request below references an existing thread id.
let thread_req = mcp
.send_thread_start_request(ThreadStartParams {
model: Some("mock-model".to_string()),
..Default::default()
})
.await?;
let thread_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(thread_req)),
)
.await??;
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(thread_resp)?;
// Send a turn whose single input is a base64 image data URL with nothing after the comma.
let turn_req = mcp
.send_turn_start_request(TurnStartParams {
thread_id: thread.id,
input: vec![V2UserInput::Image {
detail: None,
url: "data:image/png;base64,".to_string(),
}],
..Default::default()
})
.await?;
// The request must come back as a JSON-RPC error rather than a response.
let err: JSONRPCError = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_error_message(RequestId::Integer(turn_req)),
)
.await??;
assert_eq!(err.error.code, INVALID_PARAMS_ERROR_CODE);
assert_eq!(
err.error.message,
"Could not attach image: empty image data."
);
// The structured data must carry the specific input error code.
let data = err.error.data.expect("expected structured error data");
assert_eq!(data["input_error_code"], "empty_base64_image_input");
// Wait briefly for a `turn/started` notification; the timeout elapsing (an `Err`) is the
// passing outcome because it means no turn was started.
let turn_started = tokio::time::timeout(
std::time::Duration::from_millis(250),
mcp.read_stream_until_notification_message("turn/started"),
)
.await;
assert!(
turn_started.is_err(),
"did not expect a turn/started notification for rejected input"
);
Ok(())
}
#[tokio::test]
async fn turn_start_rejects_invalid_permission_selection_before_starting_turn() -> Result<()> {
let codex_home = TempDir::new()?;

View File

@@ -57,8 +57,8 @@ pub fn map_api_error(err: ApiError) -> CodexErr {
}
if status == http::StatusCode::BAD_REQUEST {
if let Ok(parsed) = serde_json::from_str::<Value>(&body_text)
&& let Some(error) = parsed.get("error")
let parsed = serde_json::from_str::<Value>(&body_text).ok();
if let Some(error) = parsed.as_ref().and_then(|parsed| parsed.get("error"))
&& error.get("code").and_then(Value::as_str)
== Some(CYBER_POLICY_ERROR_CODE)
{
@@ -69,8 +69,8 @@ pub fn map_api_error(err: ApiError) -> CodexErr {
.map(str::to_string)
.unwrap_or_else(|| CYBER_POLICY_FALLBACK_MESSAGE.to_string());
CodexErr::CyberPolicy { message }
} else if body_text
.contains("The image data you provided does not represent a valid image")
} else if body_text.contains(IMAGE_PARSE_ERROR)
|| body_text.contains(IMAGE_INVALID_BASE64)
{
CodexErr::InvalidImageRequest()
} else {
@@ -145,6 +145,8 @@ const X_ERROR_JSON_HEADER: &str = "x-error-json";
const CYBER_POLICY_ERROR_CODE: &str = "cyber_policy";
const CYBER_POLICY_FALLBACK_MESSAGE: &str =
"This request has been flagged for possible cybersecurity risk.";
/// Response-body substring that `map_api_error` treats as an invalid image request.
const IMAGE_PARSE_ERROR: &str = "The image data you provided does not represent a valid image";
/// Second response-body substring that `map_api_error` treats as an invalid image request;
/// it is the leading part of the message reported for empty base64 image data.
const IMAGE_INVALID_BASE64: &str = "Expected a base64-encoded data URL with an image MIME type";
#[cfg(test)]
#[path = "api_bridge_tests.rs"]

View File

@@ -124,6 +124,48 @@ fn map_api_error_keeps_unknown_400_errors_generic() {
assert_eq!(message, body);
}
/// A 400 body containing the "empty base64-encoded bytes" image message must map to
/// `CodexErr::InvalidImageRequest`, while an unrelated `invalid_value` error must still be
/// surfaced as `CodexErr::InvalidRequest` carrying the raw body.
#[test]
fn map_api_error_maps_empty_base64_image_error_message() {
    const EMPTY_IMAGE_MESSAGE: &str = "Expected a base64-encoded data URL with an image MIME type \
        (e.g. 'data:image/png;base64,aW1nIGJ5dGVzIGhlcmU='), but got empty base64-encoded bytes.";

    // Each case: (offending param, API message, whether it should map to InvalidImageRequest).
    let cases = [
        ("input[0].content[2].image_url", EMPTY_IMAGE_MESSAGE, true),
        ("input[111].output[1].image_url", EMPTY_IMAGE_MESSAGE, true),
        ("input[0].name", "Expected a valid tool name.", false),
    ];

    for (param, api_message, expect_invalid_image) in cases {
        let full_message = format!("Invalid '{param}'. {api_message}");
        let body = serde_json::json!({
            "error": {
                "type": "invalid_request_error",
                "code": "invalid_value",
                "message": full_message,
                "param": param
            }
        })
        .to_string();
        let http_error = TransportError::Http {
            status: http::StatusCode::BAD_REQUEST,
            url: Some("http://example.com/v1/responses".to_string()),
            headers: None,
            body: Some(body.clone()),
        };

        let err = map_api_error(ApiError::Transport(http_error));

        // Unrelated 400s keep the generic mapping and forward the body untouched.
        if !expect_invalid_image {
            let CodexErr::InvalidRequest(forwarded) = err else {
                panic!("expected CodexErr::InvalidRequest for {param}, got {err:?}");
            };
            assert_eq!(forwarded, body);
            continue;
        }
        assert!(
            matches!(err, CodexErr::InvalidImageRequest()),
            "expected InvalidImageRequest for {param}, got {err:?}"
        );
    }
}
#[test]
fn map_api_error_maps_usage_limit_limit_name_header() {
let mut headers = HeaderMap::new();

View File

@@ -1062,6 +1062,22 @@ fn unsupported_image_error_placeholder(path: &std::path::Path, mime: &str) -> Co
}
}
/// Text item substituted for a tool-returned image whose base64 data URL has an empty payload.
const EMPTY_BASE64_TOOL_IMAGE_TEXT: &str =
"Tool returned an empty image; skipped sending it to the model.";
/// Reports whether `image_url` is a base64 image data URL that carries no payload.
///
/// The text before the first comma must start with `data:image/` and contain a `base64`
/// segment among its `;`-separated parts; both checks ignore ASCII case and surrounding
/// whitespace. The text after that comma must be empty or whitespace-only. Input without a
/// comma is never considered an empty image.
pub fn is_empty_base64_image_data_url(image_url: &str) -> bool {
    const IMAGE_DATA_URL_PREFIX: &[u8] = b"data:image/";

    let Some((header, body)) = image_url.split_once(',') else {
        return false;
    };
    if !body.trim().is_empty() {
        return false;
    }

    let header = header.trim();
    // Compare raw bytes so a short or non-ASCII header simply fails to match.
    let has_image_prefix = header
        .as_bytes()
        .get(..IMAGE_DATA_URL_PREFIX.len())
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case(IMAGE_DATA_URL_PREFIX));

    has_image_prefix
        && header
            .split(';')
            .any(|segment| segment.trim().eq_ignore_ascii_case("base64"))
}
pub fn local_image_content_items_with_label_number(
path: &std::path::Path,
file_bytes: Vec<u8>,
@@ -1231,14 +1247,15 @@ impl From<Vec<UserInput>> for ResponseInputItem {
UserInput::Image { image_url, detail } => {
image_index += 1;
let detail = detail.unwrap_or(DEFAULT_IMAGE_DETAIL);
let image_item = ContentItem::InputImage {
image_url,
detail: Some(detail),
};
vec![
ContentItem::InputText {
text: image_open_tag_text(),
},
ContentItem::InputImage {
image_url,
detail: Some(detail),
},
image_item,
ContentItem::InputText {
text: image_close_tag_text(),
},
@@ -1361,9 +1378,15 @@ impl From<crate::dynamic_tools::DynamicToolCallOutputContentItem>
Self::InputText { text }
}
crate::dynamic_tools::DynamicToolCallOutputContentItem::InputImage { image_url } => {
Self::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
if is_empty_base64_image_data_url(&image_url) {
Self::InputText {
text: EMPTY_BASE64_TOOL_IMAGE_TEXT.to_string(),
}
} else {
Self::InputImage {
image_url,
detail: Some(DEFAULT_IMAGE_DETAIL),
}
}
}
}
@@ -1592,19 +1615,23 @@ fn convert_mcp_content_to_items(
let mime_type = mime_type.unwrap_or_else(|| "application/octet-stream".into());
format!("data:{mime_type};base64,{data}")
};
FunctionCallOutputContentItem::InputImage {
image_url,
detail: meta
.as_ref()
.and_then(serde_json::Value::as_object)
.and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY))
.and_then(serde_json::Value::as_str)
.and_then(|detail| match detail {
"high" => Some(ImageDetail::High),
"original" => Some(ImageDetail::Original),
_ => None,
})
.or(Some(DEFAULT_IMAGE_DETAIL)),
let detail = meta
.as_ref()
.and_then(serde_json::Value::as_object)
.and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY))
.and_then(serde_json::Value::as_str)
.and_then(|detail| match detail {
"high" => Some(ImageDetail::High),
"original" => Some(ImageDetail::Original),
_ => None,
})
.or(Some(DEFAULT_IMAGE_DETAIL));
if is_empty_base64_image_data_url(&image_url) {
FunctionCallOutputContentItem::InputText {
text: EMPTY_BASE64_TOOL_IMAGE_TEXT.to_string(),
}
} else {
FunctionCallOutputContentItem::InputImage { image_url, detail }
}
}
Ok(McpContent::Unknown) | Err(_) => FunctionCallOutputContentItem::InputText {
@@ -2210,6 +2237,24 @@ mod tests {
Ok(())
}
/// Converting a dynamic-tool image output whose base64 data URL has no payload must produce
/// the explanatory text item instead of an image item.
#[test]
fn empty_base64_dynamic_tool_image_output_is_replaced_with_text() -> Result<()> {
    let empty_image = crate::dynamic_tools::DynamicToolCallOutputContentItem::InputImage {
        image_url: "data:image/png;base64,".to_string(),
    };
    let expected = FunctionCallOutputContentItem::InputText {
        text: EMPTY_BASE64_TOOL_IMAGE_TEXT.to_string(),
    };

    let converted = FunctionCallOutputContentItem::from(empty_image);

    assert_eq!(converted, expected);
    Ok(())
}
#[test]
fn serializes_image_outputs_as_array() -> Result<()> {
let call_tool_result = CallToolResult {
@@ -2255,6 +2300,40 @@ mod tests {
Ok(())
}
/// MCP image content without a base64 payload (an empty `data` field, or a data URL with
/// nothing after the comma) must be replaced by the explanatory text item, while the
/// neighbouring text content is kept as is.
#[test]
fn empty_base64_mcp_image_outputs_are_replaced_with_text() -> Result<()> {
    let expected_items = [
        FunctionCallOutputContentItem::InputText {
            text: "caption".into(),
        },
        FunctionCallOutputContentItem::InputText {
            text: EMPTY_BASE64_TOOL_IMAGE_TEXT.to_string(),
        },
    ];

    for data in ["", "data:image/png;base64,"] {
        let caption = serde_json::json!({"type":"text","text":"caption"});
        let empty_image = serde_json::json!({"type":"image","data":data,"mimeType":"image/png"});
        let payload = CallToolResult {
            content: vec![caption, empty_image],
            structured_content: None,
            is_error: Some(false),
            meta: None,
        }
        .into_function_call_output_payload();

        assert_eq!(payload.success, Some(true));
        let Some(items) = payload.content_items() else {
            panic!("expected content items for {data:?}");
        };
        assert_eq!(items, expected_items);
    }
    Ok(())
}
#[test]
fn serializes_custom_tool_image_outputs_as_array() -> Result<()> {
let item = ResponseInputItem::CustomToolCallOutput {