mirror of
https://github.com/openai/codex.git
synced 2026-06-02 11:22:01 +00:00
Compare commits
1 Commits
rust-v0.13
...
rhan/clean
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ffbb4e53fb |
@@ -56,8 +56,8 @@ pub fn map_api_error(err: ApiError) -> CodexErr {
|
||||
}
|
||||
|
||||
if status == http::StatusCode::BAD_REQUEST {
|
||||
if let Ok(parsed) = serde_json::from_str::<Value>(&body_text)
|
||||
&& let Some(error) = parsed.get("error")
|
||||
let parsed = serde_json::from_str::<Value>(&body_text).ok();
|
||||
if let Some(error) = parsed.as_ref().and_then(|parsed| parsed.get("error"))
|
||||
&& error.get("code").and_then(Value::as_str)
|
||||
== Some(CYBER_POLICY_ERROR_CODE)
|
||||
{
|
||||
@@ -68,9 +68,7 @@ pub fn map_api_error(err: ApiError) -> CodexErr {
|
||||
.map(str::to_string)
|
||||
.unwrap_or_else(|| CYBER_POLICY_FALLBACK_MESSAGE.to_string());
|
||||
CodexErr::CyberPolicy { message }
|
||||
} else if body_text
|
||||
.contains("The image data you provided does not represent a valid image")
|
||||
{
|
||||
} else if is_invalid_image_response_error(&body_text, parsed.as_ref()) {
|
||||
CodexErr::InvalidImageRequest()
|
||||
} else {
|
||||
CodexErr::InvalidRequest(body_text)
|
||||
@@ -141,6 +139,35 @@ const X_ERROR_JSON_HEADER: &str = "x-error-json";
|
||||
const CYBER_POLICY_ERROR_CODE: &str = "cyber_policy";
|
||||
const CYBER_POLICY_FALLBACK_MESSAGE: &str =
|
||||
"This request has been flagged for possible cybersecurity risk.";
|
||||
const INVALID_IMAGE_ERROR_MESSAGE: &str =
|
||||
"The image data you provided does not represent a valid image";
|
||||
const EMPTY_BASE64_IMAGE_ERROR_MESSAGE: &str = concat!(
|
||||
"Expected a base64-encoded data URL with an image MIME type, ",
|
||||
"but got empty base64-encoded bytes."
|
||||
);
|
||||
|
||||
fn is_invalid_image_response_error(body_text: &str, parsed: Option<&Value>) -> bool {
|
||||
if body_text.contains(INVALID_IMAGE_ERROR_MESSAGE)
|
||||
|| body_text.contains(EMPTY_BASE64_IMAGE_ERROR_MESSAGE)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
let Some(error) = parsed.and_then(|parsed| parsed.get("error")) else {
|
||||
return false;
|
||||
};
|
||||
if error.get("code").and_then(Value::as_str) != Some("invalid_value") {
|
||||
return false;
|
||||
}
|
||||
let param = error.get("param").and_then(Value::as_str);
|
||||
let message = error
|
||||
.get("message")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or_default();
|
||||
|
||||
param.is_some_and(|param| param == "image_url" || param.ends_with(".image_url"))
|
||||
|| (param == Some("url") && message.contains("Error while downloading "))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "api_bridge_tests.rs"]
|
||||
|
||||
@@ -2,6 +2,15 @@ use super::*;
|
||||
use base64::Engine;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
fn map_bad_request(body: String) -> CodexErr {
|
||||
map_api_error(ApiError::Transport(TransportError::Http {
|
||||
status: http::StatusCode::BAD_REQUEST,
|
||||
url: Some("http://example.com/v1/responses".to_string()),
|
||||
headers: None,
|
||||
body: Some(body),
|
||||
}))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn map_api_error_maps_server_overloaded() {
|
||||
let err = map_api_error(ApiError::ServerOverloaded);
|
||||
@@ -124,6 +133,43 @@ fn map_api_error_keeps_unknown_400_errors_generic() {
|
||||
assert_eq!(message, body);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn map_api_error_maps_image_400s_to_invalid_image_request() {
|
||||
let cases = [
|
||||
serde_json::json!({
|
||||
"error": {
|
||||
"message": "The image data you provided does not represent a valid image. Please check your input and try again.",
|
||||
"type": "invalid_request_error",
|
||||
"param": "input[0].content[1].image_url",
|
||||
"code": "invalid_value"
|
||||
}
|
||||
}),
|
||||
serde_json::json!({
|
||||
"type": "error",
|
||||
"status": 400,
|
||||
"error": {
|
||||
"type": "invalid_request_error",
|
||||
"code": "invalid_value",
|
||||
"message": "Error while downloading http://127.0.0.1:8787/image.png. Upstream status code: 407.",
|
||||
"param": "url"
|
||||
}
|
||||
}),
|
||||
serde_json::json!({
|
||||
"error": {
|
||||
"message": "Expected a base64-encoded data URL with an image MIME type, but got empty base64-encoded bytes.",
|
||||
"code": "invalid_value"
|
||||
}
|
||||
}),
|
||||
];
|
||||
|
||||
for body in cases {
|
||||
assert!(matches!(
|
||||
map_bad_request(body.to_string()),
|
||||
CodexErr::InvalidImageRequest()
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn map_api_error_maps_usage_limit_limit_name_header() {
|
||||
let mut headers = HeaderMap::new();
|
||||
|
||||
@@ -58,6 +58,12 @@ pub(crate) struct TotalTokenUsageBreakdown {
|
||||
pub estimated_bytes_of_items_added_since_last_successful_api_response: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub(crate) enum ImageSanitizationSource {
|
||||
User,
|
||||
Tool,
|
||||
}
|
||||
|
||||
impl ContextManager {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
@@ -184,38 +190,33 @@ impl ContextManager {
|
||||
self.history_version = self.history_version.saturating_add(1);
|
||||
}
|
||||
|
||||
/// Replace image content in the last turn if it originated from a tool output.
|
||||
/// Returns true when a tool image was replaced, false otherwise.
|
||||
pub(crate) fn replace_last_turn_images(&mut self, placeholder: &str) -> bool {
|
||||
let Some(index) = self.items.iter().rposition(|item| {
|
||||
matches!(item, ResponseItem::FunctionCallOutput { .. }) || is_user_turn_boundary(item)
|
||||
}) else {
|
||||
return false;
|
||||
};
|
||||
/// Replace images in the newest image-bearing history item and report their source.
|
||||
pub(crate) fn replace_newest_images(
|
||||
&mut self,
|
||||
placeholder: &str,
|
||||
) -> Option<ImageSanitizationSource> {
|
||||
let placeholder = placeholder.to_string();
|
||||
for item in self.items.iter_mut().rev() {
|
||||
let source = match item {
|
||||
ResponseItem::Message { role, content, .. } if role == "user" => {
|
||||
replace_message_images(content, &placeholder)
|
||||
.then_some(ImageSanitizationSource::User)
|
||||
}
|
||||
ResponseItem::FunctionCallOutput { output, .. }
|
||||
| ResponseItem::CustomToolCallOutput { output, .. } => {
|
||||
replace_tool_output_images(output, &placeholder)
|
||||
.then_some(ImageSanitizationSource::Tool)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
match &mut self.items[index] {
|
||||
ResponseItem::FunctionCallOutput { output, .. } => {
|
||||
let Some(content_items) = output.content_items_mut() else {
|
||||
return false;
|
||||
};
|
||||
let mut replaced = false;
|
||||
let placeholder = placeholder.to_string();
|
||||
for item in content_items.iter_mut() {
|
||||
if matches!(item, FunctionCallOutputContentItem::InputImage { .. }) {
|
||||
*item = FunctionCallOutputContentItem::InputText {
|
||||
text: placeholder.clone(),
|
||||
};
|
||||
replaced = true;
|
||||
}
|
||||
}
|
||||
if replaced {
|
||||
self.history_version = self.history_version.saturating_add(1);
|
||||
}
|
||||
replaced
|
||||
if let Some(source) = source {
|
||||
self.history_version = self.history_version.saturating_add(1);
|
||||
return Some(source);
|
||||
}
|
||||
ResponseItem::Message { .. } => false,
|
||||
_ => false,
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Drop the last `num_turns` instruction turns from this history.
|
||||
@@ -729,6 +730,36 @@ fn is_model_generated_item(item: &ResponseItem) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
fn replace_message_images(content: &mut [ContentItem], placeholder: &str) -> bool {
|
||||
let mut replaced = false;
|
||||
for item in content.iter_mut() {
|
||||
if matches!(item, ContentItem::InputImage { .. }) {
|
||||
*item = ContentItem::InputText {
|
||||
text: placeholder.to_string(),
|
||||
};
|
||||
replaced = true;
|
||||
}
|
||||
}
|
||||
replaced
|
||||
}
|
||||
|
||||
fn replace_tool_output_images(output: &mut FunctionCallOutputPayload, placeholder: &str) -> bool {
|
||||
let Some(content_items) = output.content_items_mut() else {
|
||||
return false;
|
||||
};
|
||||
|
||||
let mut replaced = false;
|
||||
for item in content_items.iter_mut() {
|
||||
if matches!(item, FunctionCallOutputContentItem::InputImage { .. }) {
|
||||
*item = FunctionCallOutputContentItem::InputText {
|
||||
text: placeholder.to_string(),
|
||||
};
|
||||
replaced = true;
|
||||
}
|
||||
}
|
||||
replaced
|
||||
}
|
||||
|
||||
pub(crate) fn is_codex_generated_item(item: &ResponseItem) -> bool {
|
||||
matches!(
|
||||
item,
|
||||
|
||||
@@ -671,61 +671,148 @@ fn remove_last_item_removes_matching_call_for_output() {
|
||||
assert_eq!(h.raw_items(), vec![user_msg("before tool call")]);
|
||||
}
|
||||
|
||||
fn tool_text(text: &str) -> FunctionCallOutputContentItem {
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: text.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn tool_image(base64: &str) -> FunctionCallOutputContentItem {
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: format!("data:image/png;base64,{base64}"),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}
|
||||
}
|
||||
|
||||
fn user_image(base64: &str) -> ContentItem {
|
||||
ContentItem::InputImage {
|
||||
image_url: format!("data:image/png;base64,{base64}"),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}
|
||||
}
|
||||
|
||||
fn tool_output_content(item: &ResponseItem) -> &[FunctionCallOutputContentItem] {
|
||||
match item {
|
||||
ResponseItem::FunctionCallOutput { output, .. }
|
||||
| ResponseItem::CustomToolCallOutput { output, .. } => output
|
||||
.content_items()
|
||||
.expect("expected content-item tool output"),
|
||||
_ => panic!("expected tool output"),
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_tool_output_texts(item: &ResponseItem, expected: Vec<&str>) {
|
||||
let texts = tool_output_content(item)
|
||||
.iter()
|
||||
.map(|item| match item {
|
||||
FunctionCallOutputContentItem::InputText { text } => text.as_str(),
|
||||
_ => panic!("expected sanitized output to contain only text"),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(texts, expected);
|
||||
}
|
||||
|
||||
fn assert_message_texts(content: &[ContentItem], expected: Vec<&str>) {
|
||||
let texts = content
|
||||
.iter()
|
||||
.map(|item| match item {
|
||||
ContentItem::InputText { text } => text.as_str(),
|
||||
_ => panic!("expected sanitized message to contain only text"),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(texts, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replace_last_turn_images_replaces_tool_output_images() {
|
||||
fn replace_newest_images_replaces_tool_output_images() {
|
||||
let cases = [
|
||||
(
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
body: FunctionCallOutputBody::ContentItems(vec![tool_image("AAA")]),
|
||||
success: Some(true),
|
||||
},
|
||||
},
|
||||
vec!["Image omitted"],
|
||||
),
|
||||
(
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
name: Some("tool".to_string()),
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
tool_text("before"),
|
||||
tool_image("AAA"),
|
||||
]),
|
||||
},
|
||||
vec!["before", "Image omitted"],
|
||||
),
|
||||
];
|
||||
|
||||
for (tool_output, expected_texts) in cases {
|
||||
let mut history = create_history_with_items(vec![user_input_text_msg("hi"), tool_output]);
|
||||
|
||||
assert_eq!(
|
||||
history.replace_newest_images("Image omitted"),
|
||||
Some(ImageSanitizationSource::Tool)
|
||||
);
|
||||
assert_tool_output_texts(&history.raw_items()[1], expected_texts);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replace_newest_images_replaces_user_images() {
|
||||
let items = vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![
|
||||
ContentItem::InputText {
|
||||
text: "look".to_string(),
|
||||
},
|
||||
user_image("AAA"),
|
||||
],
|
||||
phase: None,
|
||||
}];
|
||||
let mut history = create_history_with_items(items);
|
||||
|
||||
assert_eq!(
|
||||
history.replace_newest_images("Image omitted"),
|
||||
Some(ImageSanitizationSource::User)
|
||||
);
|
||||
|
||||
let ResponseItem::Message { content, .. } = &history.raw_items()[0] else {
|
||||
panic!("expected user message");
|
||||
};
|
||||
assert_message_texts(content, vec!["look", "Image omitted"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replace_newest_images_replaces_newest_image_bearing_item() {
|
||||
let items = vec![
|
||||
user_input_text_msg("hi"),
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![user_image("user")],
|
||||
phase: None,
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
body: FunctionCallOutputBody::ContentItems(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
]),
|
||||
success: Some(true),
|
||||
},
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![tool_image("tool")]),
|
||||
},
|
||||
];
|
||||
let mut history = create_history_with_items(items);
|
||||
|
||||
assert!(history.replace_last_turn_images("Invalid image"));
|
||||
|
||||
assert_eq!(
|
||||
history.raw_items(),
|
||||
vec![
|
||||
user_input_text_msg("hi"),
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
body: FunctionCallOutputBody::ContentItems(vec![
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "Invalid image".to_string(),
|
||||
},
|
||||
]),
|
||||
success: Some(true),
|
||||
},
|
||||
},
|
||||
]
|
||||
history.replace_newest_images("Image omitted"),
|
||||
Some(ImageSanitizationSource::Tool)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replace_last_turn_images_does_not_touch_user_images() {
|
||||
let items = vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,AAA".to_string(),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}],
|
||||
phase: None,
|
||||
}];
|
||||
let mut history = create_history_with_items(items.clone());
|
||||
|
||||
assert!(!history.replace_last_turn_images("Invalid image"));
|
||||
assert_eq!(history.raw_items(), items);
|
||||
let raw_items = history.raw_items();
|
||||
let ResponseItem::Message { content, .. } = &raw_items[0] else {
|
||||
panic!("expected user message");
|
||||
};
|
||||
assert!(matches!(content[0], ContentItem::InputImage { .. }));
|
||||
assert_tool_output_texts(&raw_items[1], vec!["Image omitted"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -3,6 +3,7 @@ mod normalize;
|
||||
pub(crate) mod updates;
|
||||
|
||||
pub(crate) use history::ContextManager;
|
||||
pub(crate) use history::ImageSanitizationSource;
|
||||
pub(crate) use history::TotalTokenUsageBreakdown;
|
||||
pub(crate) use history::estimate_response_item_model_visible_bytes;
|
||||
pub(crate) use history::is_codex_generated_item;
|
||||
|
||||
@@ -103,6 +103,7 @@ use codex_protocol::config_types::ModeKind;
|
||||
use codex_protocol::config_types::Settings;
|
||||
use codex_protocol::models::BaseInstructions;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
@@ -147,9 +148,11 @@ use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_completed_with_tokens;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_response_sequence;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::sse_response;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::test_path_buf;
|
||||
@@ -177,6 +180,7 @@ use serde_json::json;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration as StdDuration;
|
||||
use wiremock::ResponseTemplate;
|
||||
|
||||
mod guardian_tests;
|
||||
|
||||
@@ -185,6 +189,11 @@ struct InstructionsTestCase {
|
||||
expects_apply_patch_description: bool,
|
||||
}
|
||||
|
||||
const INVALID_IMAGE_PLACEHOLDER: &str =
|
||||
"Image omitted: Responses could not read this image URL/data.";
|
||||
const INVALID_IMAGE_HISTORY_CHECKPOINT: &str =
|
||||
"Sanitized an invalid image from conversation history.";
|
||||
|
||||
fn user_message(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
@@ -196,6 +205,76 @@ fn user_message(text: &str) -> ResponseItem {
|
||||
}
|
||||
}
|
||||
|
||||
fn invalid_image_download_response() -> ResponseTemplate {
|
||||
ResponseTemplate::new(400).set_body_json(json!({
|
||||
"type": "error",
|
||||
"status": 400,
|
||||
"error": {
|
||||
"type": "invalid_request_error",
|
||||
"code": "invalid_value",
|
||||
"message": "Error while downloading http://127.0.0.1:8787/image.png. Upstream status code: 407.",
|
||||
"param": "url",
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
fn invalid_image_data_response() -> ResponseTemplate {
|
||||
ResponseTemplate::new(400).set_body_json(json!({
|
||||
"error": {
|
||||
"message": "The image data you provided does not represent a valid image. Please check your input and try again.",
|
||||
"type": "invalid_request_error",
|
||||
"param": "input[0].content[1].image_url",
|
||||
"code": "invalid_value",
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
fn response_items_have_images(items: &[ResponseItem]) -> bool {
|
||||
items.iter().any(|item| match item {
|
||||
ResponseItem::Message { content, .. } => content
|
||||
.iter()
|
||||
.any(|item| matches!(item, ContentItem::InputImage { .. })),
|
||||
ResponseItem::FunctionCallOutput { output, .. }
|
||||
| ResponseItem::CustomToolCallOutput { output, .. } => {
|
||||
output.content_items().is_some_and(|items| {
|
||||
items
|
||||
.iter()
|
||||
.any(|item| matches!(item, FunctionCallOutputContentItem::InputImage { .. }))
|
||||
})
|
||||
}
|
||||
_ => false,
|
||||
})
|
||||
}
|
||||
|
||||
fn assert_no_images_in_sanitized_history(items: &[RolloutItem]) -> &[ResponseItem] {
|
||||
let replacement_history = items
|
||||
.iter()
|
||||
.rev()
|
||||
.find_map(|item| match item {
|
||||
RolloutItem::Compacted(compacted)
|
||||
if compacted.message == INVALID_IMAGE_HISTORY_CHECKPOINT =>
|
||||
{
|
||||
compacted.replacement_history.as_deref()
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.expect("sanitized checkpoint");
|
||||
assert!(!response_items_have_images(replacement_history));
|
||||
replacement_history
|
||||
}
|
||||
|
||||
fn response_item_has_tool_image_placeholder(item: &ResponseItem) -> bool {
|
||||
matches!(
|
||||
item,
|
||||
ResponseItem::FunctionCallOutput { output, .. }
|
||||
if output.content_items().is_some_and(|items| items.iter().any(|item| matches!(
|
||||
item,
|
||||
FunctionCallOutputContentItem::InputText { text }
|
||||
if text == INVALID_IMAGE_PLACEHOLDER
|
||||
)))
|
||||
)
|
||||
}
|
||||
|
||||
fn assistant_message(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
@@ -2929,6 +3008,151 @@ async fn thread_rollback_fails_when_num_turns_is_zero() {
|
||||
assert_eq!(initial_context, history.raw_items());
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn invalid_user_image_is_sanitized_before_follow_up_turn() -> anyhow::Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex();
|
||||
let test = builder.build(&server).await?;
|
||||
let responses = mount_response_sequence(
|
||||
&server,
|
||||
vec![
|
||||
invalid_image_download_response(),
|
||||
sse_response(sse(vec![
|
||||
ev_assistant_message("msg-1", "ok"),
|
||||
ev_completed("resp-2"),
|
||||
])),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let image_url = "http://127.0.0.1:8787/image.png".to_string();
|
||||
let first_turn_id = test
|
||||
.codex
|
||||
.submit(Op::UserInput {
|
||||
environments: None,
|
||||
items: vec![UserInput::Image {
|
||||
image_url: image_url.clone(),
|
||||
detail: None,
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
responsesapi_client_metadata: None,
|
||||
thread_settings: Default::default(),
|
||||
})
|
||||
.await?;
|
||||
|
||||
let error = wait_for_event_match(&test.codex, |event| match event {
|
||||
EventMsg::Error(error) => Some(error.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert_eq!(error.codex_error_info, Some(CodexErrorInfo::BadRequest));
|
||||
assert!(error.message.contains("removed that image from history"));
|
||||
wait_for_event_match(&test.codex, |event| match event {
|
||||
EventMsg::TurnComplete(event) if event.turn_id == first_turn_id => Some(()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
test.submit_turn("say ok").await?;
|
||||
|
||||
let requests = responses.requests();
|
||||
assert_eq!(requests.len(), 2);
|
||||
assert_eq!(
|
||||
requests[0].message_input_image_urls("user"),
|
||||
vec![image_url]
|
||||
);
|
||||
assert!(requests[1].message_input_image_urls("user").is_empty());
|
||||
assert!(requests[1].body_contains_text(INVALID_IMAGE_PLACEHOLDER));
|
||||
|
||||
test.codex.flush_rollout().await?;
|
||||
let stored_history = test.codex.load_history(/*include_archived*/ false).await?;
|
||||
assert_no_images_in_sanitized_history(&stored_history.items);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn invalid_tool_image_is_replaced_and_returned_to_model() -> anyhow::Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex();
|
||||
let test = builder.build(&server).await?;
|
||||
test.codex
|
||||
.inject_response_items(vec![
|
||||
user_message("capture a screenshot"),
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "view_image".to_string(),
|
||||
namespace: None,
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-image".to_string(),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-image".to_string(),
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "screenshot captured".to_string(),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,not-really-an-image".to_string(),
|
||||
detail: None,
|
||||
},
|
||||
]),
|
||||
},
|
||||
])
|
||||
.await?;
|
||||
|
||||
let responses = mount_response_sequence(
|
||||
&server,
|
||||
vec![
|
||||
invalid_image_data_response(),
|
||||
sse_response(sse(vec![
|
||||
ev_assistant_message("msg-1", "I could not inspect the image."),
|
||||
ev_completed("resp-2"),
|
||||
])),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("continue").await?;
|
||||
|
||||
let requests = responses.requests();
|
||||
assert_eq!(requests.len(), 2);
|
||||
assert!(
|
||||
requests[0]
|
||||
.function_call_output("call-image")
|
||||
.get("output")
|
||||
.and_then(serde_json::Value::as_array)
|
||||
.is_some_and(|items| items
|
||||
.iter()
|
||||
.any(|item| item.get("type").and_then(serde_json::Value::as_str)
|
||||
== Some("input_image")))
|
||||
);
|
||||
let sanitized_output = requests[1].function_call_output("call-image");
|
||||
assert_eq!(
|
||||
sanitized_output.get("output"),
|
||||
Some(&json!([
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "screenshot captured",
|
||||
},
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": INVALID_IMAGE_PLACEHOLDER,
|
||||
}
|
||||
]))
|
||||
);
|
||||
|
||||
test.codex.flush_rollout().await?;
|
||||
let stored_history = test.codex.load_history(/*include_archived*/ false).await?;
|
||||
assert!(
|
||||
assert_no_images_in_sanitized_history(&stored_history.items)
|
||||
.iter()
|
||||
.any(response_item_has_tool_image_placeholder)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn set_rate_limits_retains_previous_credits() {
|
||||
let codex_home = tempfile::tempdir().expect("create temp dir");
|
||||
|
||||
@@ -16,6 +16,7 @@ use crate::compact_remote::run_inline_remote_auto_compact_task;
|
||||
use crate::compact_remote_v2::run_inline_remote_auto_compact_task as run_inline_remote_auto_compact_task_v2;
|
||||
use crate::connectors;
|
||||
use crate::context::ContextualUserFragment;
|
||||
use crate::context_manager::ImageSanitizationSource;
|
||||
use crate::feedback_tags;
|
||||
use crate::goals::GoalRuntimeEvent;
|
||||
use crate::hook_runtime::inspect_pending_input;
|
||||
@@ -86,11 +87,13 @@ use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::AgentMessageContentDeltaEvent;
|
||||
use codex_protocol::protocol::AgentReasoningSectionBreakEvent;
|
||||
use codex_protocol::protocol::CodexErrorInfo;
|
||||
use codex_protocol::protocol::CompactedItem;
|
||||
use codex_protocol::protocol::ErrorEvent;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::PlanDeltaEvent;
|
||||
use codex_protocol::protocol::ReasoningContentDeltaEvent;
|
||||
use codex_protocol::protocol::ReasoningRawContentDeltaEvent;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::TurnDiffEvent;
|
||||
use codex_protocol::protocol::WarningEvent;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
@@ -114,6 +117,15 @@ use tracing::trace;
|
||||
use tracing::trace_span;
|
||||
use tracing::warn;
|
||||
|
||||
const INVALID_IMAGE_FEEDBACK_PLACEHOLDER: &str =
|
||||
"Image omitted: Responses could not read this image URL/data.";
|
||||
const INVALID_IMAGE_USER_ERROR_MESSAGE: &str = concat!(
|
||||
"Responses could not read one of the images in this conversation, ",
|
||||
"so Codex removed that image from history. Please try again with a different image."
|
||||
);
|
||||
const INVALID_IMAGE_HISTORY_CHECKPOINT_MESSAGE: &str =
|
||||
"Sanitized an invalid image from conversation history.";
|
||||
|
||||
/// Takes a user message as input and runs a loop where, at each sampling request, the model
|
||||
/// replies with either:
|
||||
///
|
||||
@@ -410,19 +422,15 @@ pub(crate) async fn run_turn(
|
||||
break;
|
||||
}
|
||||
Err(CodexErr::InvalidImageRequest()) => {
|
||||
warn!("Responses rejected an image; sanitizing history to prevent replay");
|
||||
if sanitize_invalid_image_history(&sess).await
|
||||
== Some(ImageSanitizationSource::Tool)
|
||||
{
|
||||
let mut state = sess.state.lock().await;
|
||||
error_or_panic(
|
||||
"Invalid image detected; sanitizing tool output to prevent poisoning",
|
||||
);
|
||||
if state.history.replace_last_turn_images("Invalid image") {
|
||||
continue;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let event = EventMsg::Error(ErrorEvent {
|
||||
message: "Invalid image in your last message. Please remove it and try again."
|
||||
.to_string(),
|
||||
message: INVALID_IMAGE_USER_ERROR_MESSAGE.to_string(),
|
||||
codex_error_info: Some(CodexErrorInfo::BadRequest),
|
||||
});
|
||||
sess.send_event(&turn_context, event).await;
|
||||
@@ -450,6 +458,27 @@ pub(crate) async fn run_turn(
|
||||
last_agent_message
|
||||
}
|
||||
|
||||
async fn sanitize_invalid_image_history(sess: &Session) -> Option<ImageSanitizationSource> {
|
||||
let (source, replacement_history) = {
|
||||
let mut state = sess.state.lock().await;
|
||||
let source = state
|
||||
.history
|
||||
.replace_newest_images(INVALID_IMAGE_FEEDBACK_PLACEHOLDER)?;
|
||||
(source, state.history.raw_items().to_vec())
|
||||
};
|
||||
|
||||
sess.persist_rollout_items(&[RolloutItem::Compacted(CompactedItem {
|
||||
message: INVALID_IMAGE_HISTORY_CHECKPOINT_MESSAGE.to_string(),
|
||||
replacement_history: Some(replacement_history),
|
||||
})])
|
||||
.await;
|
||||
if let Err(err) = sess.flush_rollout().await {
|
||||
warn!("failed to persist invalid-image history sanitization: {err}");
|
||||
}
|
||||
|
||||
Some(source)
|
||||
}
|
||||
|
||||
#[expect(
|
||||
clippy::await_holding_invalid_type,
|
||||
reason = "MCP tool listing borrows the read guard across cancellation-aware await"
|
||||
|
||||
@@ -24,6 +24,8 @@ use codex_protocol::permissions::FileSystemSandboxEntry;
|
||||
use codex_protocol::permissions::FileSystemSandboxPolicy;
|
||||
use codex_protocol::permissions::NetworkSandboxPolicy;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
#[cfg(not(debug_assertions))]
|
||||
use codex_protocol::protocol::CodexErrorInfo;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::Op;
|
||||
use codex_protocol::protocol::TurnEnvironmentSelection;
|
||||
@@ -1500,6 +1502,18 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()>
|
||||
))
|
||||
.await?;
|
||||
|
||||
let error = wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|event| matches!(event, EventMsg::Error(_)),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
let EventMsg::Error(error) = error else {
|
||||
panic!("expected error event");
|
||||
};
|
||||
assert_eq!(error.codex_error_info, Some(CodexErrorInfo::BadRequest));
|
||||
assert!(error.message.contains("removed that image from history"));
|
||||
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
@@ -1513,14 +1527,10 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()>
|
||||
"initial request should include the uploaded image"
|
||||
);
|
||||
|
||||
let second_request = completion_mock.single_request();
|
||||
let second_body = second_request.body_json();
|
||||
assert!(
|
||||
find_image_message(&second_body).is_none(),
|
||||
"second request should replace the invalid image"
|
||||
completion_mock.requests().is_empty(),
|
||||
"bad user image should not trigger an immediate model retry"
|
||||
);
|
||||
let user_texts = second_request.message_input_texts("user");
|
||||
assert!(user_texts.iter().any(|text| text == "Invalid image"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user