feat: error on user-injected bad images

This commit is contained in:
jif-oai
2026-01-09 18:57:14 +00:00
parent 1aed01e99f
commit 48502af5a2
3 changed files with 112 additions and 69 deletions

View File

@@ -156,6 +156,7 @@ use codex_async_utils::OrCancelExt;
use codex_otel::OtelManager;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
@@ -1551,6 +1552,39 @@ impl Session {
}
}
pub(crate) async fn current_turn_images_tool_only(&self) -> bool {
let history = self.state.lock().await.clone_history();
let mut saw_user = false;
let mut saw_tool = false;
for item in history.raw_items().iter().rev() {
match item {
ResponseItem::Message { role, content, .. } => {
if role == "assistant" {
break;
}
if role == "user"
&& content
.iter()
.any(|item| matches!(item, ContentItem::InputImage { .. }))
{
saw_user = true;
}
}
ResponseItem::FunctionCallOutput { output, .. } => {
if let Some(items) = &output.content_items
&& items.iter().any(|item| {
matches!(item, FunctionCallOutputContentItem::InputImage { .. })
})
{
saw_tool = true;
}
}
_ => {}
}
}
saw_tool && !saw_user
}
pub async fn list_resources(
&self,
server: &str,
@@ -2449,11 +2483,19 @@ pub(crate) async fn run_turn(
break;
}
Err(CodexErr::InvalidImageRequest()) => {
let mut state = sess.state.lock().await;
error_or_panic(
"Invalid image detected, replacing it in the last turn to prevent poisoning",
);
state.history.replace_last_turn_images("Invalid image");
if sess.current_turn_images_tool_only().await {
let mut state = sess.state.lock().await;
error_or_panic(
"Invalid image detected, replacing it in the last turn to prevent poisoning",
);
state.history.replace_last_turn_images("Invalid image");
} else {
let err = CodexErr::InvalidImageRequest();
info!("Turn error: {err:#}");
let event = EventMsg::Error(err.to_error_event(None));
sess.send_event(&turn_context, event).await;
break;
}
}
Err(e) => {
info!("Turn error: {e:#}");

View File

@@ -11,6 +11,9 @@ use crate::tools::context::ToolPayload;
use crate::tools::handlers::parse_arguments;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::user_input::UserInput;
pub struct ViewImageHandler;
@@ -63,15 +66,6 @@ impl ToolHandler for ViewImageHandler {
}
let event_path = abs_path.clone();
session
.inject_input(vec![UserInput::LocalImage { path: abs_path }])
.await
.map_err(|_| {
FunctionCallError::RespondToModel(
"unable to attach image (no active task)".to_string(),
)
})?;
session
.send_event(
turn.as_ref(),
@@ -82,9 +76,34 @@ impl ToolHandler for ViewImageHandler {
)
.await;
let response_input: ResponseInputItem = vec![UserInput::LocalImage {
path: abs_path.clone(),
}]
.into();
let image_url = match response_input {
ResponseInputItem::Message { content, .. } => match content.into_iter().next() {
Some(ContentItem::InputImage { image_url }) => image_url,
Some(ContentItem::InputText { text }) => {
return Err(FunctionCallError::RespondToModel(text));
}
_ => {
return Err(FunctionCallError::RespondToModel(
"unexpected image input payload".to_string(),
));
}
},
_ => {
return Err(FunctionCallError::RespondToModel(
"unexpected image input payload".to_string(),
));
}
};
Ok(ToolOutput::Function {
content: "attached local image path".to_string(),
content_items: None,
content_items: Some(vec![FunctionCallOutputContentItem::InputImage {
image_url,
}]),
success: Some(true),
})
}

View File

@@ -46,6 +46,27 @@ fn find_image_message(body: &Value) -> Option<&Value> {
})
}
fn find_tool_output_image_url(body: &Value, call_id: &str) -> Option<String> {
let items = body.get("input").and_then(Value::as_array)?;
let output = items
.iter()
.find(|item| {
item.get("type").and_then(Value::as_str) == Some("function_call_output")
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
})?
.get("output")?;
let output_items = output.as_array()?;
output_items.iter().find_map(|span| {
if span.get("type").and_then(Value::as_str) == Some("input_image") {
span.get("image_url")
.and_then(Value::as_str)
.map(str::to_string)
} else {
None
}
})
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
@@ -208,27 +229,8 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
let req = mock.single_request();
let body = req.body_json();
let output_text = req
.function_call_output_content_and_success(call_id)
.and_then(|(content, _)| content)
.expect("output text present");
assert_eq!(output_text, "attached local image path");
let image_message =
find_image_message(&body).expect("pending input image message not included in request");
let image_url = image_message
.get("content")
.and_then(Value::as_array)
.and_then(|content| {
content.iter().find_map(|span| {
if span.get("type").and_then(Value::as_str) == Some("input_image") {
span.get("image_url").and_then(Value::as_str)
} else {
None
}
})
})
.expect("image_url present");
let image_url =
find_tool_output_image_url(&body, call_id).expect("tool output image not included");
let (prefix, encoded) = image_url
.split_once(',')
@@ -318,7 +320,7 @@ async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()> {
async fn view_image_tool_errors_for_non_image_files() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@@ -378,36 +380,19 @@ async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()>
"non-image file should not produce an input_image message"
);
let placeholder = request
.inputs_of_type("message")
.iter()
.find_map(|item| {
let content = item.get("content").and_then(Value::as_array)?;
content.iter().find_map(|span| {
if span.get("type").and_then(Value::as_str) == Some("input_text") {
let text = span.get("text").and_then(Value::as_str)?;
if text.contains("Codex could not read the local image at")
&& text.contains("unsupported MIME type `application/json`")
{
return Some(text.to_string());
}
}
None
})
})
.expect("placeholder text found");
assert!(
placeholder.contains(&abs_path.display().to_string()),
"placeholder should mention path: {placeholder}"
);
let output_text = mock
.single_request()
.function_call_output_content_and_success(call_id)
.and_then(|(content, _)| content)
.expect("output text present");
assert_eq!(output_text, "attached local image path");
assert!(
output_text.contains("Codex could not read the local image at"),
"expected tool error for non-image file"
);
assert!(
output_text.contains(&abs_path.display().to_string()),
"output should mention path: {output_text}"
);
Ok(())
}
@@ -485,7 +470,7 @@ async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> {
#[cfg(not(debug_assertions))]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()> {
async fn reports_invalid_local_image_after_bad_request() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@@ -542,6 +527,7 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()>
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::Error(_))).await;
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
let first_body = invalid_image_mock.single_request().body_json();
@@ -550,14 +536,10 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()>
"initial request should include the uploaded image"
);
let second_request = completion_mock.single_request();
let second_body = second_request.body_json();
assert!(
find_image_message(&second_body).is_none(),
"second request should replace the invalid image"
completion_mock.requests().is_empty(),
"invalid user image should stop the turn before retrying"
);
let user_texts = second_request.message_input_texts("user");
assert!(user_texts.iter().any(|text| text == "Invalid image"));
Ok(())
}