Compare commits

...

5 Commits

Author SHA1 Message Date
jif-oai
aa18e1689b Merge branch 'main' into jif/error-on-image-origin 2026-01-13 11:58:53 +00:00
jif-oai
f41a982b03 fix merge 2026-01-12 09:11:58 +00:00
jif-oai
7cf6f4e85c Merge remote-tracking branch 'origin/main' into jif/error-on-image-origin
# Conflicts:
#	codex-rs/core/src/tools/handlers/view_image.rs
#	codex-rs/core/tests/suite/view_image.rs
2026-01-12 09:03:12 +00:00
jif-oai
4c1703dfad clean up 2026-01-09 19:23:33 +00:00
jif-oai
48502af5a2 feat: error on user-injected bad images 2026-01-09 18:57:14 +00:00
4 changed files with 125 additions and 109 deletions

View File

@@ -1579,24 +1579,6 @@ impl Session {
}
}
/// Returns the input if there was no task running to inject into
pub async fn inject_response_items(
&self,
input: Vec<ResponseInputItem>,
) -> Result<(), Vec<ResponseInputItem>> {
let mut active = self.active_turn.lock().await;
match active.as_mut() {
Some(at) => {
let mut ts = at.turn_state.lock().await;
for item in input {
ts.push_pending_input(item);
}
Ok(())
}
None => Err(input),
}
}
pub async fn get_pending_input(&self) -> Vec<ResponseInputItem> {
let mut active = self.active_turn.lock().await;
match active.as_mut() {
@@ -2611,11 +2593,25 @@ pub(crate) async fn run_turn(
break;
}
Err(CodexErr::InvalidImageRequest()) => {
let mut state = sess.state.lock().await;
error_or_panic(
"Invalid image detected, replacing it in the last turn to prevent poisoning",
);
state.history.replace_last_turn_images("Invalid image");
let tool_only = {
let mut state = sess.state.lock().await;
let tool_only = state.history.current_turn_images_tool_only();
error_or_panic(if tool_only {
"Invalid image detected, replacing it in the last turn to prevent poisoning"
} else {
"Invalid user image detected; replacing it in the last turn to prevent poisoning"
});
state.history.replace_last_turn_images("Invalid image");
tool_only
};
if tool_only {
continue;
}
let err = CodexErr::InvalidImageRequest();
info!("Turn error: {err:#}");
let event = EventMsg::Error(err.to_error_event(None));
sess.send_event(&turn_context, event).await;
break;
}
Err(e) => {
info!("Turn error: {e:#}");

View File

@@ -155,6 +155,34 @@ impl ContextManager {
}
}
pub(crate) fn current_turn_images_tool_only(&self) -> bool {
let mut saw_tool = false;
for item in self.items.iter().rev() {
match item {
ResponseItem::Message { role, .. } if role == "assistant" => break,
ResponseItem::Message { role, content, .. } if role == "user" => {
if content
.iter()
.any(|item| matches!(item, ContentItem::InputImage { .. }))
{
return false;
}
}
ResponseItem::FunctionCallOutput { output, .. } => {
if output.content_items.as_ref().is_some_and(|items| {
items.iter().any(|item| {
matches!(item, FunctionCallOutputContentItem::InputImage { .. })
})
}) {
saw_tool = true;
}
}
_ => {}
}
}
saw_tool
}
/// Drop the last `num_turns` user turns from this history.
///
/// "User turns" are identified as `ResponseItem::Message` entries whose role is `"user"`.

View File

@@ -12,8 +12,9 @@ use crate::tools::handlers::parse_arguments;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::local_image_content_items_with_label_number;
use codex_protocol::user_input::UserInput;
pub struct ViewImageHandler;
@@ -65,22 +66,6 @@ impl ToolHandler for ViewImageHandler {
}
let event_path = abs_path.clone();
let content: Vec<ContentItem> =
local_image_content_items_with_label_number(&abs_path, None);
let input = ResponseInputItem::Message {
role: "user".to_string(),
content,
};
session
.inject_response_items(vec![input])
.await
.map_err(|_| {
FunctionCallError::RespondToModel(
"unable to attach image (no active task)".to_string(),
)
})?;
session
.send_event(
turn.as_ref(),
@@ -91,9 +76,48 @@ impl ToolHandler for ViewImageHandler {
)
.await;
let response_input: ResponseInputItem = vec![UserInput::LocalImage {
path: abs_path.clone(),
}]
.into();
let image_url = match response_input {
ResponseInputItem::Message { content, .. } => {
let mut image_url = None;
let mut text_fallback = None;
for item in content {
match item {
ContentItem::InputImage { image_url: url } => {
image_url = Some(url);
break;
}
ContentItem::InputText { text } => {
text_fallback.get_or_insert(text);
}
_ => {}
}
}
if let Some(url) = image_url {
url
} else if let Some(text) = text_fallback {
return Err(FunctionCallError::RespondToModel(text));
} else {
return Err(FunctionCallError::RespondToModel(
"unexpected image input payload".to_string(),
));
}
}
_ => {
return Err(FunctionCallError::RespondToModel(
"unexpected image input payload".to_string(),
));
}
};
Ok(ToolOutput::Function {
content: "attached local image path".to_string(),
content_items: None,
content_items: Some(vec![FunctionCallOutputContentItem::InputImage {
image_url,
}]),
success: Some(true),
})
}

View File

@@ -46,6 +46,27 @@ fn find_image_message(body: &Value) -> Option<&Value> {
})
}
fn find_tool_output_image_url(body: &Value, call_id: &str) -> Option<String> {
let items = body.get("input").and_then(Value::as_array)?;
let output = items
.iter()
.find(|item| {
item.get("type").and_then(Value::as_str) == Some("function_call_output")
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
})?
.get("output")?;
let output_items = output.as_array()?;
output_items.iter().find_map(|span| {
if span.get("type").and_then(Value::as_str) == Some("input_image") {
span.get("image_url")
.and_then(Value::as_str)
.map(str::to_string)
} else {
None
}
})
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
@@ -208,41 +229,8 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
let req = mock.single_request();
let body = req.body_json();
let output_text = req
.function_call_output_content_and_success(call_id)
.and_then(|(content, _)| content)
.expect("output text present");
assert_eq!(output_text, "attached local image path");
let image_message =
find_image_message(&body).expect("pending input image message not included in request");
let content_items = image_message
.get("content")
.and_then(Value::as_array)
.expect("image message has content array");
assert_eq!(
content_items.len(),
1,
"view_image should inject only the image content item (no tag/label text)"
);
assert_eq!(
content_items[0].get("type").and_then(Value::as_str),
Some("input_image"),
"view_image should inject only an input_image content item"
);
let image_url = image_message
.get("content")
.and_then(Value::as_array)
.and_then(|content| {
content.iter().find_map(|span| {
if span.get("type").and_then(Value::as_str) == Some("input_image") {
span.get("image_url").and_then(Value::as_str)
} else {
None
}
})
})
.expect("image_url present");
let image_url =
find_tool_output_image_url(&body, call_id).expect("tool output image not included");
let (prefix, encoded) = image_url
.split_once(',')
@@ -332,7 +320,7 @@ async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()> {
async fn view_image_tool_errors_for_non_image_files() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@@ -392,36 +380,19 @@ async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()>
"non-image file should not produce an input_image message"
);
let placeholder = request
.inputs_of_type("message")
.iter()
.find_map(|item| {
let content = item.get("content").and_then(Value::as_array)?;
content.iter().find_map(|span| {
if span.get("type").and_then(Value::as_str) == Some("input_text") {
let text = span.get("text").and_then(Value::as_str)?;
if text.contains("Codex could not read the local image at")
&& text.contains("unsupported MIME type `application/json`")
{
return Some(text.to_string());
}
}
None
})
})
.expect("placeholder text found");
assert!(
placeholder.contains(&abs_path.display().to_string()),
"placeholder should mention path: {placeholder}"
);
let output_text = mock
.single_request()
.function_call_output_content_and_success(call_id)
.and_then(|(content, _)| content)
.expect("output text present");
assert_eq!(output_text, "attached local image path");
assert!(
output_text.contains("Codex could not read the local image at"),
"expected tool error for non-image file"
);
assert!(
output_text.contains(&abs_path.display().to_string()),
"output should mention path: {output_text}"
);
Ok(())
}
@@ -499,7 +470,7 @@ async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> {
#[cfg(not(debug_assertions))]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()> {
async fn reports_invalid_local_image_after_bad_request() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@@ -556,6 +527,7 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()>
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::Error(_))).await;
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
let first_body = invalid_image_mock.single_request().body_json();
@@ -564,14 +536,10 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()>
"initial request should include the uploaded image"
);
let second_request = completion_mock.single_request();
let second_body = second_request.body_json();
assert!(
find_image_message(&second_body).is_none(),
"second request should replace the invalid image"
completion_mock.requests().is_empty(),
"invalid user image should stop the turn before retrying"
);
let user_texts = second_request.message_input_texts("user");
assert!(user_texts.iter().any(|text| text == "Invalid image"));
Ok(())
}