mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
feat: error on user-injected bad images
This commit is contained in:
@@ -156,6 +156,7 @@ use codex_async_utils::OrCancelExt;
|
||||
use codex_otel::OtelManager;
|
||||
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
|
||||
@@ -1551,6 +1552,39 @@ impl Session {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn current_turn_images_tool_only(&self) -> bool {
|
||||
let history = self.state.lock().await.clone_history();
|
||||
let mut saw_user = false;
|
||||
let mut saw_tool = false;
|
||||
for item in history.raw_items().iter().rev() {
|
||||
match item {
|
||||
ResponseItem::Message { role, content, .. } => {
|
||||
if role == "assistant" {
|
||||
break;
|
||||
}
|
||||
if role == "user"
|
||||
&& content
|
||||
.iter()
|
||||
.any(|item| matches!(item, ContentItem::InputImage { .. }))
|
||||
{
|
||||
saw_user = true;
|
||||
}
|
||||
}
|
||||
ResponseItem::FunctionCallOutput { output, .. } => {
|
||||
if let Some(items) = &output.content_items
|
||||
&& items.iter().any(|item| {
|
||||
matches!(item, FunctionCallOutputContentItem::InputImage { .. })
|
||||
})
|
||||
{
|
||||
saw_tool = true;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
saw_tool && !saw_user
|
||||
}
|
||||
|
||||
pub async fn list_resources(
|
||||
&self,
|
||||
server: &str,
|
||||
@@ -2449,11 +2483,19 @@ pub(crate) async fn run_turn(
|
||||
break;
|
||||
}
|
||||
Err(CodexErr::InvalidImageRequest()) => {
|
||||
let mut state = sess.state.lock().await;
|
||||
error_or_panic(
|
||||
"Invalid image detected, replacing it in the last turn to prevent poisoning",
|
||||
);
|
||||
state.history.replace_last_turn_images("Invalid image");
|
||||
if sess.current_turn_images_tool_only().await {
|
||||
let mut state = sess.state.lock().await;
|
||||
error_or_panic(
|
||||
"Invalid image detected, replacing it in the last turn to prevent poisoning",
|
||||
);
|
||||
state.history.replace_last_turn_images("Invalid image");
|
||||
} else {
|
||||
let err = CodexErr::InvalidImageRequest();
|
||||
info!("Turn error: {err:#}");
|
||||
let event = EventMsg::Error(err.to_error_event(None));
|
||||
sess.send_event(&turn_context, event).await;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
info!("Turn error: {e:#}");
|
||||
|
||||
@@ -11,6 +11,9 @@ use crate::tools::context::ToolPayload;
|
||||
use crate::tools::handlers::parse_arguments;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
|
||||
pub struct ViewImageHandler;
|
||||
@@ -63,15 +66,6 @@ impl ToolHandler for ViewImageHandler {
|
||||
}
|
||||
let event_path = abs_path.clone();
|
||||
|
||||
session
|
||||
.inject_input(vec![UserInput::LocalImage { path: abs_path }])
|
||||
.await
|
||||
.map_err(|_| {
|
||||
FunctionCallError::RespondToModel(
|
||||
"unable to attach image (no active task)".to_string(),
|
||||
)
|
||||
})?;
|
||||
|
||||
session
|
||||
.send_event(
|
||||
turn.as_ref(),
|
||||
@@ -82,9 +76,34 @@ impl ToolHandler for ViewImageHandler {
|
||||
)
|
||||
.await;
|
||||
|
||||
let response_input: ResponseInputItem = vec![UserInput::LocalImage {
|
||||
path: abs_path.clone(),
|
||||
}]
|
||||
.into();
|
||||
let image_url = match response_input {
|
||||
ResponseInputItem::Message { content, .. } => match content.into_iter().next() {
|
||||
Some(ContentItem::InputImage { image_url }) => image_url,
|
||||
Some(ContentItem::InputText { text }) => {
|
||||
return Err(FunctionCallError::RespondToModel(text));
|
||||
}
|
||||
_ => {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"unexpected image input payload".to_string(),
|
||||
));
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"unexpected image input payload".to_string(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
Ok(ToolOutput::Function {
|
||||
content: "attached local image path".to_string(),
|
||||
content_items: None,
|
||||
content_items: Some(vec![FunctionCallOutputContentItem::InputImage {
|
||||
image_url,
|
||||
}]),
|
||||
success: Some(true),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -46,6 +46,27 @@ fn find_image_message(body: &Value) -> Option<&Value> {
|
||||
})
|
||||
}
|
||||
|
||||
fn find_tool_output_image_url(body: &Value, call_id: &str) -> Option<String> {
|
||||
let items = body.get("input").and_then(Value::as_array)?;
|
||||
let output = items
|
||||
.iter()
|
||||
.find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
})?
|
||||
.get("output")?;
|
||||
let output_items = output.as_array()?;
|
||||
output_items.iter().find_map(|span| {
|
||||
if span.get("type").and_then(Value::as_str) == Some("input_image") {
|
||||
span.get("image_url")
|
||||
.and_then(Value::as_str)
|
||||
.map(str::to_string)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
@@ -208,27 +229,8 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
||||
|
||||
let req = mock.single_request();
|
||||
let body = req.body_json();
|
||||
let output_text = req
|
||||
.function_call_output_content_and_success(call_id)
|
||||
.and_then(|(content, _)| content)
|
||||
.expect("output text present");
|
||||
assert_eq!(output_text, "attached local image path");
|
||||
|
||||
let image_message =
|
||||
find_image_message(&body).expect("pending input image message not included in request");
|
||||
let image_url = image_message
|
||||
.get("content")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|content| {
|
||||
content.iter().find_map(|span| {
|
||||
if span.get("type").and_then(Value::as_str) == Some("input_image") {
|
||||
span.get("image_url").and_then(Value::as_str)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
.expect("image_url present");
|
||||
let image_url =
|
||||
find_tool_output_image_url(&body, call_id).expect("tool output image not included");
|
||||
|
||||
let (prefix, encoded) = image_url
|
||||
.split_once(',')
|
||||
@@ -318,7 +320,7 @@ async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()> {
|
||||
async fn view_image_tool_errors_for_non_image_files() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
@@ -378,36 +380,19 @@ async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()>
|
||||
"non-image file should not produce an input_image message"
|
||||
);
|
||||
|
||||
let placeholder = request
|
||||
.inputs_of_type("message")
|
||||
.iter()
|
||||
.find_map(|item| {
|
||||
let content = item.get("content").and_then(Value::as_array)?;
|
||||
content.iter().find_map(|span| {
|
||||
if span.get("type").and_then(Value::as_str) == Some("input_text") {
|
||||
let text = span.get("text").and_then(Value::as_str)?;
|
||||
if text.contains("Codex could not read the local image at")
|
||||
&& text.contains("unsupported MIME type `application/json`")
|
||||
{
|
||||
return Some(text.to_string());
|
||||
}
|
||||
}
|
||||
None
|
||||
})
|
||||
})
|
||||
.expect("placeholder text found");
|
||||
|
||||
assert!(
|
||||
placeholder.contains(&abs_path.display().to_string()),
|
||||
"placeholder should mention path: {placeholder}"
|
||||
);
|
||||
|
||||
let output_text = mock
|
||||
.single_request()
|
||||
.function_call_output_content_and_success(call_id)
|
||||
.and_then(|(content, _)| content)
|
||||
.expect("output text present");
|
||||
assert_eq!(output_text, "attached local image path");
|
||||
assert!(
|
||||
output_text.contains("Codex could not read the local image at"),
|
||||
"expected tool error for non-image file"
|
||||
);
|
||||
assert!(
|
||||
output_text.contains(&abs_path.display().to_string()),
|
||||
"output should mention path: {output_text}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -485,7 +470,7 @@ async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> {
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()> {
|
||||
async fn reports_invalid_local_image_after_bad_request() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
@@ -542,6 +527,7 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()>
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::Error(_))).await;
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let first_body = invalid_image_mock.single_request().body_json();
|
||||
@@ -550,14 +536,10 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()>
|
||||
"initial request should include the uploaded image"
|
||||
);
|
||||
|
||||
let second_request = completion_mock.single_request();
|
||||
let second_body = second_request.body_json();
|
||||
assert!(
|
||||
find_image_message(&second_body).is_none(),
|
||||
"second request should replace the invalid image"
|
||||
completion_mock.requests().is_empty(),
|
||||
"invalid user image should stop the turn before retrying"
|
||||
);
|
||||
let user_texts = second_request.message_input_texts("user");
|
||||
assert!(user_texts.iter().any(|text| text == "Invalid image"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user