mirror of
https://github.com/openai/codex.git
synced 2026-04-26 15:45:02 +00:00
Always expose view_image and return unsupported image-input error (#11336)
- Keep `view_image` in the advertised tool list for all models. - Return a clear error when the current model does not support image inputs, and cover it with a unit test.
This commit is contained in:
@@ -2,17 +2,28 @@
|
||||
|
||||
use base64::Engine;
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::openai_models::ConfigShellToolType;
|
||||
use codex_protocol::openai_models::InputModality;
|
||||
use codex_protocol::openai_models::ModelInfo;
|
||||
use codex_protocol::openai_models::ModelVisibility;
|
||||
use codex_protocol::openai_models::ModelsResponse;
|
||||
use codex_protocol::openai_models::ReasoningEffort;
|
||||
use codex_protocol::openai_models::ReasoningEffortPreset;
|
||||
use codex_protocol::openai_models::TruncationPolicyConfig;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_models_once;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
@@ -26,6 +37,8 @@ use image::Rgba;
|
||||
use image::load_from_memory;
|
||||
use serde_json::Value;
|
||||
use tokio::time::Duration;
|
||||
use wiremock::BodyPrintLimit;
|
||||
use wiremock::MockServer;
|
||||
|
||||
fn find_image_message(body: &Value) -> Option<&Value> {
|
||||
body.get("input")
|
||||
@@ -521,6 +534,120 @@ async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
// Use MockServer directly (not start_mock_server) so the first /models request returns our
|
||||
// text-only model. start_mock_server mounts empty models first, causing get_model_info to
|
||||
// fall back to model_info_from_slug with default_input_modalities (Text+Image), which would
|
||||
// incorrectly allow view_image.
|
||||
let server = MockServer::builder()
|
||||
.body_print_limit(BodyPrintLimit::Limited(80_000))
|
||||
.start()
|
||||
.await;
|
||||
let model_slug = "text-only-view-image-test-model";
|
||||
let text_only_model = ModelInfo {
|
||||
slug: model_slug.to_string(),
|
||||
display_name: "Text-only view_image test model".to_string(),
|
||||
description: Some("Remote model for view_image unsupported-path coverage".to_string()),
|
||||
default_reasoning_level: Some(ReasoningEffort::Medium),
|
||||
supported_reasoning_levels: vec![ReasoningEffortPreset {
|
||||
effort: ReasoningEffort::Medium,
|
||||
description: ReasoningEffort::Medium.to_string(),
|
||||
}],
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
visibility: ModelVisibility::List,
|
||||
supported_in_api: true,
|
||||
input_modalities: vec![InputModality::Text],
|
||||
priority: 1,
|
||||
upgrade: None,
|
||||
base_instructions: "base instructions".to_string(),
|
||||
model_messages: None,
|
||||
supports_reasoning_summaries: false,
|
||||
support_verbosity: false,
|
||||
default_verbosity: None,
|
||||
apply_patch_tool_type: None,
|
||||
truncation_policy: TruncationPolicyConfig::bytes(10_000),
|
||||
supports_parallel_tool_calls: false,
|
||||
context_window: Some(272_000),
|
||||
auto_compact_token_limit: None,
|
||||
effective_context_window_percent: 95,
|
||||
experimental_supported_tools: Vec::new(),
|
||||
};
|
||||
mount_models_once(
|
||||
&server,
|
||||
ModelsResponse {
|
||||
models: vec![text_only_model],
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, cwd, .. } = test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(|config| {
|
||||
config.features.enable(Feature::RemoteModels);
|
||||
config.model = Some(model_slug.to_string());
|
||||
})
|
||||
.build(&server)
|
||||
.await?;
|
||||
|
||||
let rel_path = "assets/example.png";
|
||||
let abs_path = cwd.path().join(rel_path);
|
||||
if let Some(parent) = abs_path.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
let image = ImageBuffer::from_pixel(20, 20, Rgba([255u8, 0, 0, 255]));
|
||||
image.save(&abs_path)?;
|
||||
|
||||
let call_id = "view-image-unsupported-model";
|
||||
let arguments = serde_json::json!({ "path": rel_path }).to_string();
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "view_image", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once(&server, first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let mock = responses::mount_sse_once(&server, second_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please attach the image".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: model_slug.to_string(),
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let output_text = mock
|
||||
.single_request()
|
||||
.function_call_output_content_and_success(call_id)
|
||||
.and_then(|(content, _)| content)
|
||||
.expect("output text present");
|
||||
assert_eq!(
|
||||
output_text,
|
||||
"view_image is not allowed because you do not support image inputs"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()> {
|
||||
|
||||
Reference in New Issue
Block a user