From 82210d2a104daa5b843b4e849f7b53ea81be28b3 Mon Sep 17 00:00:00 2001 From: Colin Young Date: Fri, 30 Jan 2026 14:54:09 -0800 Subject: [PATCH] [Codex][CLI] Gate image inputs by model modalities --- .../app-server/tests/common/models_cache.rs | 2 + .../codex-api/tests/models_integration.rs | 2 + .../core/src/models_manager/model_info.rs | 2 + .../core/src/models_manager/model_presets.rs | 12 +++++ codex-rs/core/tests/suite/list_models.rs | 12 +++++ codex-rs/core/tests/suite/models_cache_ttl.rs | 2 + codex-rs/core/tests/suite/personality.rs | 4 ++ codex-rs/core/tests/suite/remote_models.rs | 4 ++ codex-rs/protocol/src/openai_models.rs | 34 ++++++++++++++ codex-rs/tui/src/bottom_pane/chat_composer.rs | 4 ++ codex-rs/tui/src/bottom_pane/mod.rs | 5 +++ codex-rs/tui/src/chatwidget.rs | 45 +++++++++++++++++++ codex-rs/tui/src/chatwidget/tests.rs | 3 ++ 13 files changed, 131 insertions(+) diff --git a/codex-rs/app-server/tests/common/models_cache.rs b/codex-rs/app-server/tests/common/models_cache.rs index 59fddc2427..d281936db8 100644 --- a/codex-rs/app-server/tests/common/models_cache.rs +++ b/codex-rs/app-server/tests/common/models_cache.rs @@ -6,6 +6,7 @@ use codex_protocol::openai_models::ModelInfo; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ModelVisibility; use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::default_input_modalities; use serde_json::json; use std::path::Path; @@ -38,6 +39,7 @@ fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo { auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), } } diff --git a/codex-rs/codex-api/tests/models_integration.rs b/codex-rs/codex-api/tests/models_integration.rs index c75c46a28a..2145be6542 100644 --- a/codex-rs/codex-api/tests/models_integration.rs +++ b/codex-rs/codex-api/tests/models_integration.rs @@ -11,6 +11,7 @@ use codex_protocol::openai_models::ModelsResponse; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::default_input_modalities; use http::HeaderMap; use http::Method; use wiremock::Mock; @@ -88,6 +89,7 @@ async fn models_client_hits_models_endpoint() { auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), }], }; diff --git a/codex-rs/core/src/models_manager/model_info.rs b/codex-rs/core/src/models_manager/model_info.rs index eaab160001..5cccefdd21 100644 --- a/codex-rs/core/src/models_manager/model_info.rs +++ b/codex-rs/core/src/models_manager/model_info.rs @@ -9,6 +9,7 @@ use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; use codex_protocol::openai_models::TruncationMode; use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::default_input_modalities; use crate::config::Config; use crate::features::Feature; @@ -66,6 +67,7 @@ macro_rules! model_info { auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), }; $( diff --git a/codex-rs/core/src/models_manager/model_presets.rs b/codex-rs/core/src/models_manager/model_presets.rs index f9105c6448..a597f7f922 100644 --- a/codex-rs/core/src/models_manager/model_presets.rs +++ b/codex-rs/core/src/models_manager/model_presets.rs @@ -3,6 +3,7 @@ use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ModelUpgrade; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; +use codex_protocol::openai_models::default_input_modalities; use indoc::indoc; use once_cell::sync::Lazy; @@ -41,6 +42,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: None, show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5.1-codex-max".to_string(), @@ -71,6 +73,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5.1-codex-mini".to_string(), @@ -94,6 +97,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5.2".to_string(), @@ -124,6 +128,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "bengalfox".to_string(), @@ -154,6 +159,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: None, show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "boomslang".to_string(), @@ -184,6 +190,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: None, show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, // Deprecated models. ModelPreset { @@ -211,6 +218,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5-codex-mini".to_string(), @@ -233,6 +241,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5.1-codex".to_string(), @@ -260,6 +269,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5".to_string(), @@ -290,6 +300,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5.1".to_string(), @@ -316,6 +327,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, ] }); diff --git a/codex-rs/core/tests/suite/list_models.rs b/codex-rs/core/tests/suite/list_models.rs index f6db54af7b..aee3a60e0f 100644 --- a/codex-rs/core/tests/suite/list_models.rs +++ b/codex-rs/core/tests/suite/list_models.rs @@ -7,6 +7,7 @@ use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ModelUpgrade; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; +use codex_protocol::openai_models::default_input_modalities; use core_test_support::load_default_config_for_test; use indoc::indoc; use pretty_assertions::assert_eq; @@ -99,6 +100,7 @@ fn gpt_52_codex() -> ModelPreset { upgrade: None, show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -142,6 +144,7 @@ fn gpt_5_1_codex_max() -> ModelPreset { )), show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -177,6 +180,7 @@ fn gpt_5_1_codex_mini() -> ModelPreset { )), show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -222,6 +226,7 @@ fn gpt_5_2() -> ModelPreset { )), show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -255,6 +260,7 @@ fn bengalfox() -> ModelPreset { upgrade: None, show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -288,6 +294,7 @@ fn boomslang() -> ModelPreset { upgrade: None, show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -327,6 +334,7 @@ fn gpt_5_codex() -> ModelPreset { )), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -362,6 +370,7 @@ fn gpt_5_codex_mini() -> ModelPreset { )), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -401,6 +410,7 @@ fn gpt_5_1_codex() -> ModelPreset { )), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -444,6 +454,7 @@ fn gpt_5() -> ModelPreset { )), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -483,6 +494,7 @@ fn gpt_5_1() -> ModelPreset { )), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } diff --git a/codex-rs/core/tests/suite/models_cache_ttl.rs b/codex-rs/core/tests/suite/models_cache_ttl.rs index 5756f46a69..e144a35a8c 100644 --- a/codex-rs/core/tests/suite/models_cache_ttl.rs +++ b/codex-rs/core/tests/suite/models_cache_ttl.rs @@ -19,6 +19,7 @@ use codex_protocol::openai_models::ModelsResponse; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::default_input_modalities; use codex_protocol::user_input::UserInput; use core_test_support::responses; use core_test_support::responses::ev_assistant_message; @@ -186,5 +187,6 @@ fn test_remote_model(slug: &str, priority: i32) -> ModelInfo { auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), } } diff --git a/codex-rs/core/tests/suite/personality.rs b/codex-rs/core/tests/suite/personality.rs index 79e0d36c68..3ab89fd4a4 100644 --- a/codex-rs/core/tests/suite/personality.rs +++ b/codex-rs/core/tests/suite/personality.rs @@ -16,6 +16,7 @@ use codex_protocol::openai_models::ModelsResponse; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::default_input_modalities; use codex_protocol::user_input::UserInput; use core_test_support::load_default_config_for_test; use core_test_support::responses::ev_completed; @@ -422,6 +423,7 @@ async fn ignores_remote_model_personality_if_remote_models_disabled() -> anyhow: auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), }; let _models_mock = mount_models_once( @@ -536,6 +538,7 @@ async fn remote_model_default_personality_instructions_with_feature() -> anyhow: auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), }; let _models_mock = mount_models_once( @@ -642,6 +645,7 @@ async fn user_turn_personality_remote_model_template_includes_update_message() - auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), }; let _models_mock = mount_models_once( diff --git a/codex-rs/core/tests/suite/remote_models.rs b/codex-rs/core/tests/suite/remote_models.rs index f59f73fd14..ed46855f9c 100644 --- a/codex-rs/core/tests/suite/remote_models.rs +++ b/codex-rs/core/tests/suite/remote_models.rs @@ -25,6 +25,7 @@ use codex_protocol::openai_models::ModelsResponse; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::default_input_modalities; use codex_protocol::user_input::UserInput; use core_test_support::load_default_config_for_test; use core_test_support::responses::ev_assistant_message; @@ -76,6 +77,7 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { shell_type: ConfigShellToolType::UnifiedExec, visibility: ModelVisibility::List, supported_in_api: true, + input_modalities: default_input_modalities(), priority: 1, upgrade: None, base_instructions: "base instructions".to_string(), @@ -313,6 +315,7 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> { shell_type: ConfigShellToolType::ShellCommand, visibility: ModelVisibility::List, supported_in_api: true, + input_modalities: default_input_modalities(), priority: 1, upgrade: None, base_instructions: remote_base.to_string(), @@ -787,6 +790,7 @@ fn test_remote_model_with_policy( shell_type: ConfigShellToolType::ShellCommand, visibility, supported_in_api: true, + input_modalities: default_input_modalities(), priority, upgrade: None, base_instructions: "base instructions".to_string(), diff --git a/codex-rs/protocol/src/openai_models.rs b/codex-rs/protocol/src/openai_models.rs index 9a940539d5..a528f3d6a8 100644 --- a/codex-rs/protocol/src/openai_models.rs +++ b/codex-rs/protocol/src/openai_models.rs @@ -43,6 +43,34 @@ pub enum ReasoningEffort { XHigh, } +/// Input modalities supported by a model. +#[derive( + Debug, + Serialize, + Deserialize, + Default, + Clone, + Copy, + PartialEq, + Eq, + Display, + JsonSchema, + TS, + EnumIter, + Hash, +)] +#[serde(rename_all = "lowercase")] +#[strum(serialize_all = "lowercase")] +pub enum InputModality { + #[default] + Text, + Image, +} + +pub fn default_input_modalities() -> Vec { + vec![InputModality::Text, InputModality::Image] +} + /// A reasoning effort option that can be surfaced for a model. #[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq, Eq)] pub struct ReasoningEffortPreset { @@ -88,6 +116,8 @@ pub struct ModelPreset { pub show_in_picker: bool, /// whether this model is supported in the api pub supported_in_api: bool, + #[serde(default = "default_input_modalities")] + pub input_modalities: Vec, } /// Visibility of a model in the picker or APIs. @@ -206,6 +236,8 @@ pub struct ModelInfo { #[serde(default = "default_effective_context_window_percent")] pub effective_context_window_percent: i64, pub experimental_supported_tools: Vec, + #[serde(default = "default_input_modalities")] + pub input_modalities: Vec, } impl ModelInfo { @@ -350,6 +382,7 @@ impl From for ModelPreset { }), show_in_picker: info.visibility == ModelVisibility::List, supported_in_api: info.supported_in_api, + input_modalities: info.input_modalities, } } } @@ -460,6 +493,7 @@ mod tests { auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: vec![], + input_modalities: default_input_modalities(), } } diff --git a/codex-rs/tui/src/bottom_pane/chat_composer.rs b/codex-rs/tui/src/bottom_pane/chat_composer.rs index 979213112f..c969b72d5b 100644 --- a/codex-rs/tui/src/bottom_pane/chat_composer.rs +++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs @@ -390,6 +390,10 @@ impl ChatComposer { self.skills = skills; } + pub fn set_image_paste_enabled(&mut self, enabled: bool) { + self.config.image_paste_enabled = enabled; + } + pub fn set_connector_mentions(&mut self, connectors_snapshot: Option) { self.connectors_snapshot = connectors_snapshot; } diff --git a/codex-rs/tui/src/bottom_pane/mod.rs b/codex-rs/tui/src/bottom_pane/mod.rs index a632fd4468..2a51178652 100644 --- a/codex-rs/tui/src/bottom_pane/mod.rs +++ b/codex-rs/tui/src/bottom_pane/mod.rs @@ -209,6 +209,11 @@ impl BottomPane { self.request_redraw(); } + pub fn set_image_paste_enabled(&mut self, enabled: bool) { + self.composer.set_image_paste_enabled(enabled); + self.request_redraw(); + } + pub fn set_connectors_snapshot(&mut self, snapshot: Option) { self.composer.set_connector_mentions(snapshot); self.request_redraw(); diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index b0c8c7cf33..6c3d082bde 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -208,6 +208,7 @@ use codex_core::ThreadManager; use codex_core::protocol::AskForApproval; use codex_core::protocol::SandboxPolicy; use codex_file_search::FileMatch; +use codex_protocol::openai_models::InputModality; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig; use codex_protocol::plan_tool::UpdatePlanArgs; @@ -2722,6 +2723,13 @@ impl ChatWidget { } pub(crate) fn attach_image(&mut self, path: PathBuf) { + if !self.current_model_supports_images() { + self.add_to_history(history_cell::new_warning_event( + self.image_inputs_not_supported_message(), + )); + self.request_redraw(); + return; + } tracing::info!("attach_image path={path:?}"); self.bottom_pane.attach_image(path); self.request_redraw(); @@ -3153,6 +3161,16 @@ impl ChatWidget { if text.is_empty() && local_images.is_empty() { return; } + if !local_images.is_empty() && !self.current_model_supports_images() { + let local_image_paths = local_images.iter().map(|img| img.path.clone()).collect(); + self.bottom_pane + .set_composer_text(text, text_elements, local_image_paths); + self.add_to_history(history_cell::new_warning_event( + self.image_inputs_not_supported_message(), + )); + self.request_redraw(); + return; + } let mut items: Vec = Vec::new(); @@ -5210,6 +5228,32 @@ impl ChatWidget { .unwrap_or(false) } + fn current_model_supports_images(&self) -> bool { + let model = self.current_model(); + self.models_manager + .try_list_models(&self.config) + .ok() + .and_then(|models| { + models + .into_iter() + .find(|preset| preset.model == model) + .map(|preset| preset.input_modalities.contains(&InputModality::Image)) + }) + .unwrap_or(true) + } + + fn sync_image_paste_enabled(&mut self) { + let enabled = self.current_model_supports_images(); + self.bottom_pane.set_image_paste_enabled(enabled); + } + + fn image_inputs_not_supported_message(&self) -> String { + format!( + "Model {} does not support image inputs. Remove images or switch models.", + self.current_model() + ) + } + #[allow(dead_code)] // Used in tests pub(crate) fn current_collaboration_mode(&self) -> &CollaborationMode { &self.current_collaboration_mode @@ -5282,6 +5326,7 @@ impl ChatWidget { fn refresh_model_display(&mut self) { let effective = self.effective_collaboration_mode(); self.session_header.set_model(effective.model()); + self.sync_image_paste_enabled(); } fn model_display_name(&self) -> &str { diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index d24504578c..a58fa13619 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -70,6 +70,7 @@ use codex_protocol::config_types::Personality; use codex_protocol::config_types::Settings; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ReasoningEffortPreset; +use codex_protocol::openai_models::default_input_modalities; use codex_protocol::parse_command::ParsedCommand; use codex_protocol::plan_tool::PlanItemArg; use codex_protocol::plan_tool::StepStatus; @@ -3055,6 +3056,7 @@ async fn model_picker_hides_show_in_picker_false_models_from_cache() { upgrade: None, show_in_picker, supported_in_api: true, + input_modalities: default_input_modalities(), }; chat.open_model_popup_with_presets(vec![ @@ -3293,6 +3295,7 @@ async fn single_reasoning_option_skips_selection() { upgrade: None, show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), }; chat.open_reasoning_popup(preset);