mirror of
https://github.com/openai/codex.git
synced 2026-05-03 19:06:58 +00:00
[Codex][CLI] Gate image inputs by model modalities (#10271)
###### Summary
- Add `input_modalities` to model metadata so clients can determine supported input types.
- Gate image paste/attach in the TUI when the selected model does not support images.
- Block submits that include images for unsupported models and show a clear warning.
- Propagate modality metadata through app-server protocol/model-list responses.
- Update related tests/fixtures.

###### Rationale
- Models support different input modalities.
- Clients need an explicit capability signal to prevent unsupported requests.
- Backward-compatible defaults preserve existing behavior when modality metadata is absent.

###### Scope
- codex-rs/protocol, codex-rs/core, codex-rs/tui
- codex-rs/app-server-protocol, codex-rs/app-server
- Generated app-server types / schema fixtures

###### Trade-offs
- Default behavior assumes text + image when the field is absent, for compatibility.
- Server-side validation remains the source of truth.

###### Follow-up
- Non-TUI clients should consume `input_modalities` to disable unsupported attachments.
- Model catalogs should explicitly set `input_modalities` for text-only models.

###### Testing
- `cargo fmt --all`
- `cargo test -p codex-tui`
- `env -u GITHUB_APP_KEY cargo test -p codex-core --lib`
- `just write-app-server-schema`
- `cargo run -p codex-cli --bin codex -- app-server generate-ts --out app-server-types`
- Tested against a local backend.

<img width="695" height="199" alt="image" src="https://github.com/user-attachments/assets/d22dd04f-5eba-4db9-a7c5-a2506f60ec44" />

---------

Co-authored-by: Josh McKinney <joshka@openai.com>
This commit is contained in:
@@ -1,3 +1,8 @@
|
||||
//! Shared model metadata types exchanged between Codex services and clients.
|
||||
//!
|
||||
//! These types are serialized across core, TUI, app-server, and SDK boundaries, so field defaults
|
||||
//! are used to preserve compatibility when older payloads omit newly introduced attributes.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
|
||||
@@ -43,6 +48,38 @@ pub enum ReasoningEffort {
|
||||
XHigh,
|
||||
}
|
||||
|
||||
/// Canonical user-input modality tags advertised by a model.
|
||||
#[derive(
|
||||
Debug,
|
||||
Serialize,
|
||||
Deserialize,
|
||||
Clone,
|
||||
Copy,
|
||||
PartialEq,
|
||||
Eq,
|
||||
Display,
|
||||
JsonSchema,
|
||||
TS,
|
||||
EnumIter,
|
||||
Hash,
|
||||
)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
#[strum(serialize_all = "lowercase")]
|
||||
pub enum InputModality {
|
||||
/// Plain text turns and tool payloads.
|
||||
Text,
|
||||
/// Image attachments included in user turns.
|
||||
Image,
|
||||
}
|
||||
|
||||
/// Backward-compatible default when `input_modalities` is omitted on the wire.
|
||||
///
|
||||
/// Legacy payloads predate modality metadata, so we conservatively assume both text and images are
|
||||
/// accepted unless a preset explicitly narrows support.
|
||||
pub fn default_input_modalities() -> Vec<InputModality> {
|
||||
vec![InputModality::Text, InputModality::Image]
|
||||
}
|
||||
|
||||
/// A reasoning effort option that can be surfaced for a model.
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq, Eq)]
|
||||
pub struct ReasoningEffortPreset {
|
||||
@@ -88,6 +125,9 @@ pub struct ModelPreset {
|
||||
pub show_in_picker: bool,
|
||||
/// whether this model is supported in the api
|
||||
pub supported_in_api: bool,
|
||||
/// Input modalities accepted when composing user turns for this preset.
|
||||
#[serde(default = "default_input_modalities")]
|
||||
pub input_modalities: Vec<InputModality>,
|
||||
}
|
||||
|
||||
/// Visibility of a model in the picker or APIs.
|
||||
@@ -206,6 +246,9 @@ pub struct ModelInfo {
|
||||
#[serde(default = "default_effective_context_window_percent")]
|
||||
pub effective_context_window_percent: i64,
|
||||
pub experimental_supported_tools: Vec<String>,
|
||||
/// Input modalities accepted by the backend for this model.
|
||||
#[serde(default = "default_input_modalities")]
|
||||
pub input_modalities: Vec<InputModality>,
|
||||
}
|
||||
|
||||
impl ModelInfo {
|
||||
@@ -350,6 +393,7 @@ impl From<ModelInfo> for ModelPreset {
|
||||
}),
|
||||
show_in_picker: info.visibility == ModelVisibility::List,
|
||||
supported_in_api: info.supported_in_api,
|
||||
input_modalities: info.input_modalities,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -460,6 +504,7 @@ mod tests {
|
||||
auto_compact_token_limit: None,
|
||||
effective_context_window_percent: 95,
|
||||
experimental_supported_tools: vec![],
|
||||
input_modalities: default_input_modalities(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user