Files
codex/codex-rs/protocol/src/user_input.rs
Curtis 'Fjord' Hawthorne 8543e39885 Preserve image detail in app-server inputs (#20693)
## Summary

- Add optional image detail to user image inputs across core, app-server
v2, thread history/event mapping, and the generated app-server
schemas/types.
- Preserve requested detail when serializing Responses image inputs:
omitted detail stays on the existing `high` default, while explicit
`original` keeps local images on the original-resolution path.
- Support `high`/`original` consistently for tool image outputs,
including MCP `codex/imageDetail`, code-mode image helpers, and
`view_image`.
2026-05-15 15:04:04 -07:00

122 lines
3.9 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use ts_rs::TS;
use crate::models::ImageDetail;
/// Conservative cap so one user message cannot monopolize a large context window.
pub const MAX_USER_INPUT_TEXT_CHARS: usize = 1 << 20;
/// User input
#[non_exhaustive]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS, JsonSchema)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum UserInput {
Text {
text: String,
/// UI-defined spans within `text` that should be treated as special elements.
/// These are byte ranges into the UTF-8 `text` buffer and are used to render
/// or persist rich input markers (e.g., image placeholders) across history
/// and resume without mutating the literal text.
#[serde(default)]
text_elements: Vec<TextElement>,
},
/// Preencoded data: URI image.
Image {
image_url: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[ts(optional)]
detail: Option<ImageDetail>,
},
/// Local image path provided by the user. This will be converted to an
/// `Image` variant (base64 data URL) during request serialization.
LocalImage {
path: std::path::PathBuf,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[ts(optional)]
detail: Option<ImageDetail>,
},
/// Skill selected by the user (name + path to SKILL.md).
Skill {
name: String,
path: std::path::PathBuf,
},
/// Explicit structured mention selected by the user.
///
/// `path` identifies the exact mention target, for example
/// `app://<connector-id>` or `plugin://<plugin-name>@<marketplace-name>`.
Mention { name: String, path: String },
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS, JsonSchema)]
pub struct TextElement {
/// Byte range in the parent `text` buffer that this element occupies.
pub byte_range: ByteRange,
/// Optional human-readable placeholder for the element, displayed in the UI.
placeholder: Option<String>,
}
impl TextElement {
pub fn new(byte_range: ByteRange, placeholder: Option<String>) -> Self {
Self {
byte_range,
placeholder,
}
}
/// Returns a copy of this element with a remapped byte range.
///
/// The placeholder is preserved as-is; callers must ensure the new range
/// still refers to the same logical element (and same placeholder)
/// within the new text.
pub fn map_range<F>(&self, map: F) -> Self
where
F: FnOnce(ByteRange) -> ByteRange,
{
Self {
byte_range: map(self.byte_range),
placeholder: self.placeholder.clone(),
}
}
pub fn set_placeholder(&mut self, placeholder: Option<String>) {
self.placeholder = placeholder;
}
/// Returns the stored placeholder without falling back to the text buffer.
///
/// This must only be used inside `From<TextElement>` implementations on equivalent
/// protocol types where the source text is unavailable. Prefer `placeholder(text)`
/// everywhere else.
#[doc(hidden)]
pub fn _placeholder_for_conversion_only(&self) -> Option<&str> {
self.placeholder.as_deref()
}
pub fn placeholder<'a>(&'a self, text: &'a str) -> Option<&'a str> {
self.placeholder
.as_deref()
.or_else(|| text.get(self.byte_range.start..self.byte_range.end))
}
}
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, TS, JsonSchema)]
pub struct ByteRange {
/// Start byte offset (inclusive) within the UTF-8 text buffer.
pub start: usize,
/// End byte offset (exclusive) within the UTF-8 text buffer.
pub end: usize,
}
impl From<std::ops::Range<usize>> for ByteRange {
fn from(range: std::ops::Range<usize>) -> Self {
Self {
start: range.start,
end: range.end,
}
}
}