mirror of
https://github.com/openai/codex.git
synced 2026-04-24 22:54:54 +00:00
Add text element metadata to protocol, app server, and core (#9331)
The second part of breaking up PR https://github.com/openai/codex/pull/9116 Summary: - Add `TextElement` / `ByteRange` to protocol user inputs and user message events with defaults. - Thread `text_elements` through app-server v1/v2 request handling and history rebuild. - Preserve UI metadata only in user input/events (not `ContentItem`) while keeping local image attachments in user events for rehydration. Details: - Protocol: `UserInput::Text` carries `text_elements`; `UserMessageEvent` carries `text_elements` + `local_images`. Serialization includes empty vectors for backward compatibility. - app-server-protocol: v1 defines `V1TextElement` / `V1ByteRange` in camelCase with conversions; v2 uses its own camelCase wrapper. - app-server: v1/v2 input mapping includes `text_elements`; thread history rebuilds include them. - Core: user event emission preserves UI metadata while model history stays clean; history replay round-trips the metadata.
This commit is contained in:
@@ -197,8 +197,12 @@ impl ThreadHistoryBuilder {
|
||||
if !payload.message.trim().is_empty() {
|
||||
content.push(UserInput::Text {
|
||||
text: payload.message.clone(),
|
||||
// TODO: Thread text element ranges into thread history. Empty keeps old behavior.
|
||||
text_elements: Vec::new(),
|
||||
text_elements: payload
|
||||
.text_elements
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(Into::into)
|
||||
.collect(),
|
||||
});
|
||||
}
|
||||
if let Some(images) = &payload.images {
|
||||
@@ -206,6 +210,9 @@ impl ThreadHistoryBuilder {
|
||||
content.push(UserInput::Image { url: image.clone() });
|
||||
}
|
||||
}
|
||||
for path in &payload.local_images {
|
||||
content.push(UserInput::LocalImage { path: path.clone() });
|
||||
}
|
||||
content
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,8 @@ use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::TurnAbortReason;
|
||||
use codex_protocol::user_input::ByteRange as CoreByteRange;
|
||||
use codex_protocol::user_input::TextElement as CoreTextElement;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use schemars::JsonSchema;
|
||||
use serde::Deserialize;
|
||||
@@ -444,9 +446,74 @@ pub struct RemoveConversationListenerParams {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[serde(tag = "type", content = "data")]
|
||||
pub enum InputItem {
|
||||
Text { text: String },
|
||||
Image { image_url: String },
|
||||
LocalImage { path: PathBuf },
|
||||
Text {
|
||||
text: String,
|
||||
/// UI-defined spans within `text` used to render or persist special elements.
|
||||
#[serde(default)]
|
||||
text_elements: Vec<V1TextElement>,
|
||||
},
|
||||
Image {
|
||||
image_url: String,
|
||||
},
|
||||
LocalImage {
|
||||
path: PathBuf,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename = "ByteRange")]
|
||||
pub struct V1ByteRange {
|
||||
/// Start byte offset (inclusive) within the UTF-8 text buffer.
|
||||
pub start: usize,
|
||||
/// End byte offset (exclusive) within the UTF-8 text buffer.
|
||||
pub end: usize,
|
||||
}
|
||||
|
||||
impl From<CoreByteRange> for V1ByteRange {
|
||||
fn from(value: CoreByteRange) -> Self {
|
||||
Self {
|
||||
start: value.start,
|
||||
end: value.end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<V1ByteRange> for CoreByteRange {
|
||||
fn from(value: V1ByteRange) -> Self {
|
||||
Self {
|
||||
start: value.start,
|
||||
end: value.end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename = "TextElement")]
|
||||
pub struct V1TextElement {
|
||||
/// Byte range in the parent `text` buffer that this element occupies.
|
||||
pub byte_range: V1ByteRange,
|
||||
/// Optional human-readable placeholder for the element, displayed in the UI.
|
||||
pub placeholder: Option<String>,
|
||||
}
|
||||
|
||||
impl From<CoreTextElement> for V1TextElement {
|
||||
fn from(value: CoreTextElement) -> Self {
|
||||
Self {
|
||||
byte_range: value.byte_range.into(),
|
||||
placeholder: value.placeholder,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<V1TextElement> for CoreTextElement {
|
||||
fn from(value: V1TextElement) -> Self {
|
||||
Self {
|
||||
byte_range: value.byte_range.into(),
|
||||
placeholder: value.placeholder,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
|
||||
@@ -30,6 +30,8 @@ use codex_protocol::protocol::SkillMetadata as CoreSkillMetadata;
|
||||
use codex_protocol::protocol::SkillScope as CoreSkillScope;
|
||||
use codex_protocol::protocol::TokenUsage as CoreTokenUsage;
|
||||
use codex_protocol::protocol::TokenUsageInfo as CoreTokenUsageInfo;
|
||||
use codex_protocol::user_input::ByteRange as CoreByteRange;
|
||||
use codex_protocol::user_input::TextElement as CoreTextElement;
|
||||
use codex_protocol::user_input::UserInput as CoreUserInput;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use mcp_types::ContentBlock as McpContentBlock;
|
||||
@@ -1589,6 +1591,24 @@ pub struct ByteRange {
|
||||
pub end: usize,
|
||||
}
|
||||
|
||||
impl From<CoreByteRange> for ByteRange {
|
||||
fn from(value: CoreByteRange) -> Self {
|
||||
Self {
|
||||
start: value.start,
|
||||
end: value.end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ByteRange> for CoreByteRange {
|
||||
fn from(value: ByteRange) -> Self {
|
||||
Self {
|
||||
start: value.start,
|
||||
end: value.end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
@@ -1599,6 +1619,24 @@ pub struct TextElement {
|
||||
pub placeholder: Option<String>,
|
||||
}
|
||||
|
||||
impl From<CoreTextElement> for TextElement {
|
||||
fn from(value: CoreTextElement) -> Self {
|
||||
Self {
|
||||
byte_range: value.byte_range.into(),
|
||||
placeholder: value.placeholder,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TextElement> for CoreTextElement {
|
||||
fn from(value: TextElement) -> Self {
|
||||
Self {
|
||||
byte_range: value.byte_range.into(),
|
||||
placeholder: value.placeholder,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(tag = "type", rename_all = "camelCase")]
|
||||
#[ts(tag = "type")]
|
||||
@@ -1625,10 +1663,12 @@ pub enum UserInput {
|
||||
impl UserInput {
|
||||
pub fn into_core(self) -> CoreUserInput {
|
||||
match self {
|
||||
UserInput::Text { text, .. } => CoreUserInput::Text {
|
||||
UserInput::Text {
|
||||
text,
|
||||
// TODO: Thread text element ranges into v2 inputs. Empty keeps old behavior.
|
||||
text_elements: Vec::new(),
|
||||
text_elements,
|
||||
} => CoreUserInput::Text {
|
||||
text,
|
||||
text_elements: text_elements.into_iter().map(Into::into).collect(),
|
||||
},
|
||||
UserInput::Image { url } => CoreUserInput::Image { image_url: url },
|
||||
UserInput::LocalImage { path } => CoreUserInput::LocalImage { path },
|
||||
@@ -1640,10 +1680,12 @@ impl UserInput {
|
||||
impl From<CoreUserInput> for UserInput {
|
||||
fn from(value: CoreUserInput) -> Self {
|
||||
match value {
|
||||
CoreUserInput::Text { text, .. } => UserInput::Text {
|
||||
CoreUserInput::Text {
|
||||
text,
|
||||
// TODO: Thread text element ranges from core into v2 inputs.
|
||||
text_elements: Vec::new(),
|
||||
text_elements,
|
||||
} => UserInput::Text {
|
||||
text,
|
||||
text_elements: text_elements.into_iter().map(Into::into).collect(),
|
||||
},
|
||||
CoreUserInput::Image { image_url } => UserInput::Image { url: image_url },
|
||||
CoreUserInput::LocalImage { path } => UserInput::LocalImage { path },
|
||||
|
||||
Reference in New Issue
Block a user