Files
codex/codex-rs/protocol/src/models.rs
Eric Traut 069a38a06c Add missing "nullable" macro to protocol structs that contain optional fields (#5901)
This PR addresses a current hole in the TypeScript code generation for
the API server protocol. Fields that are marked as "Optional<>" in the
Rust code are serialized such that the value is omitted when it is
deserialized — appearing as `undefined`, but the TS type indicates
(incorrectly) that it is always defined but possibly `null`. This can
lead to subtle errors that the TypeScript compiler doesn't catch. The
fix is to include the `#[ts(optional_fields = nullable)]` macro for all
protocol structs that contain one or more `Optional<>` fields.

This PR also includes a new test that validates that all TS protocol
code containing "| null" in its type is marked optional ("?") to catch
cases where `#[ts(optional_fields = nullable)]` is omitted.
2025-10-29 12:09:47 -07:00

695 lines
24 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use std::collections::HashMap;
use base64::Engine;
use codex_utils_image::load_and_resize_to_fit;
use mcp_types::CallToolResult;
use mcp_types::ContentBlock;
use serde::Deserialize;
use serde::Deserializer;
use serde::Serialize;
use serde::ser::Serializer;
use ts_rs::TS;
use crate::user_input::UserInput;
use codex_git::GhostCommit;
use codex_utils_image::error::ImageProcessingError;
use schemars::JsonSchema;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseInputItem {
Message {
role: String,
content: Vec<ContentItem>,
},
FunctionCallOutput {
call_id: String,
output: FunctionCallOutputPayload,
},
McpToolCallOutput {
call_id: String,
result: Result<CallToolResult, String>,
},
CustomToolCallOutput {
call_id: String,
output: String,
},
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentItem {
InputText { text: String },
InputImage { image_url: String },
OutputText { text: String },
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseItem {
Message {
#[serde(skip_serializing)]
#[ts(optional = nullable)]
id: Option<String>,
role: String,
content: Vec<ContentItem>,
},
Reasoning {
#[serde(default, skip_serializing)]
id: String,
summary: Vec<ReasoningItemReasoningSummary>,
#[serde(default, skip_serializing_if = "should_serialize_reasoning_content")]
#[ts(optional = nullable)]
content: Option<Vec<ReasoningItemContent>>,
#[ts(optional = nullable)]
encrypted_content: Option<String>,
},
LocalShellCall {
/// Set when using the chat completions API.
#[serde(skip_serializing)]
#[ts(optional = nullable)]
id: Option<String>,
/// Set when using the Responses API.
#[ts(optional = nullable)]
call_id: Option<String>,
status: LocalShellStatus,
action: LocalShellAction,
},
FunctionCall {
#[serde(skip_serializing)]
#[ts(optional = nullable)]
id: Option<String>,
name: String,
// The Responses API returns the function call arguments as a *string* that contains
// JSON, not as an alreadyparsed object. We keep it as a raw string here and let
// Session::handle_function_call parse it into a Value. This exactly matches the
// Chat Completions + Responses API behavior.
arguments: String,
call_id: String,
},
// NOTE: The input schema for `function_call_output` objects that clients send to the
// OpenAI /v1/responses endpoint is NOT the same shape as the objects the server returns on the
// SSE stream. When *sending* we must wrap the string output inside an object that includes a
// required `success` boolean. To ensure we serialize exactly the expected shape we introduce
// a dedicated payload struct and flatten it here.
FunctionCallOutput {
call_id: String,
output: FunctionCallOutputPayload,
},
CustomToolCall {
#[serde(skip_serializing)]
#[ts(optional = nullable)]
id: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[ts(optional = nullable)]
status: Option<String>,
call_id: String,
name: String,
input: String,
},
CustomToolCallOutput {
call_id: String,
output: String,
},
// Emitted by the Responses API when the agent triggers a web search.
// Example payload (from SSE `response.output_item.done`):
// {
// "id":"ws_...",
// "type":"web_search_call",
// "status":"completed",
// "action": {"type":"search","query":"weather: San Francisco, CA"}
// }
WebSearchCall {
#[serde(skip_serializing)]
#[ts(optional = nullable)]
id: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[ts(optional = nullable)]
status: Option<String>,
action: WebSearchAction,
},
// Generated by the harness but considered exactly as a model response.
GhostSnapshot {
ghost_commit: GhostCommit,
},
#[serde(other)]
Other,
}
fn should_serialize_reasoning_content(content: &Option<Vec<ReasoningItemContent>>) -> bool {
match content {
Some(content) => !content
.iter()
.any(|c| matches!(c, ReasoningItemContent::ReasoningText { .. })),
None => false,
}
}
fn local_image_error_placeholder(
path: &std::path::Path,
error: impl std::fmt::Display,
) -> ContentItem {
ContentItem::InputText {
text: format!(
"Codex could not read the local image at `{}`: {}",
path.display(),
error
),
}
}
impl From<ResponseInputItem> for ResponseItem {
fn from(item: ResponseInputItem) -> Self {
match item {
ResponseInputItem::Message { role, content } => Self::Message {
role,
content,
id: None,
},
ResponseInputItem::FunctionCallOutput { call_id, output } => {
Self::FunctionCallOutput { call_id, output }
}
ResponseInputItem::McpToolCallOutput { call_id, result } => {
let output = match result {
Ok(result) => FunctionCallOutputPayload::from(&result),
Err(tool_call_err) => FunctionCallOutputPayload {
content: format!("err: {tool_call_err:?}"),
success: Some(false),
..Default::default()
},
};
Self::FunctionCallOutput { call_id, output }
}
ResponseInputItem::CustomToolCallOutput { call_id, output } => {
Self::CustomToolCallOutput { call_id, output }
}
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum LocalShellStatus {
Completed,
InProgress,
Incomplete,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum LocalShellAction {
Exec(LocalShellExecAction),
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[ts(optional_fields = nullable)]
pub struct LocalShellExecAction {
pub command: Vec<String>,
pub timeout_ms: Option<u64>,
pub working_directory: Option<String>,
pub env: Option<HashMap<String, String>>,
pub user: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum WebSearchAction {
Search {
query: String,
},
#[serde(other)]
Other,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ReasoningItemReasoningSummary {
SummaryText { text: String },
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ReasoningItemContent {
ReasoningText { text: String },
Text { text: String },
}
impl From<Vec<UserInput>> for ResponseInputItem {
fn from(items: Vec<UserInput>) -> Self {
Self::Message {
role: "user".to_string(),
content: items
.into_iter()
.map(|c| match c {
UserInput::Text { text } => ContentItem::InputText { text },
UserInput::Image { image_url } => ContentItem::InputImage { image_url },
UserInput::LocalImage { path } => match load_and_resize_to_fit(&path) {
Ok(image) => ContentItem::InputImage {
image_url: image.into_data_url(),
},
Err(err) => {
tracing::warn!("Failed to resize image {}: {}", path.display(), err);
if matches!(&err, ImageProcessingError::Read { .. }) {
local_image_error_placeholder(&path, &err)
} else {
match std::fs::read(&path) {
Ok(bytes) => {
let Some(mime_guess) = mime_guess::from_path(&path).first()
else {
return local_image_error_placeholder(
&path,
"unsupported MIME type (unknown)",
);
};
let mime = mime_guess.essence_str().to_owned();
if !mime.starts_with("image/") {
return local_image_error_placeholder(
&path,
format!("unsupported MIME type `{mime}`"),
);
}
let encoded =
base64::engine::general_purpose::STANDARD.encode(bytes);
ContentItem::InputImage {
image_url: format!("data:{mime};base64,{encoded}"),
}
}
Err(read_err) => {
tracing::warn!(
"Skipping image {} could not read file: {}",
path.display(),
read_err
);
local_image_error_placeholder(&path, &read_err)
}
}
}
}
},
})
.collect::<Vec<ContentItem>>(),
}
}
}
/// If the `name` of a `ResponseItem::FunctionCall` is either `container.exec`
/// or shell`, the `arguments` field should deserialize to this struct.
#[derive(Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[ts(optional_fields = nullable)]
pub struct ShellToolCallParams {
pub command: Vec<String>,
pub workdir: Option<String>,
/// This is the maximum time in milliseconds that the command is allowed to run.
#[serde(alias = "timeout")]
pub timeout_ms: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub with_escalated_permissions: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub justification: Option<String>,
}
/// Responses API compatible content items that can be returned by a tool call.
/// This is a subset of ContentItem with the types we support as function call outputs.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum FunctionCallOutputContentItem {
// Do not rename, these are serialized and used directly in the responses API.
InputText { text: String },
// Do not rename, these are serialized and used directly in the responses API.
InputImage { image_url: String },
}
/// The payload we send back to OpenAI when reporting a tool call result.
///
/// `content` preserves the historical plain-string payload so downstream
/// integrations (tests, logging, etc.) can keep treating tool output as
/// `String`. When an MCP server returns richer data we additionally populate
/// `content_items` with the structured form that the Responses/Chat
/// Completions APIs understand.
#[derive(Debug, Default, Clone, PartialEq, JsonSchema, TS)]
#[ts(optional_fields = nullable)]
pub struct FunctionCallOutputPayload {
pub content: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub content_items: Option<Vec<FunctionCallOutputContentItem>>,
// TODO(jif) drop this.
pub success: Option<bool>,
}
#[derive(Deserialize)]
#[serde(untagged)]
enum FunctionCallOutputPayloadSerde {
Text(String),
Items(Vec<FunctionCallOutputContentItem>),
}
// The Responses API expects two *different* shapes depending on success vs failure:
// • success → output is a plain string (no nested object)
// • failure → output is an object { content, success:false }
impl Serialize for FunctionCallOutputPayload {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
if let Some(items) = &self.content_items {
items.serialize(serializer)
} else {
serializer.serialize_str(&self.content)
}
}
}
impl<'de> Deserialize<'de> for FunctionCallOutputPayload {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
match FunctionCallOutputPayloadSerde::deserialize(deserializer)? {
FunctionCallOutputPayloadSerde::Text(content) => Ok(FunctionCallOutputPayload {
content,
..Default::default()
}),
FunctionCallOutputPayloadSerde::Items(items) => {
let content = serde_json::to_string(&items).map_err(serde::de::Error::custom)?;
Ok(FunctionCallOutputPayload {
content,
content_items: Some(items),
success: None,
})
}
}
}
}
impl From<&CallToolResult> for FunctionCallOutputPayload {
fn from(call_tool_result: &CallToolResult) -> Self {
let CallToolResult {
content,
structured_content,
is_error,
} = call_tool_result;
let is_success = is_error != &Some(true);
if let Some(structured_content) = structured_content
&& !structured_content.is_null()
{
match serde_json::to_string(structured_content) {
Ok(serialized_structured_content) => {
return FunctionCallOutputPayload {
content: serialized_structured_content,
success: Some(is_success),
..Default::default()
};
}
Err(err) => {
return FunctionCallOutputPayload {
content: err.to_string(),
success: Some(false),
..Default::default()
};
}
}
}
let serialized_content = match serde_json::to_string(content) {
Ok(serialized_content) => serialized_content,
Err(err) => {
return FunctionCallOutputPayload {
content: err.to_string(),
success: Some(false),
..Default::default()
};
}
};
let content_items = convert_content_blocks_to_items(content);
FunctionCallOutputPayload {
content: serialized_content,
content_items,
success: Some(is_success),
}
}
}
fn convert_content_blocks_to_items(
blocks: &[ContentBlock],
) -> Option<Vec<FunctionCallOutputContentItem>> {
let mut saw_image = false;
let mut items = Vec::with_capacity(blocks.len());
for block in blocks {
match block {
ContentBlock::TextContent(text) => {
items.push(FunctionCallOutputContentItem::InputText {
text: text.text.clone(),
});
}
ContentBlock::ImageContent(image) => {
saw_image = true;
// Just in case the content doesn't include a data URL, add it.
let image_url = if image.data.starts_with("data:") {
image.data.clone()
} else {
format!("data:{};base64,{}", image.mime_type, image.data)
};
items.push(FunctionCallOutputContentItem::InputImage { image_url });
}
// TODO: render audio, resource, and embedded resource content to the model.
_ => return None,
}
}
if saw_image { Some(items) } else { None }
}
// Implement Display so callers can treat the payload like a plain string when logging or doing
// trivial substring checks in tests (existing tests call `.contains()` on the output). Display
// returns the raw `content` field.
impl std::fmt::Display for FunctionCallOutputPayload {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(&self.content)
}
}
impl std::ops::Deref for FunctionCallOutputPayload {
type Target = str;
fn deref(&self) -> &Self::Target {
&self.content
}
}
// (Moved event mapping logic into codex-core to avoid coupling protocol to UI-facing events.)
#[cfg(test)]
mod tests {
use super::*;
use anyhow::Result;
use mcp_types::ImageContent;
use mcp_types::TextContent;
use tempfile::tempdir;
#[test]
fn serializes_success_as_plain_string() -> Result<()> {
let item = ResponseInputItem::FunctionCallOutput {
call_id: "call1".into(),
output: FunctionCallOutputPayload {
content: "ok".into(),
..Default::default()
},
};
let json = serde_json::to_string(&item)?;
let v: serde_json::Value = serde_json::from_str(&json)?;
// Success case -> output should be a plain string
assert_eq!(v.get("output").unwrap().as_str().unwrap(), "ok");
Ok(())
}
#[test]
fn serializes_failure_as_string() -> Result<()> {
let item = ResponseInputItem::FunctionCallOutput {
call_id: "call1".into(),
output: FunctionCallOutputPayload {
content: "bad".into(),
success: Some(false),
..Default::default()
},
};
let json = serde_json::to_string(&item)?;
let v: serde_json::Value = serde_json::from_str(&json)?;
assert_eq!(v.get("output").unwrap().as_str().unwrap(), "bad");
Ok(())
}
#[test]
fn serializes_image_outputs_as_array() -> Result<()> {
let call_tool_result = CallToolResult {
content: vec![
ContentBlock::TextContent(TextContent {
annotations: None,
text: "caption".into(),
r#type: "text".into(),
}),
ContentBlock::ImageContent(ImageContent {
annotations: None,
data: "BASE64".into(),
mime_type: "image/png".into(),
r#type: "image".into(),
}),
],
is_error: None,
structured_content: None,
};
let payload = FunctionCallOutputPayload::from(&call_tool_result);
assert_eq!(payload.success, Some(true));
let items = payload.content_items.clone().expect("content items");
assert_eq!(
items,
vec![
FunctionCallOutputContentItem::InputText {
text: "caption".into(),
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,BASE64".into(),
},
]
);
let item = ResponseInputItem::FunctionCallOutput {
call_id: "call1".into(),
output: payload,
};
let json = serde_json::to_string(&item)?;
let v: serde_json::Value = serde_json::from_str(&json)?;
let output = v.get("output").expect("output field");
assert!(output.is_array(), "expected array output");
Ok(())
}
#[test]
fn deserializes_array_payload_into_items() -> Result<()> {
let json = r#"[
{"type": "input_text", "text": "note"},
{"type": "input_image", "image_url": "data:image/png;base64,XYZ"}
]"#;
let payload: FunctionCallOutputPayload = serde_json::from_str(json)?;
assert_eq!(payload.success, None);
let expected_items = vec![
FunctionCallOutputContentItem::InputText {
text: "note".into(),
},
FunctionCallOutputContentItem::InputImage {
image_url: "data:image/png;base64,XYZ".into(),
},
];
assert_eq!(payload.content_items, Some(expected_items.clone()));
let expected_content = serde_json::to_string(&expected_items)?;
assert_eq!(payload.content, expected_content);
Ok(())
}
#[test]
fn deserialize_shell_tool_call_params() -> Result<()> {
let json = r#"{
"command": ["ls", "-l"],
"workdir": "/tmp",
"timeout": 1000
}"#;
let params: ShellToolCallParams = serde_json::from_str(json)?;
assert_eq!(
ShellToolCallParams {
command: vec!["ls".to_string(), "-l".to_string()],
workdir: Some("/tmp".to_string()),
timeout_ms: Some(1000),
with_escalated_permissions: None,
justification: None,
},
params
);
Ok(())
}
#[test]
fn local_image_read_error_adds_placeholder() -> Result<()> {
let dir = tempdir()?;
let missing_path = dir.path().join("missing-image.png");
let item = ResponseInputItem::from(vec![UserInput::LocalImage {
path: missing_path.clone(),
}]);
match item {
ResponseInputItem::Message { content, .. } => {
assert_eq!(content.len(), 1);
match &content[0] {
ContentItem::InputText { text } => {
let display_path = missing_path.display().to_string();
assert!(
text.contains(&display_path),
"placeholder should mention missing path: {text}"
);
assert!(
text.contains("could not read"),
"placeholder should mention read issue: {text}"
);
}
other => panic!("expected placeholder text but found {other:?}"),
}
}
other => panic!("expected message response but got {other:?}"),
}
Ok(())
}
#[test]
fn local_image_non_image_adds_placeholder() -> Result<()> {
let dir = tempdir()?;
let json_path = dir.path().join("example.json");
std::fs::write(&json_path, br#"{"hello":"world"}"#)?;
let item = ResponseInputItem::from(vec![UserInput::LocalImage {
path: json_path.clone(),
}]);
match item {
ResponseInputItem::Message { content, .. } => {
assert_eq!(content.len(), 1);
match &content[0] {
ContentItem::InputText { text } => {
assert!(
text.contains("unsupported MIME type `application/json`"),
"placeholder should mention unsupported MIME: {text}"
);
assert!(
text.contains(&json_path.display().to_string()),
"placeholder should mention path: {text}"
);
}
other => panic!("expected placeholder text but found {other:?}"),
}
}
other => panic!("expected message response but got {other:?}"),
}
Ok(())
}
}