Compare commits

..

1 Commits

Author SHA1 Message Date
Shijie Rao
9031fe9f7a Add cosign signing for Linux release artifacts 2025-10-22 12:44:56 -07:00
51 changed files with 2040 additions and 2298 deletions

View File

@@ -327,6 +327,38 @@ jobs:
zstd -T0 -19 --rm "$dest/$base"
done
- if: ${{ contains(matrix.target, 'unknown-linux') }}
name: Install cosign
uses: sigstore/cosign-installer@v3.6.0
# Signs every regular file under dist/<target> with `cosign sign-blob`, writing a
# detached signature (<file>.sig) and certificate (<file>.pem) next to each artifact.
# Runs only for targets whose name contains 'unknown-linux'.
# NOTE(review): no --key is passed, so this presumably relies on cosign's keyless
# (OIDC) flow - confirm the job grants the permissions that flow needs.
- if: ${{ contains(matrix.target, 'unknown-linux') }}
name: Sign Linux artifacts
shell: bash
env:
COSIGN_EXPERIMENTAL: "1"
COSIGN_YES: "true"
run: |
set -euo pipefail
dest="dist/${{ matrix.target }}"
# nullglob: an empty dist directory yields zero iterations instead of a literal '*'.
shopt -s nullglob
for artifact in "$dest"/*; do
# Only regular files are signed; directories and other entries are skipped.
[[ -f "$artifact" ]] || continue
# Never sign the signature/certificate outputs themselves.
case "$artifact" in
*.sig|*.pem)
continue
;;
esac
cosign sign-blob \
--yes \
--output-signature "${artifact}.sig" \
--output-certificate "${artifact}.pem" \
"$artifact"
done
- name: Remove signing keychain
if: ${{ always() && matrix.runner == 'macos-15-xlarge' }}
shell: bash

14
codex-rs/Cargo.lock generated
View File

@@ -1061,6 +1061,7 @@ dependencies = [
"codex-apply-patch",
"codex-async-utils",
"codex-file-search",
"codex-mcp-client",
"codex-otel",
"codex-protocol",
"codex-rmcp-client",
@@ -1248,6 +1249,19 @@ dependencies = [
"wiremock",
]
[[package]]
name = "codex-mcp-client"
version = "0.0.0"
dependencies = [
"anyhow",
"mcp-types",
"serde",
"serde_json",
"tokio",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "codex-mcp-server"
version = "0.0.0"

View File

@@ -20,6 +20,7 @@ members = [
"git-tooling",
"linux-sandbox",
"login",
"mcp-client",
"mcp-server",
"mcp-types",
"ollama",
@@ -67,6 +68,7 @@ codex-file-search = { path = "file-search" }
codex-git-tooling = { path = "git-tooling" }
codex-linux-sandbox = { path = "linux-sandbox" }
codex-login = { path = "login" }
codex-mcp-client = { path = "mcp-client" }
codex-mcp-server = { path = "mcp-server" }
codex-ollama = { path = "ollama" }
codex-otel = { path = "otel" }

View File

@@ -90,8 +90,9 @@ use codex_login::ShutdownHandle;
use codex_login::run_login_server;
use codex_protocol::ConversationId;
use codex_protocol::config_types::ForcedLoginMethod;
use codex_protocol::items::TurnItem;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::InputMessageKind;
use codex_protocol::protocol::RateLimitSnapshot;
use codex_protocol::protocol::USER_MESSAGE_BEGIN;
use codex_protocol::user_input::UserInput as CoreInputItem;
@@ -939,9 +940,18 @@ impl CodexMessageProcessor {
},
))
.await;
let initial_messages = session_configured
.initial_messages
.map(|msgs| msgs.into_iter().collect());
let initial_messages = session_configured.initial_messages.map(|msgs| {
msgs.into_iter()
.filter(|event| {
// Don't send non-plain user messages (like user instructions
// or environment context) back so they don't get rendered.
if let EventMsg::UserMessage(user_message) = event {
return matches!(user_message.kind, Some(InputMessageKind::Plain));
}
true
})
.collect()
});
// Reply with conversation id + model and initial messages (when present)
let response = codex_app_server_protocol::ResumeConversationResponse {
@@ -1586,8 +1596,18 @@ fn extract_conversation_summary(
let preview = head
.iter()
.filter_map(|value| serde_json::from_value::<ResponseItem>(value.clone()).ok())
.find_map(|item| match codex_core::parse_turn_item(&item) {
Some(TurnItem::UserMessage(user)) => Some(user.message()),
.find_map(|item| match item {
ResponseItem::Message { content, .. } => {
content.into_iter().find_map(|content| match content {
ContentItem::InputText { text } => {
match InputMessageKind::from(("user", &text)) {
InputMessageKind::Plain => Some(text),
_ => None,
}
}
_ => None,
})
}
_ => None,
})?;

View File

@@ -30,6 +30,7 @@ use codex_protocol::config_types::SandboxMode;
use codex_protocol::parse_command::ParsedCommand;
use codex_protocol::protocol::Event;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::InputMessageKind;
use pretty_assertions::assert_eq;
use std::env;
use tempfile::TempDir;
@@ -527,6 +528,43 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
.expect("sendUserTurn 2 timeout")
.expect("sendUserTurn 2 resp");
let mut env_message: Option<String> = None;
let second_cwd_str = second_cwd.to_string_lossy().into_owned();
for _ in 0..10 {
let notification = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("codex/event/user_message"),
)
.await
.expect("user_message timeout")
.expect("user_message notification");
let params = notification
.params
.clone()
.expect("user_message should include params");
let event: Event = serde_json::from_value(params).expect("deserialize user_message event");
if let EventMsg::UserMessage(user) = event.msg
&& matches!(user.kind, Some(InputMessageKind::EnvironmentContext))
&& user.message.contains(&second_cwd_str)
{
env_message = Some(user.message);
break;
}
}
let env_message = env_message.expect("expected environment context update");
assert!(
env_message.contains("<sandbox_mode>danger-full-access</sandbox_mode>"),
"env context should reflect new sandbox mode: {env_message}"
);
assert!(
env_message.contains("<network_access>enabled</network_access>"),
"env context should enable network access for danger-full-access policy: {env_message}"
);
assert!(
env_message.contains(&second_cwd_str),
"env context should include updated cwd: {env_message}"
);
let exec_begin_notification = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("codex/event/exec_command_begin"),

View File

@@ -150,10 +150,6 @@ pub struct RemoveArgs {
pub struct LoginArgs {
/// Name of the MCP server to authenticate with oauth.
pub name: String,
/// Comma-separated list of OAuth scopes to request.
#[arg(long, value_delimiter = ',', value_name = "SCOPE,SCOPE")]
pub scopes: Vec<String>,
}
#[derive(Debug, clap::Parser)]
@@ -283,7 +279,6 @@ async fn run_add(config_overrides: &CliConfigOverrides, add_args: AddArgs) -> Re
config.mcp_oauth_credentials_store_mode,
http_headers.clone(),
env_http_headers.clone(),
&Vec::new(),
)
.await?;
println!("Successfully logged in.");
@@ -332,7 +327,7 @@ async fn run_login(config_overrides: &CliConfigOverrides, login_args: LoginArgs)
);
}
let LoginArgs { name, scopes } = login_args;
let LoginArgs { name } = login_args;
let Some(server) = config.mcp_servers.get(&name) else {
bail!("No MCP server named '{name}' found.");
@@ -354,7 +349,6 @@ async fn run_login(config_overrides: &CliConfigOverrides, login_args: LoginArgs)
config.mcp_oauth_credentials_store_mode,
http_headers,
env_http_headers,
&scopes,
)
.await?;
println!("Successfully logged in to MCP server '{name}'.");

View File

@@ -22,6 +22,7 @@ chrono = { workspace = true, features = ["serde"] }
codex-app-server-protocol = { workspace = true }
codex-apply-patch = { workspace = true }
codex-file-search = { workspace = true }
codex-mcp-client = { workspace = true }
codex-otel = { workspace = true, features = ["otel"] }
codex-protocol = { workspace = true }
codex-rmcp-client = { workspace = true }

View File

@@ -104,10 +104,10 @@ pub(crate) async fn stream_chat_completions(
} = item
{
let mut text = String::new();
for entry in items {
match entry {
ReasoningItemContent::ReasoningText { text: segment }
| ReasoningItemContent::Text { text: segment } => text.push_str(segment),
for c in items {
match c {
ReasoningItemContent::ReasoningText { text: t }
| ReasoningItemContent::Text { text: t } => text.push_str(t),
}
}
if text.trim().is_empty() {

View File

@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::Debug;
use std::path::PathBuf;
@@ -6,11 +7,12 @@ use std::sync::atomic::AtomicU64;
use crate::AuthManager;
use crate::client_common::REVIEW_PROMPT;
use crate::event_mapping::map_response_item_to_event_messages;
use crate::function_tool::FunctionCallError;
use crate::mcp::auth::McpAuthStatusEntry;
use crate::parse_command::parse_command;
use crate::parse_turn_item;
use crate::response_processing::process_items;
use crate::review_format::format_review_findings_block;
use crate::state::ItemCollector;
use crate::terminal;
use crate::user_notification::UserNotifier;
use async_channel::Receiver;
@@ -18,9 +20,9 @@ use async_channel::Sender;
use codex_apply_patch::ApplyPatchAction;
use codex_protocol::ConversationId;
use codex_protocol::items::TurnItem;
use codex_protocol::items::UserMessageItem;
use codex_protocol::protocol::ConversationPathResponseEvent;
use codex_protocol::protocol::ItemCompletedEvent;
use codex_protocol::protocol::ItemStartedEvent;
use codex_protocol::protocol::ExitedReviewModeEvent;
use codex_protocol::protocol::ReviewRequest;
use codex_protocol::protocol::RolloutItem;
use codex_protocol::protocol::SessionSource;
@@ -45,6 +47,7 @@ use tokio_util::sync::CancellationToken;
use tracing::debug;
use tracing::error;
use tracing::info;
use tracing::trace;
use tracing::warn;
use crate::ModelProviderInfo;
@@ -83,6 +86,7 @@ use crate::protocol::ListCustomPromptsResponseEvent;
use crate::protocol::Op;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::ReviewDecision;
use crate::protocol::ReviewOutputEvent;
use crate::protocol::SandboxPolicy;
use crate::protocol::SessionConfiguredEvent;
use crate::protocol::StreamErrorEvent;
@@ -263,8 +267,10 @@ pub(crate) struct TurnContext {
pub(crate) sandbox_policy: SandboxPolicy,
pub(crate) shell_environment_policy: ShellEnvironmentPolicy,
pub(crate) tools_config: ToolsConfig,
pub(crate) is_review_mode: bool,
pub(crate) final_output_json_schema: Option<Value>,
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
pub(crate) item_collector: ItemCollector,
}
impl TurnContext {
@@ -353,6 +359,7 @@ impl Session {
provider: ModelProviderInfo,
session_configuration: &SessionConfiguration,
conversation_id: ConversationId,
tx_event: Sender<Event>,
sub_id: String,
) -> TurnContext {
let config = session_configuration.original_config_do_not_use.clone();
@@ -387,6 +394,8 @@ impl Session {
features: &config.features,
});
let item_collector = ItemCollector::new(tx_event, conversation_id, sub_id.clone());
TurnContext {
sub_id,
client,
@@ -397,8 +406,10 @@ impl Session {
sandbox_policy: session_configuration.sandbox_policy.clone(),
shell_environment_policy: config.shell_environment_policy.clone(),
tools_config,
is_review_mode: false,
final_output_json_schema: None,
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
item_collector,
}
}
@@ -452,6 +463,9 @@ impl Session {
let mcp_fut = McpConnectionManager::new(
config.mcp_servers.clone(),
config
.features
.enabled(crate::features::Feature::RmcpClient),
config.mcp_oauth_credentials_store_mode,
);
let default_shell_fut = shell::default_user_shell();
@@ -628,14 +642,6 @@ impl Session {
state.session_configuration = state.session_configuration.apply(&updates);
}
/// Returns a clone of the `original_config_do_not_use` handle stored in the
/// session configuration, read while holding the session state lock.
pub(crate) async fn base_config(&self) -> Arc<Config> {
    let guard = self.state.lock().await;
    Arc::clone(&guard.session_configuration.original_config_do_not_use)
}
pub(crate) async fn new_turn(&self, updates: SessionSettingsUpdate) -> Arc<TurnContext> {
let sub_id = self.next_internal_sub_id();
self.new_turn_with_sub_id(sub_id, updates).await
@@ -659,6 +665,7 @@ impl Session {
session_configuration.provider.clone(),
&session_configuration,
self.conversation_id,
self.get_tx_event(),
sub_id,
);
if let Some(final_schema) = updates.final_output_json_schema {
@@ -703,59 +710,6 @@ impl Session {
}
}
/// Sends an `ItemStarted` event for `item`, tagged with this session's
/// conversation id and the turn's `sub_id`.
async fn emit_turn_item_started(&self, turn_context: &TurnContext, item: &TurnItem) {
    let started = ItemStartedEvent {
        thread_id: self.conversation_id,
        turn_id: turn_context.sub_id.clone(),
        item: item.clone(),
    };
    self.send_event(turn_context, EventMsg::ItemStarted(started))
        .await;
}
/// Sends an `ItemCompleted` event for `item`, then emits the legacy events
/// derived from the same item.
///
/// `emit_raw_agent_reasoning` is forwarded unchanged to the legacy-event emitter.
async fn emit_turn_item_completed(
    &self,
    turn_context: &TurnContext,
    item: TurnItem,
    emit_raw_agent_reasoning: bool,
) {
    // The event takes its own copy; the original is still needed below.
    let completed = EventMsg::ItemCompleted(ItemCompletedEvent {
        thread_id: self.conversation_id,
        turn_id: turn_context.sub_id.clone(),
        item: item.clone(),
    });
    self.send_event(turn_context, completed).await;

    self.emit_turn_item_legacy_events(turn_context, &item, emit_raw_agent_reasoning)
        .await;
}
/// Emits the started notification for `item` immediately followed by its
/// completed notification (which also emits the legacy events).
async fn emit_turn_item_started_completed(
    &self,
    turn_context: &TurnContext,
    item: TurnItem,
    emit_raw_agent_reasoning: bool,
) {
    // Started only borrows the item; completed consumes it.
    let started = self.emit_turn_item_started(turn_context, &item);
    started.await;

    let completed = self.emit_turn_item_completed(turn_context, item, emit_raw_agent_reasoning);
    completed.await;
}
/// Sends, in order, every legacy event produced by
/// `item.as_legacy_events(emit_raw_agent_reasoning)`.
async fn emit_turn_item_legacy_events(
    &self,
    turn_context: &TurnContext,
    item: &TurnItem,
    emit_raw_agent_reasoning: bool,
) {
    let legacy_events = item.as_legacy_events(emit_raw_agent_reasoning);
    for legacy_event in legacy_events {
        self.send_event(turn_context, legacy_event).await;
    }
}
/// Emit an exec approval request event and await the user's decision.
///
/// The request is keyed by `sub_id`/`call_id` so matching responses are delivered
@@ -858,7 +812,7 @@ impl Session {
/// Records input items: always append to conversation history and
/// persist these response items to rollout.
pub(crate) async fn record_conversation_items(&self, items: &[ResponseItem]) {
async fn record_conversation_items(&self, items: &[ResponseItem]) {
self.record_into_history(items).await;
self.persist_rollout_response_items(items).await;
}
@@ -875,7 +829,7 @@ impl Session {
history.record_items(std::iter::once(response_item));
}
RolloutItem::Compacted(compacted) => {
let snapshot = history.get_history();
let snapshot = history.contents();
let user_messages = collect_user_messages(&snapshot);
let rebuilt = build_compacted_history(
self.build_initial_context(turn_context),
@@ -887,7 +841,7 @@ impl Session {
_ => {}
}
}
history.get_history()
history.contents()
}
/// Append ResponseItems to the in-memory conversation history only.
@@ -936,15 +890,9 @@ impl Session {
}
}
// TODO(aibrahim): get rid of this method. We shouldn't deal with Vec<ResponseItem> directly; use ConversationHistory instead.
pub(crate) async fn history_snapshot(&self) -> Vec<ResponseItem> {
let mut state = self.state.lock().await;
state.history_snapshot()
}
pub(crate) async fn clone_history(&self) -> ConversationHistory {
let state = self.state.lock().await;
state.clone_history()
state.history_snapshot()
}
async fn update_token_usage_info(
@@ -998,22 +946,24 @@ impl Session {
/// Record a user input item to conversation history and also persist a
/// corresponding UserMessage EventMsg to rollout.
async fn record_input_and_rollout_usermsg(
&self,
turn_context: &TurnContext,
response_input: &ResponseInputItem,
) {
async fn record_input_and_rollout_usermsg(&self, response_input: &ResponseInputItem) {
let response_item: ResponseItem = response_input.clone().into();
// Add to conversation history and persist response item to rollout
self.record_conversation_items(std::slice::from_ref(&response_item))
.await;
// Derive user message events and persist only UserMessage to rollout
let turn_item = parse_turn_item(&response_item);
if let Some(item @ TurnItem::UserMessage(_)) = turn_item {
self.emit_turn_item_started_completed(turn_context, item, false)
.await;
let msgs =
map_response_item_to_event_messages(&response_item, self.show_raw_agent_reasoning());
let user_msgs: Vec<RolloutItem> = msgs
.into_iter()
.filter_map(|m| match m {
EventMsg::UserMessage(ev) => Some(RolloutItem::EventMsg(EventMsg::UserMessage(ev))),
_ => None,
})
.collect();
if !user_msgs.is_empty() {
self.persist_rollout_items(&user_msgs).await;
}
}
@@ -1038,6 +988,16 @@ impl Session {
self.send_event(turn_context, event).await;
}
/// Build the full turn input by concatenating the current conversation
/// history with additional items for this turn.
///
/// The history snapshot comes first, followed by `extra` in its original
/// order. The state lock is held only while the snapshot is taken.
pub async fn turn_input_with_history(&self, extra: Vec<ResponseItem>) -> Vec<ResponseItem> {
    let mut turn_input = {
        let state = self.state.lock().await;
        state.history_snapshot()
    };
    // Both vectors are already owned, so move `extra` onto the snapshot
    // instead of cloning every item via `[a, b].concat()`.
    turn_input.extend(extra);
    turn_input
}
/// Returns the input if there was no task running to inject into
pub async fn inject_input(&self, input: Vec<UserInput>) -> Result<(), Vec<UserInput>> {
let mut active = self.active_turn.lock().await;
@@ -1198,8 +1158,19 @@ async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiv
{
sess.record_conversation_items(std::slice::from_ref(&env_item))
.await;
for msg in map_response_item_to_event_messages(
&env_item,
sess.show_raw_agent_reasoning(),
) {
sess.send_event(&current_context, msg).await;
}
}
current_context
.item_collector
.started_completed(TurnItem::UserMessage(UserMessageItem::new(&items)))
.await;
sess.spawn_task(Arc::clone(&current_context), items, RegularTask)
.await;
previous_context = Some(current_context);
@@ -1470,8 +1441,14 @@ async fn spawn_review_thread(
sandbox_policy: parent_turn_context.sandbox_policy.clone(),
shell_environment_policy: parent_turn_context.shell_environment_policy.clone(),
cwd: parent_turn_context.cwd.clone(),
is_review_mode: true,
final_output_json_schema: None,
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
item_collector: ItemCollector::new(
sess.get_tx_event(),
sess.conversation_id,
sub_id.to_string(),
),
};
// Seed the child task with the review prompt as the initial user message.
@@ -1519,8 +1496,19 @@ pub(crate) async fn run_task(
sess.send_event(&turn_context, event).await;
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
sess.record_input_and_rollout_usermsg(turn_context.as_ref(), &initial_input_for_turn)
.await;
// For review threads, keep an isolated in-memory history so the
// model sees a fresh conversation without the parent session's history.
// For normal turns, continue recording to the session history as before.
let is_review_mode = turn_context.is_review_mode;
let mut review_thread_history: Vec<ResponseItem> = Vec::new();
if is_review_mode {
// Seed review threads with environment context so the model knows the working directory.
review_thread_history.extend(sess.build_initial_context(turn_context.as_ref()));
review_thread_history.push(initial_input_for_turn.into());
} else {
sess.record_input_and_rollout_usermsg(&initial_input_for_turn)
.await;
}
let mut last_agent_message: Option<String> = None;
// Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
@@ -1549,9 +1537,14 @@ pub(crate) async fn run_task(
// conversation history on each turn. The rollout file, however, should
// only record the new items that originated in this turn so that it
// represents an append-only log without duplicates.
let turn_input: Vec<ResponseItem> = {
let turn_input: Vec<ResponseItem> = if is_review_mode {
if !pending_input.is_empty() {
review_thread_history.extend(pending_input);
}
review_thread_history.clone()
} else {
sess.record_conversation_items(&pending_input).await;
sess.history_snapshot().await
sess.turn_input_with_history(pending_input).await
};
let turn_input_messages: Vec<String> = turn_input
@@ -1592,8 +1585,109 @@ pub(crate) async fn run_task(
let token_limit_reached = total_usage_tokens
.map(|tokens| tokens >= limit)
.unwrap_or(false);
let (responses, items_to_record_in_conversation_history) =
process_items(processed_items, &sess).await;
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
let mut responses = Vec::<ResponseInputItem>::new();
for processed_response_item in processed_items {
let ProcessedResponseItem { item, response } = processed_response_item;
match (&item, &response) {
(ResponseItem::Message { role, .. }, None) if role == "assistant" => {
// If the model returned a message, we need to record it.
items_to_record_in_conversation_history.push(item);
}
(
ResponseItem::LocalShellCall { .. },
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
) => {
items_to_record_in_conversation_history.push(item);
items_to_record_in_conversation_history.push(
ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: output.clone(),
},
);
}
(
ResponseItem::FunctionCall { .. },
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
) => {
items_to_record_in_conversation_history.push(item);
items_to_record_in_conversation_history.push(
ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: output.clone(),
},
);
}
(
ResponseItem::CustomToolCall { .. },
Some(ResponseInputItem::CustomToolCallOutput { call_id, output }),
) => {
items_to_record_in_conversation_history.push(item);
items_to_record_in_conversation_history.push(
ResponseItem::CustomToolCallOutput {
call_id: call_id.clone(),
output: output.clone(),
},
);
}
(
ResponseItem::FunctionCall { .. },
Some(ResponseInputItem::McpToolCallOutput { call_id, result }),
) => {
items_to_record_in_conversation_history.push(item);
let output = match result {
Ok(call_tool_result) => {
convert_call_tool_result_to_function_call_output_payload(
call_tool_result,
)
}
Err(err) => FunctionCallOutputPayload {
content: err.clone(),
success: Some(false),
},
};
items_to_record_in_conversation_history.push(
ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output,
},
);
}
(
ResponseItem::Reasoning {
id,
summary,
content,
encrypted_content,
},
None,
) => {
items_to_record_in_conversation_history.push(ResponseItem::Reasoning {
id: id.clone(),
summary: summary.clone(),
content: content.clone(),
encrypted_content: encrypted_content.clone(),
});
}
_ => {
warn!("Unexpected response item: {item:?} with response: {response:?}");
}
};
if let Some(response) = response {
responses.push(response);
}
}
// Only attempt to take the lock if there is something to record.
if !items_to_record_in_conversation_history.is_empty() {
if is_review_mode {
review_thread_history
.extend(items_to_record_in_conversation_history.clone());
} else {
sess.record_conversation_items(&items_to_record_in_conversation_history)
.await;
}
}
if token_limit_reached {
if auto_compact_recently_attempted {
@@ -1632,10 +1726,7 @@ pub(crate) async fn run_task(
}
continue;
}
Err(CodexErr::TurnAborted {
dangling_artifacts: processed_items,
}) => {
let _ = process_items(processed_items, &sess).await;
Err(CodexErr::TurnAborted) => {
// Aborted turn is reported via a different event.
break;
}
@@ -1651,9 +1742,50 @@ pub(crate) async fn run_task(
}
}
// If this was a review thread and we have a final assistant message,
// try to parse it as a ReviewOutput.
//
// If parsing fails, construct a minimal ReviewOutputEvent using the plain
// text as the overall explanation. Else, just exit review mode with None.
//
// Emits an ExitedReviewMode event with the parsed review output.
if turn_context.is_review_mode {
exit_review_mode(
sess.clone(),
Arc::clone(&turn_context),
last_agent_message.as_deref().map(parse_review_output_event),
)
.await;
}
last_agent_message
}
/// Turn the reviewer's final message into a `ReviewOutputEvent`.
///
/// Attempts, in order:
/// 1. parse the whole text as JSON;
/// 2. parse the span from the first `{` to the last `}` (covers output wrapped
///    in markdown fences or surrounding prose);
/// 3. fall back to a default event whose `overall_explanation` is the raw text.
fn parse_review_output_event(text: &str) -> ReviewOutputEvent {
    let plain_text_fallback = || ReviewOutputEvent {
        overall_explanation: text.to_string(),
        ..Default::default()
    };

    serde_json::from_str::<ReviewOutputEvent>(text)
        .ok()
        .or_else(|| {
            let open = text.find('{')?;
            let close = text.rfind('}')?;
            if open >= close {
                return None;
            }
            let candidate = text.get(open..=close)?;
            serde_json::from_str::<ReviewOutputEvent>(candidate).ok()
        })
        .unwrap_or_else(plain_text_fallback)
}
async fn run_turn(
sess: Arc<Session>,
turn_context: Arc<TurnContext>,
@@ -1695,13 +1827,7 @@ async fn run_turn(
.await
{
Ok(output) => return Ok(output),
Err(CodexErr::TurnAborted {
dangling_artifacts: processed_items,
}) => {
return Err(CodexErr::TurnAborted {
dangling_artifacts: processed_items,
});
}
Err(CodexErr::TurnAborted) => return Err(CodexErr::TurnAborted),
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
Err(e @ CodexErr::Fatal(_)) => return Err(e),
@@ -1754,9 +1880,9 @@ async fn run_turn(
/// "handled" such that it produces a `ResponseInputItem` that needs to be
/// sent back to the model on the next turn.
#[derive(Debug)]
pub struct ProcessedResponseItem {
pub item: ResponseItem,
pub response: Option<ResponseInputItem>,
pub(crate) struct ProcessedResponseItem {
pub(crate) item: ResponseItem,
pub(crate) response: Option<ResponseInputItem>,
}
#[derive(Debug)]
@@ -1775,6 +1901,61 @@ async fn try_run_turn(
task_kind: TaskKind,
cancellation_token: CancellationToken,
) -> CodexResult<TurnRunResult> {
// call_ids that are part of this response.
let completed_call_ids = prompt
.input
.iter()
.filter_map(|ri| match ri {
ResponseItem::FunctionCallOutput { call_id, .. } => Some(call_id),
ResponseItem::LocalShellCall {
call_id: Some(call_id),
..
} => Some(call_id),
ResponseItem::CustomToolCallOutput { call_id, .. } => Some(call_id),
_ => None,
})
.collect::<Vec<_>>();
// call_ids that were pending but are not part of this response.
// This usually happens because the user interrupted the model before we responded to one of its tool calls
// and then the user sent a follow-up message.
let missing_calls = {
prompt
.input
.iter()
.filter_map(|ri| match ri {
ResponseItem::FunctionCall { call_id, .. } => Some(call_id),
ResponseItem::LocalShellCall {
call_id: Some(call_id),
..
} => Some(call_id),
ResponseItem::CustomToolCall { call_id, .. } => Some(call_id),
_ => None,
})
.filter_map(|call_id| {
if completed_call_ids.contains(&call_id) {
None
} else {
Some(call_id.clone())
}
})
.map(|call_id| ResponseItem::CustomToolCallOutput {
call_id,
output: "aborted".to_string(),
})
.collect::<Vec<_>>()
};
let prompt: Cow<Prompt> = if missing_calls.is_empty() {
Cow::Borrowed(prompt)
} else {
// Add the synthetic aborted missing calls to the beginning of the input to ensure all call ids have responses.
let input = [missing_calls, prompt.input.clone()].concat();
Cow::Owned(Prompt {
input,
..prompt.clone()
})
};
let rollout_item = RolloutItem::TurnContext(TurnContextItem {
cwd: turn_context.cwd.clone(),
approval_policy: turn_context.approval_policy,
@@ -1783,12 +1964,11 @@ async fn try_run_turn(
effort: turn_context.client.get_reasoning_effort(),
summary: turn_context.client.get_reasoning_summary(),
});
sess.persist_rollout_items(&[rollout_item]).await;
let mut stream = turn_context
.client
.clone()
.stream_with_task_kind(prompt, task_kind)
.stream_with_task_kind(prompt.as_ref(), task_kind)
.or_cancel(&cancellation_token)
.await??;
@@ -1805,15 +1985,7 @@ async fn try_run_turn(
// Poll the next item from the model stream. We must inspect *both* Ok and Err
// cases so that transient stream failures (e.g., dropped SSE connection before
// `response.completed`) bubble up and trigger the caller's retry logic.
let event = match stream.next().or_cancel(&cancellation_token).await {
Ok(event) => event,
Err(codex_async_utils::CancelErr::Cancelled) => {
let processed_items = output.try_collect().await?;
return Err(CodexErr::TurnAborted {
dangling_artifacts: processed_items,
});
}
};
let event = stream.next().or_cancel(&cancellation_token).await?;
let event = match event {
Some(res) => res?,
@@ -1837,8 +2009,7 @@ async fn try_run_turn(
let payload_preview = call.payload.log_payload().into_owned();
tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview);
let response =
tool_runtime.handle_tool_call(call, cancellation_token.child_token());
let response = tool_runtime.handle_tool_call(call);
output.push_back(
async move {
@@ -1852,10 +2023,9 @@ async fn try_run_turn(
}
Ok(None) => {
let response = handle_non_tool_response_item(
sess.as_ref(),
Arc::clone(&sess),
Arc::clone(&turn_context),
item.clone(),
sess.show_raw_agent_reasoning(),
)
.await?;
add_completed(ProcessedResponseItem { item, response });
@@ -1920,7 +2090,12 @@ async fn try_run_turn(
} => {
sess.update_token_usage_info(turn_context.as_ref(), token_usage.as_ref())
.await;
let processed_items = output.try_collect().await?;
let processed_items = output
.try_collect()
.or_cancel(&cancellation_token)
.await??;
let unified_diff = {
let mut tracker = turn_diff_tracker.lock().await;
tracker.get_unified_diff()
@@ -1940,8 +2115,12 @@ async fn try_run_turn(
ResponseEvent::OutputTextDelta(delta) => {
// In review child threads, suppress assistant text deltas; the
// UI will show a selection popup from the final ReviewOutput.
let event = EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta });
sess.send_event(&turn_context, event).await;
if !turn_context.is_review_mode {
let event = EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta });
sess.send_event(&turn_context, event).await;
} else {
trace!("suppressing OutputTextDelta in review mode");
}
}
ResponseEvent::ReasoningSummaryDelta(delta) => {
let event = EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta });
@@ -1965,10 +2144,9 @@ async fn try_run_turn(
}
async fn handle_non_tool_response_item(
sess: &Session,
sess: Arc<Session>,
turn_context: Arc<TurnContext>,
item: ResponseItem,
show_raw_agent_reasoning: bool,
) -> CodexResult<Option<ResponseInputItem>> {
debug!(?item, "Output item");
@@ -1976,14 +2154,15 @@ async fn handle_non_tool_response_item(
ResponseItem::Message { .. }
| ResponseItem::Reasoning { .. }
| ResponseItem::WebSearchCall { .. } => {
let turn_item = parse_turn_item(&item);
if let Some(turn_item) = turn_item {
sess.emit_turn_item_started_completed(
turn_context.as_ref(),
turn_item,
show_raw_agent_reasoning,
)
.await;
let msgs = match &item {
ResponseItem::Message { .. } if turn_context.is_review_mode => {
trace!("suppressing assistant Message in review mode");
Vec::new()
}
_ => map_response_item_to_event_messages(&item, sess.show_raw_agent_reasoning()),
};
for msg in msgs {
sess.send_event(&turn_context, msg).await;
}
}
ResponseItem::FunctionCallOutput { .. } | ResponseItem::CustomToolCallOutput { .. } => {
@@ -2014,7 +2193,7 @@ pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -
}
})
}
pub(crate) fn convert_call_tool_result_to_function_call_output_payload(
fn convert_call_tool_result_to_function_call_output_payload(
call_tool_result: &CallToolResult,
) -> FunctionCallOutputPayload {
let CallToolResult {
@@ -2049,6 +2228,58 @@ pub(crate) fn convert_call_tool_result_to_function_call_output_payload(
}
}
/// Emits an `ExitedReviewMode` event carrying the optional review output, then
/// records a `user`-role message describing the review result in the
/// conversation.
///
/// - `review_output = Some(..)`: the recorded message embeds the trimmed
///   overall explanation followed by the formatted findings block (when any
///   findings exist) inside a `<user_action>` envelope.
/// - `review_output = None`: the recorded message states that the review was
///   interrupted and reports `None.` as the results.
pub(crate) async fn exit_review_mode(
session: Arc<Session>,
turn_context: Arc<TurnContext>,
review_output: Option<ReviewOutputEvent>,
) {
// The event gets its own copy; the original is consumed below to build the message.
let event = EventMsg::ExitedReviewMode(ExitedReviewModeEvent {
review_output: review_output.clone(),
});
session.send_event(turn_context.as_ref(), event).await;
let mut user_message = String::new();
if let Some(out) = review_output {
// Build the <results> body: explanation first, then the findings block.
let mut findings_str = String::new();
let text = out.overall_explanation.trim();
if !text.is_empty() {
findings_str.push_str(text);
}
if !out.findings.is_empty() {
let block = format_review_findings_block(&out.findings, None);
findings_str.push_str(&format!("\n{block}"));
}
user_message.push_str(&format!(
r#"<user_action>
<context>User initiated a review task. Here's the full review output from reviewer model. User may select one or more comments to resolve.</context>
<action>review</action>
<results>
{findings_str}
</results>
</user_action>
"#));
} else {
// No review output was supplied: record the "interrupted" variant of the message.
user_message.push_str(r#"<user_action>
<context>User initiated a review task, but was interrupted. If user asks about this, tell them to re-initiate a review with `/review` and wait for it to complete.</context>
<action>review</action>
<results>
None.
</results>
</user_action>
"#);
}
// Record the summary via `record_conversation_items` as a single user-role message.
session
.record_conversation_items(&[ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText { text: user_message }],
}])
.await;
}
fn mcp_init_error_display(
server_name: &str,
entry: Option<&McpAuthStatusEntry>,
@@ -2418,6 +2649,7 @@ mod tests {
session_configuration.provider.clone(),
&session_configuration,
conversation_id,
tx_event.clone(),
"turn_id".to_string(),
);
@@ -2486,6 +2718,7 @@ mod tests {
session_configuration.provider.clone(),
&session_configuration,
conversation_id,
tx_event.clone(),
"turn_id".to_string(),
));
@@ -2528,6 +2761,12 @@ mod tests {
sleep(Duration::from_secs(60)).await;
}
}
/// On abort, a review task leaves review mode with no review output; every
/// other task kind does nothing.
async fn abort(&self, session: Arc<SessionTaskContext>, ctx: Arc<TurnContext>) {
    if !matches!(self.kind, TaskKind::Review) {
        return;
    }
    exit_review_mode(session.clone_session(), ctx, None).await;
}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -2712,7 +2951,7 @@ mod tests {
rollout_items.push(RolloutItem::ResponseItem(assistant1.clone()));
let summary1 = "summary one";
let snapshot1 = live_history.get_history();
let snapshot1 = live_history.contents();
let user_messages1 = collect_user_messages(&snapshot1);
let rebuilt1 = build_compacted_history(
session.build_initial_context(turn_context),
@@ -2745,7 +2984,7 @@ mod tests {
rollout_items.push(RolloutItem::ResponseItem(assistant2.clone()));
let summary2 = "summary two";
let snapshot2 = live_history.get_history();
let snapshot2 = live_history.contents();
let user_messages2 = collect_user_messages(&snapshot2);
let rebuilt2 = build_compacted_history(
session.build_initial_context(turn_context),
@@ -2777,7 +3016,7 @@ mod tests {
live_history.record_items(std::iter::once(&assistant3));
rollout_items.push(RolloutItem::ResponseItem(assistant3.clone()));
(rollout_items, live_history.get_history())
(rollout_items, live_history.contents())
}
#[tokio::test]

View File

@@ -11,20 +11,19 @@ use crate::protocol::AgentMessageEvent;
use crate::protocol::CompactedItem;
use crate::protocol::ErrorEvent;
use crate::protocol::EventMsg;
use crate::protocol::InputMessageKind;
use crate::protocol::TaskStartedEvent;
use crate::protocol::TurnContextItem;
use crate::state::TaskKind;
use crate::truncate::truncate_middle;
use crate::util::backoff;
use askama::Template;
use codex_protocol::items::TurnItem;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::RolloutItem;
use codex_protocol::user_input::UserInput;
use futures::prelude::*;
use tracing::error;
pub const SUMMARIZATION_PROMPT: &str = include_str!("../../templates/compact/prompt.md");
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;
@@ -65,10 +64,9 @@ async fn run_compact_task_inner(
input: Vec<UserInput>,
) {
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
let mut history = sess.clone_history().await;
history.record_items(&[initial_input_for_turn.into()]);
let mut turn_input = sess
.turn_input_with_history(vec![initial_input_for_turn.clone().into()])
.await;
let mut truncated_count = 0usize;
let max_retries = turn_context.client.get_provider().stream_max_retries();
@@ -85,7 +83,6 @@ async fn run_compact_task_inner(
sess.persist_rollout_items(&[rollout_item]).await;
loop {
let turn_input = history.get_history();
let prompt = Prompt {
input: turn_input.clone(),
..Default::default()
@@ -110,11 +107,7 @@ async fn run_compact_task_inner(
}
Err(e @ CodexErr::ContextWindowExceeded) => {
if turn_input.len() > 1 {
// Trim from the beginning to preserve cache (prefix-based) and keep recent messages intact.
error!(
"Context window exceeded while compacting; removing oldest history item. Error: {e}"
);
history.remove_first_item();
turn_input.remove(0);
truncated_count += 1;
retries = 0;
continue;
@@ -188,13 +181,23 @@ pub fn content_items_to_text(content: &[ContentItem]) -> Option<String> {
pub(crate) fn collect_user_messages(items: &[ResponseItem]) -> Vec<String> {
items
.iter()
.filter_map(|item| match crate::event_mapping::parse_turn_item(item) {
Some(TurnItem::UserMessage(user)) => Some(user.message()),
.filter_map(|item| match item {
ResponseItem::Message { role, content, .. } if role == "user" => {
content_items_to_text(content)
}
_ => None,
})
.filter(|text| !is_session_prefix_message(text))
.collect()
}
/// Returns `true` when `text`, classified as a `"user"` message, is either a
/// user-instructions or an environment-context message.
pub fn is_session_prefix_message(text: &str) -> bool {
    let kind = InputMessageKind::from(("user", text));
    matches!(
        kind,
        InputMessageKind::UserInstructions | InputMessageKind::EnvironmentContext
    )
}
pub(crate) fn build_compacted_history(
initial_context: Vec<ResponseItem>,
user_messages: &[String],
@@ -316,16 +319,21 @@ mod tests {
ResponseItem::Message {
id: Some("user".to_string()),
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "first".to_string(),
}],
content: vec![
ContentItem::InputText {
text: "first".to_string(),
},
ContentItem::OutputText {
text: "second".to_string(),
},
],
},
ResponseItem::Other,
];
let collected = collect_user_messages(&items);
assert_eq!(vec!["first".to_string()], collected);
assert_eq!(vec!["first\nsecond".to_string()], collected);
}
#[test]

View File

@@ -1220,7 +1220,7 @@ impl Config {
}
}
}
let approval_policy = approval_policy_override
let mut approval_policy = approval_policy_override
.or(config_profile.approval_policy)
.or(cfg.approval_policy)
.unwrap_or_else(|| {
@@ -1328,6 +1328,10 @@ impl Config {
.or(cfg.review_model)
.unwrap_or_else(default_review_model);
if features.enabled(Feature::ApproveAll) {
approval_policy = AskForApproval::OnRequest;
}
let config = Self {
model,
review_model,
@@ -1707,6 +1711,26 @@ trust_level = "trusted"
Ok(())
}
/// Enabling the `approve_all` feature alone must yield the `OnRequest`
/// approval policy in the loaded config.
#[test]
fn approve_all_feature_forces_on_request_policy() -> std::io::Result<()> {
    // Only the feature flag is set; no approval policy is configured.
    let cfg = r#"
[features]
approve_all = true
"#;
    let parsed: ConfigToml = toml::from_str(cfg)
        .expect("TOML deserialization should succeed for approve_all feature");

    let temp_dir = TempDir::new()?;
    let dir_path = temp_dir.path().to_path_buf();
    let config = Config::load_from_base_config_with_overrides(
        parsed,
        ConfigOverrides::default(),
        dir_path,
    )?;

    assert!(config.features.enabled(Feature::ApproveAll));
    assert_eq!(config.approval_policy, AskForApproval::OnRequest);
    Ok(())
}
#[test]
fn config_defaults_to_auto_oauth_store_mode() -> std::io::Result<()> {
let codex_home = TempDir::new()?;

View File

@@ -1,6 +1,4 @@
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseItem;
use tracing::error;
/// Transcript of conversation history
#[derive(Debug, Clone, Default)]
@@ -14,6 +12,11 @@ impl ConversationHistory {
Self { items: Vec::new() }
}
/// Returns an owned copy of every item currently stored in the transcript.
pub(crate) fn contents(&self) -> Vec<ResponseItem> {
    self.items.to_vec()
}
/// `items` is ordered from oldest to newest.
pub(crate) fn record_items<I>(&mut self, items: I)
where
@@ -29,287 +32,9 @@ impl ConversationHistory {
}
}
/// Normalizes the stored transcript via `normalize_history` and then returns
/// an owned copy of it.
pub(crate) fn get_history(&mut self) -> Vec<ResponseItem> {
    self.normalize_history();
    self.items.clone()
}
/// Drops the oldest transcript item together with its paired call/output
/// counterpart, if it has one. Does nothing on an empty transcript.
pub(crate) fn remove_first_item(&mut self) {
    if self.items.is_empty() {
        return;
    }
    // Items are stored oldest → newest, so index 0 is the first one recorded.
    let oldest = self.items.remove(0);
    // Removing the counterpart here keeps call/output pairs consistent
    // without running a full normalization pass.
    self.remove_corresponding_for(&oldest);
}
/// Restores the call/output pairing invariants of the in-memory history:
/// 1. each function/custom/local-shell call has a matching output entry;
/// 2. each output entry has a matching call.
///
/// Missing outputs are added first, then outputs without a call are dropped.
fn normalize_history(&mut self) {
    // Step 1: give every call an output.
    self.ensure_call_outputs_present();
    // Step 2: drop outputs that have no call.
    self.remove_orphan_outputs();
}
/// Returns an owned copy of every item currently stored in the transcript.
fn contents(&self) -> Vec<ResponseItem> {
    self.items.to_vec()
}
/// Ensures every call item in the transcript has an output entry.
///
/// For each `FunctionCall`, `CustomToolCall`, and `LocalShellCall` with a
/// call id that has no output carrying the same call id, the problem is
/// reported through `error_or_panic` and a synthetic `"aborted"` output is
/// inserted immediately after the call. A `LocalShellCall` is paired with a
/// `FunctionCallOutput`.
fn ensure_call_outputs_present(&mut self) {
    // Index the call ids that already have an output once, so each call is
    // checked with a set lookup instead of a rescan of the whole transcript.
    let mut function_output_ids: std::collections::HashSet<&str> =
        std::collections::HashSet::new();
    let mut custom_output_ids: std::collections::HashSet<&str> =
        std::collections::HashSet::new();
    for item in &self.items {
        match item {
            ResponseItem::FunctionCallOutput { call_id, .. } => {
                function_output_ids.insert(call_id.as_str());
            }
            ResponseItem::CustomToolCallOutput { call_id, .. } => {
                custom_output_ids.insert(call_id.as_str());
            }
            _ => {}
        }
    }

    // Synthetic outputs paired with the index of the call they belong to.
    // Entries are pushed in ascending index order.
    let mut missing_outputs_to_insert: Vec<(usize, ResponseItem)> = Vec::new();

    for (idx, item) in self.items.iter().enumerate() {
        match item {
            ResponseItem::FunctionCall { call_id, .. } => {
                if !function_output_ids.contains(call_id.as_str()) {
                    error_or_panic(format!(
                        "Function call output is missing for call id: {call_id}"
                    ));
                    missing_outputs_to_insert.push((
                        idx,
                        ResponseItem::FunctionCallOutput {
                            call_id: call_id.clone(),
                            output: FunctionCallOutputPayload {
                                content: "aborted".to_string(),
                                success: None,
                            },
                        },
                    ));
                }
            }
            ResponseItem::CustomToolCall { call_id, .. } => {
                if !custom_output_ids.contains(call_id.as_str()) {
                    error_or_panic(format!(
                        "Custom tool call output is missing for call id: {call_id}"
                    ));
                    missing_outputs_to_insert.push((
                        idx,
                        ResponseItem::CustomToolCallOutput {
                            call_id: call_id.clone(),
                            output: "aborted".to_string(),
                        },
                    ));
                }
            }
            // LocalShellCall is represented in upstream streams by a FunctionCallOutput
            ResponseItem::LocalShellCall { call_id, .. } => {
                if let Some(call_id) = call_id.as_ref() {
                    if !function_output_ids.contains(call_id.as_str()) {
                        error_or_panic(format!(
                            "Local shell call output is missing for call id: {call_id}"
                        ));
                        missing_outputs_to_insert.push((
                            idx,
                            ResponseItem::FunctionCallOutput {
                                call_id: call_id.clone(),
                                output: FunctionCallOutputPayload {
                                    content: "aborted".to_string(),
                                    success: None,
                                },
                            },
                        ));
                    }
                }
            }
            ResponseItem::Reasoning { .. }
            | ResponseItem::WebSearchCall { .. }
            | ResponseItem::FunctionCallOutput { .. }
            | ResponseItem::CustomToolCallOutput { .. }
            | ResponseItem::Other
            | ResponseItem::Message { .. } => {
                // nothing to do for these variants
            }
        }
    }

    // Insert from the end so earlier indices are not shifted. `idx` always
    // refers to an existing item, so `idx + 1` is a valid insertion position.
    for (idx, item) in missing_outputs_to_insert.into_iter().rev() {
        self.items.insert(idx + 1, item);
    }
}
/// Removes output entries that have no corresponding call in the transcript.
///
/// A `FunctionCallOutput` is matched against `FunctionCall` and
/// `LocalShellCall` (with a call id); a `CustomToolCallOutput` is matched
/// against `CustomToolCall`. Each orphan is reported through
/// `error_or_panic`. Afterwards every output of either kind whose call id is
/// in the collected orphan set is dropped.
fn remove_orphan_outputs(&mut self) {
    // Index the call ids that exist, so each output is checked with a set
    // lookup rather than a rescan of a cloned transcript.
    let mut function_call_ids: std::collections::HashSet<&str> =
        std::collections::HashSet::new();
    let mut custom_call_ids: std::collections::HashSet<&str> =
        std::collections::HashSet::new();
    for item in &self.items {
        match item {
            ResponseItem::FunctionCall { call_id, .. }
            | ResponseItem::LocalShellCall {
                call_id: Some(call_id),
                ..
            } => {
                function_call_ids.insert(call_id.as_str());
            }
            ResponseItem::CustomToolCall { call_id, .. } => {
                custom_call_ids.insert(call_id.as_str());
            }
            _ => {}
        }
    }

    // Owned ids, so the borrow of `self.items` ends before `retain` mutates it.
    let mut orphan_output_call_ids: std::collections::HashSet<String> =
        std::collections::HashSet::new();
    for item in &self.items {
        match item {
            ResponseItem::FunctionCallOutput { call_id, .. } => {
                if !function_call_ids.contains(call_id.as_str()) {
                    error_or_panic(format!("Function call is missing for call id: {call_id}"));
                    orphan_output_call_ids.insert(call_id.clone());
                }
            }
            ResponseItem::CustomToolCallOutput { call_id, .. } => {
                if !custom_call_ids.contains(call_id.as_str()) {
                    error_or_panic(format!(
                        "Custom tool call is missing for call id: {call_id}"
                    ));
                    orphan_output_call_ids.insert(call_id.clone());
                }
            }
            ResponseItem::FunctionCall { .. }
            | ResponseItem::CustomToolCall { .. }
            | ResponseItem::LocalShellCall { .. }
            | ResponseItem::Reasoning { .. }
            | ResponseItem::WebSearchCall { .. }
            | ResponseItem::Other
            | ResponseItem::Message { .. } => {
                // nothing to do for these variants
            }
        }
    }

    if !orphan_output_call_ids.is_empty() {
        self.items.retain(|item| match item {
            ResponseItem::FunctionCallOutput { call_id, .. }
            | ResponseItem::CustomToolCallOutput { call_id, .. } => {
                !orphan_output_call_ids.contains(call_id)
            }
            _ => true,
        });
    }
}
/// Replaces the entire transcript with `items`, discarding whatever was
/// stored before. No normalization is performed here.
pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
self.items = items;
}
/// Removes the first transcript item that is the call/output counterpart of
/// `item`, if there is one.
///
/// Pairings:
/// - `FunctionCall` <-> `FunctionCallOutput`
/// - `CustomToolCall` <-> `CustomToolCallOutput`
/// - `LocalShellCall` (with `call_id: Some`) <-> `FunctionCallOutput`
///
/// Any other kind of item has no counterpart and nothing is removed.
fn remove_corresponding_for(&mut self, item: &ResponseItem) {
    match item {
        // Both call kinds are answered by a FunctionCallOutput.
        ResponseItem::FunctionCall { call_id, .. }
        | ResponseItem::LocalShellCall {
            call_id: Some(call_id),
            ..
        } => {
            self.remove_first_matching(|candidate| {
                matches!(
                    candidate,
                    ResponseItem::FunctionCallOutput { call_id: existing, .. }
                        if existing == call_id
                )
            });
        }
        ResponseItem::CustomToolCall { call_id, .. } => {
            self.remove_first_matching(|candidate| {
                matches!(
                    candidate,
                    ResponseItem::CustomToolCallOutput { call_id: existing, .. }
                        if existing == call_id
                )
            });
        }
        // A FunctionCallOutput may answer either a function or a shell call.
        ResponseItem::FunctionCallOutput { call_id, .. } => {
            self.remove_first_matching(|candidate| match candidate {
                ResponseItem::FunctionCall {
                    call_id: existing, ..
                }
                | ResponseItem::LocalShellCall {
                    call_id: Some(existing),
                    ..
                } => existing == call_id,
                _ => false,
            });
        }
        ResponseItem::CustomToolCallOutput { call_id, .. } => {
            self.remove_first_matching(|candidate| {
                matches!(
                    candidate,
                    ResponseItem::CustomToolCall { call_id: existing, .. }
                        if existing == call_id
                )
            });
        }
        _ => {}
    }
}
/// Deletes the earliest item for which `predicate` returns `true`; leaves the
/// transcript untouched when nothing matches.
fn remove_first_matching<F>(&mut self, predicate: F)
where
    F: FnMut(&ResponseItem) -> bool,
{
    let Some(index) = self.items.iter().position(predicate) else {
        return;
    };
    self.items.remove(index);
}
}
/// Reports a broken history invariant.
///
/// Panics with `message` when built with debug assertions or when the crate
/// version contains `"alpha"`; otherwise logs `message` at error level.
#[inline]
fn error_or_panic(message: String) {
    let strict = cfg!(debug_assertions) || env!("CARGO_PKG_VERSION").contains("alpha");
    if strict {
        panic!("{message}");
    }
    error!("{message}");
}
/// Anything that is not a system message or "reasoning" message is considered
@@ -332,11 +57,6 @@ fn is_api_message(message: &ResponseItem) -> bool {
mod tests {
use super::*;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::LocalShellAction;
use codex_protocol::models::LocalShellExecAction;
use codex_protocol::models::LocalShellStatus;
use pretty_assertions::assert_eq;
fn assistant_msg(text: &str) -> ResponseItem {
ResponseItem::Message {
@@ -348,12 +68,6 @@ mod tests {
}
}
fn create_history_with_items(items: Vec<ResponseItem>) -> ConversationHistory {
let mut h = ConversationHistory::new();
h.record_items(items.iter());
h
}
fn user_msg(text: &str) -> ResponseItem {
ResponseItem::Message {
id: None,
@@ -403,452 +117,4 @@ mod tests {
]
);
}
#[test]
fn remove_first_item_removes_matching_output_for_function_call() {
let items = vec![
ResponseItem::FunctionCall {
id: None,
name: "do_it".to_string(),
arguments: "{}".to_string(),
call_id: "call-1".to_string(),
},
ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload {
content: "ok".to_string(),
success: None,
},
},
];
let mut h = create_history_with_items(items);
h.remove_first_item();
assert_eq!(h.contents(), vec![]);
}
#[test]
fn remove_first_item_removes_matching_call_for_output() {
let items = vec![
ResponseItem::FunctionCallOutput {
call_id: "call-2".to_string(),
output: FunctionCallOutputPayload {
content: "ok".to_string(),
success: None,
},
},
ResponseItem::FunctionCall {
id: None,
name: "do_it".to_string(),
arguments: "{}".to_string(),
call_id: "call-2".to_string(),
},
];
let mut h = create_history_with_items(items);
h.remove_first_item();
assert_eq!(h.contents(), vec![]);
}
#[test]
fn remove_first_item_handles_local_shell_pair() {
let items = vec![
ResponseItem::LocalShellCall {
id: None,
call_id: Some("call-3".to_string()),
status: LocalShellStatus::Completed,
action: LocalShellAction::Exec(LocalShellExecAction {
command: vec!["echo".to_string(), "hi".to_string()],
timeout_ms: None,
working_directory: None,
env: None,
user: None,
}),
},
ResponseItem::FunctionCallOutput {
call_id: "call-3".to_string(),
output: FunctionCallOutputPayload {
content: "ok".to_string(),
success: None,
},
},
];
let mut h = create_history_with_items(items);
h.remove_first_item();
assert_eq!(h.contents(), vec![]);
}
#[test]
fn remove_first_item_handles_custom_tool_pair() {
let items = vec![
ResponseItem::CustomToolCall {
id: None,
status: None,
call_id: "tool-1".to_string(),
name: "my_tool".to_string(),
input: "{}".to_string(),
},
ResponseItem::CustomToolCallOutput {
call_id: "tool-1".to_string(),
output: "ok".to_string(),
},
];
let mut h = create_history_with_items(items);
h.remove_first_item();
assert_eq!(h.contents(), vec![]);
}
//TODO(aibrahim): run CI in release mode.
#[cfg(not(debug_assertions))]
#[test]
fn normalize_adds_missing_output_for_function_call() {
let items = vec![ResponseItem::FunctionCall {
id: None,
name: "do_it".to_string(),
arguments: "{}".to_string(),
call_id: "call-x".to_string(),
}];
let mut h = create_history_with_items(items);
h.normalize_history();
assert_eq!(
h.contents(),
vec![
ResponseItem::FunctionCall {
id: None,
name: "do_it".to_string(),
arguments: "{}".to_string(),
call_id: "call-x".to_string(),
},
ResponseItem::FunctionCallOutput {
call_id: "call-x".to_string(),
output: FunctionCallOutputPayload {
content: "aborted".to_string(),
success: None,
},
},
]
);
}
#[cfg(not(debug_assertions))]
#[test]
fn normalize_adds_missing_output_for_custom_tool_call() {
let items = vec![ResponseItem::CustomToolCall {
id: None,
status: None,
call_id: "tool-x".to_string(),
name: "custom".to_string(),
input: "{}".to_string(),
}];
let mut h = create_history_with_items(items);
h.normalize_history();
assert_eq!(
h.contents(),
vec![
ResponseItem::CustomToolCall {
id: None,
status: None,
call_id: "tool-x".to_string(),
name: "custom".to_string(),
input: "{}".to_string(),
},
ResponseItem::CustomToolCallOutput {
call_id: "tool-x".to_string(),
output: "aborted".to_string(),
},
]
);
}
#[cfg(not(debug_assertions))]
#[test]
fn normalize_adds_missing_output_for_local_shell_call_with_id() {
let items = vec![ResponseItem::LocalShellCall {
id: None,
call_id: Some("shell-1".to_string()),
status: LocalShellStatus::Completed,
action: LocalShellAction::Exec(LocalShellExecAction {
command: vec!["echo".to_string(), "hi".to_string()],
timeout_ms: None,
working_directory: None,
env: None,
user: None,
}),
}];
let mut h = create_history_with_items(items);
h.normalize_history();
assert_eq!(
h.contents(),
vec![
ResponseItem::LocalShellCall {
id: None,
call_id: Some("shell-1".to_string()),
status: LocalShellStatus::Completed,
action: LocalShellAction::Exec(LocalShellExecAction {
command: vec!["echo".to_string(), "hi".to_string()],
timeout_ms: None,
working_directory: None,
env: None,
user: None,
}),
},
ResponseItem::FunctionCallOutput {
call_id: "shell-1".to_string(),
output: FunctionCallOutputPayload {
content: "aborted".to_string(),
success: None,
},
},
]
);
}
#[cfg(not(debug_assertions))]
#[test]
fn normalize_removes_orphan_function_call_output() {
let items = vec![ResponseItem::FunctionCallOutput {
call_id: "orphan-1".to_string(),
output: FunctionCallOutputPayload {
content: "ok".to_string(),
success: None,
},
}];
let mut h = create_history_with_items(items);
h.normalize_history();
assert_eq!(h.contents(), vec![]);
}
#[cfg(not(debug_assertions))]
#[test]
fn normalize_removes_orphan_custom_tool_call_output() {
let items = vec![ResponseItem::CustomToolCallOutput {
call_id: "orphan-2".to_string(),
output: "ok".to_string(),
}];
let mut h = create_history_with_items(items);
h.normalize_history();
assert_eq!(h.contents(), vec![]);
}
#[cfg(not(debug_assertions))]
#[test]
fn normalize_mixed_inserts_and_removals() {
let items = vec![
// Will get an inserted output
ResponseItem::FunctionCall {
id: None,
name: "f1".to_string(),
arguments: "{}".to_string(),
call_id: "c1".to_string(),
},
// Orphan output that should be removed
ResponseItem::FunctionCallOutput {
call_id: "c2".to_string(),
output: FunctionCallOutputPayload {
content: "ok".to_string(),
success: None,
},
},
// Will get an inserted custom tool output
ResponseItem::CustomToolCall {
id: None,
status: None,
call_id: "t1".to_string(),
name: "tool".to_string(),
input: "{}".to_string(),
},
// Local shell call also gets an inserted function call output
ResponseItem::LocalShellCall {
id: None,
call_id: Some("s1".to_string()),
status: LocalShellStatus::Completed,
action: LocalShellAction::Exec(LocalShellExecAction {
command: vec!["echo".to_string()],
timeout_ms: None,
working_directory: None,
env: None,
user: None,
}),
},
];
let mut h = create_history_with_items(items);
h.normalize_history();
assert_eq!(
h.contents(),
vec![
ResponseItem::FunctionCall {
id: None,
name: "f1".to_string(),
arguments: "{}".to_string(),
call_id: "c1".to_string(),
},
ResponseItem::FunctionCallOutput {
call_id: "c1".to_string(),
output: FunctionCallOutputPayload {
content: "aborted".to_string(),
success: None,
},
},
ResponseItem::CustomToolCall {
id: None,
status: None,
call_id: "t1".to_string(),
name: "tool".to_string(),
input: "{}".to_string(),
},
ResponseItem::CustomToolCallOutput {
call_id: "t1".to_string(),
output: "aborted".to_string(),
},
ResponseItem::LocalShellCall {
id: None,
call_id: Some("s1".to_string()),
status: LocalShellStatus::Completed,
action: LocalShellAction::Exec(LocalShellExecAction {
command: vec!["echo".to_string()],
timeout_ms: None,
working_directory: None,
env: None,
user: None,
}),
},
ResponseItem::FunctionCallOutput {
call_id: "s1".to_string(),
output: FunctionCallOutputPayload {
content: "aborted".to_string(),
success: None,
},
},
]
);
}
// In debug builds we panic on normalization errors instead of silently fixing them.
#[cfg(debug_assertions)]
#[test]
#[should_panic]
fn normalize_adds_missing_output_for_function_call_panics_in_debug() {
let items = vec![ResponseItem::FunctionCall {
id: None,
name: "do_it".to_string(),
arguments: "{}".to_string(),
call_id: "call-x".to_string(),
}];
let mut h = create_history_with_items(items);
h.normalize_history();
}
#[cfg(debug_assertions)]
#[test]
#[should_panic]
fn normalize_adds_missing_output_for_custom_tool_call_panics_in_debug() {
let items = vec![ResponseItem::CustomToolCall {
id: None,
status: None,
call_id: "tool-x".to_string(),
name: "custom".to_string(),
input: "{}".to_string(),
}];
let mut h = create_history_with_items(items);
h.normalize_history();
}
#[cfg(debug_assertions)]
#[test]
#[should_panic]
fn normalize_adds_missing_output_for_local_shell_call_with_id_panics_in_debug() {
let items = vec![ResponseItem::LocalShellCall {
id: None,
call_id: Some("shell-1".to_string()),
status: LocalShellStatus::Completed,
action: LocalShellAction::Exec(LocalShellExecAction {
command: vec!["echo".to_string(), "hi".to_string()],
timeout_ms: None,
working_directory: None,
env: None,
user: None,
}),
}];
let mut h = create_history_with_items(items);
h.normalize_history();
}
#[cfg(debug_assertions)]
#[test]
#[should_panic]
fn normalize_removes_orphan_function_call_output_panics_in_debug() {
let items = vec![ResponseItem::FunctionCallOutput {
call_id: "orphan-1".to_string(),
output: FunctionCallOutputPayload {
content: "ok".to_string(),
success: None,
},
}];
let mut h = create_history_with_items(items);
h.normalize_history();
}
#[cfg(debug_assertions)]
#[test]
#[should_panic]
fn normalize_removes_orphan_custom_tool_call_output_panics_in_debug() {
let items = vec![ResponseItem::CustomToolCallOutput {
call_id: "orphan-2".to_string(),
output: "ok".to_string(),
}];
let mut h = create_history_with_items(items);
h.normalize_history();
}
#[cfg(debug_assertions)]
#[test]
#[should_panic]
fn normalize_mixed_inserts_and_removals_panics_in_debug() {
let items = vec![
ResponseItem::FunctionCall {
id: None,
name: "f1".to_string(),
arguments: "{}".to_string(),
call_id: "c1".to_string(),
},
ResponseItem::FunctionCallOutput {
call_id: "c2".to_string(),
output: FunctionCallOutputPayload {
content: "ok".to_string(),
success: None,
},
},
ResponseItem::CustomToolCall {
id: None,
status: None,
call_id: "t1".to_string(),
name: "tool".to_string(),
input: "{}".to_string(),
},
ResponseItem::LocalShellCall {
id: None,
call_id: Some("s1".to_string()),
status: LocalShellStatus::Completed,
action: LocalShellAction::Exec(LocalShellExecAction {
command: vec!["echo".to_string()],
timeout_ms: None,
working_directory: None,
env: None,
user: None,
}),
},
];
let mut h = create_history_with_items(items);
h.normalize_history();
}
}

View File

@@ -3,6 +3,8 @@ use crate::CodexAuth;
use crate::codex::Codex;
use crate::codex::CodexSpawnOk;
use crate::codex::INITIAL_SUBMIT_ID;
use crate::codex::compact::content_items_to_text;
use crate::codex::compact::is_session_prefix_message;
use crate::codex_conversation::CodexConversation;
use crate::config::Config;
use crate::error::CodexErr;
@@ -12,7 +14,6 @@ use crate::protocol::EventMsg;
use crate::protocol::SessionConfiguredEvent;
use crate::rollout::RolloutRecorder;
use codex_protocol::ConversationId;
use codex_protocol::items::TurnItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::InitialHistory;
use codex_protocol::protocol::RolloutItem;
@@ -181,11 +182,9 @@ fn truncate_before_nth_user_message(history: InitialHistory, n: usize) -> Initia
// Find indices of user message inputs in rollout order.
let mut user_positions: Vec<usize> = Vec::new();
for (idx, item) in items.iter().enumerate() {
if let RolloutItem::ResponseItem(item @ ResponseItem::Message { .. }) = item
&& matches!(
crate::event_mapping::parse_turn_item(item),
Some(TurnItem::UserMessage(_))
)
if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = item
&& role == "user"
&& content_items_to_text(content).is_some_and(|text| !is_session_prefix_message(&text))
{
user_positions.push(idx);
}

View File

@@ -1,4 +1,3 @@
use crate::codex::ProcessedResponseItem;
use crate::exec::ExecToolCallOutput;
use crate::token_data::KnownPlan;
use crate::token_data::PlanType;
@@ -54,11 +53,8 @@ pub enum SandboxErr {
#[derive(Error, Debug)]
pub enum CodexErr {
// todo(aibrahim): get rid of this error carrying the dangling artifacts
#[error("turn aborted")]
TurnAborted {
dangling_artifacts: Vec<ProcessedResponseItem>,
},
TurnAborted,
/// Returned by ResponsesClient when the SSE stream disconnects or errors out **after** the HTTP
/// handshake has succeeded but **before** it finished emitting `response.completed`.
@@ -162,9 +158,7 @@ pub enum CodexErr {
impl From<CancelErr> for CodexErr {
fn from(_: CancelErr) -> Self {
CodexErr::TurnAborted {
dangling_artifacts: Vec::new(),
}
CodexErr::TurnAborted
}
}

View File

@@ -1,131 +1,139 @@
use codex_protocol::items::AgentMessageContent;
use codex_protocol::items::AgentMessageItem;
use codex_protocol::items::ReasoningItem;
use codex_protocol::items::TurnItem;
use codex_protocol::items::UserMessageItem;
use codex_protocol::items::WebSearchItem;
use crate::protocol::AgentMessageEvent;
use crate::protocol::AgentReasoningEvent;
use crate::protocol::AgentReasoningRawContentEvent;
use crate::protocol::EventMsg;
use crate::protocol::InputMessageKind;
use crate::protocol::UserMessageEvent;
use crate::protocol::WebSearchEndEvent;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ReasoningItemContent;
use codex_protocol::models::ReasoningItemReasoningSummary;
use codex_protocol::models::ResponseItem;
use codex_protocol::models::WebSearchAction;
use codex_protocol::user_input::UserInput;
use tracing::warn;
/// Returns `true` when `text`, after leading whitespace is skipped, begins
/// with the `<environment_context>` or `<user_instructions>` tag.
///
/// The comparison ignores ASCII case. It is done on the leading bytes only,
/// so no lowercased copy of the whole message is allocated.
fn is_session_prefix(text: &str) -> bool {
    const SESSION_PREFIX_TAGS: [&str; 2] = ["<environment_context>", "<user_instructions>"];

    let head = text.trim_start().as_bytes();
    SESSION_PREFIX_TAGS.iter().any(|tag| {
        // `get` yields `None` when the text is shorter than the tag.
        head.get(..tag.len())
            .is_some_and(|candidate| candidate.eq_ignore_ascii_case(tag.as_bytes()))
    })
}
/// Converts the content of a user-role message into a `UserMessageItem`.
///
/// Returns `None` as soon as any text part (input or output) is a session
/// prefix. Input text and images are collected in order. Output text that is
/// not a session prefix is logged with a warning and left out of the result.
fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {
    let mut inputs: Vec<UserInput> = Vec::new();

    for part in message {
        match part {
            // Session-prefix text anywhere disqualifies the whole message.
            ContentItem::InputText { text } | ContentItem::OutputText { text }
                if is_session_prefix(text) =>
            {
                return None;
            }
            ContentItem::InputText { text } => {
                inputs.push(UserInput::Text { text: text.clone() });
            }
            ContentItem::InputImage { image_url } => {
                inputs.push(UserInput::Image {
                    image_url: image_url.clone(),
                });
            }
            ContentItem::OutputText { text } => {
                warn!("Output text in user message: {}", text);
            }
        }
    }

    Some(UserMessageItem::new(&inputs))
}
/// Converts the content of an assistant-role message into an
/// `AgentMessageItem`.
///
/// Only output-text parts are kept, in order. Any other part is logged with a
/// warning and skipped.
fn parse_agent_message(message: &[ContentItem]) -> AgentMessageItem {
    let mut parts: Vec<AgentMessageContent> = Vec::new();

    for part in message {
        if let ContentItem::OutputText { text } = part {
            parts.push(AgentMessageContent::Text { text: text.clone() });
        } else {
            warn!("Unexpected content item in agent message: {:?}", part);
        }
    }

    AgentMessageItem::new(&parts)
}
pub fn parse_turn_item(item: &ResponseItem) -> Option<TurnItem> {
/// Convert a `ResponseItem` into zero or more `EventMsg` values that the UI can render.
///
/// When `show_raw_agent_reasoning` is false, raw reasoning content events are omitted.
pub(crate) fn map_response_item_to_event_messages(
item: &ResponseItem,
show_raw_agent_reasoning: bool,
) -> Vec<EventMsg> {
match item {
ResponseItem::Message { role, content, .. } => match role.as_str() {
"user" => parse_user_message(content).map(TurnItem::UserMessage),
"assistant" => Some(TurnItem::AgentMessage(parse_agent_message(content))),
"system" => None,
_ => None,
},
ResponseItem::Reasoning {
id,
summary,
content,
..
} => {
let summary_text = summary
.iter()
.map(|entry| match entry {
ReasoningItemReasoningSummary::SummaryText { text } => text.clone(),
})
.collect();
let raw_content = content
.clone()
.unwrap_or_default()
.into_iter()
.map(|entry| match entry {
ReasoningItemContent::ReasoningText { text }
| ReasoningItemContent::Text { text } => text,
})
.collect();
Some(TurnItem::Reasoning(ReasoningItem {
id: id.clone(),
summary_text,
raw_content,
}))
ResponseItem::Message { role, content, .. } => {
// Do not surface system messages as user events.
if role == "system" {
return Vec::new();
}
let mut events: Vec<EventMsg> = Vec::new();
let mut message_parts: Vec<String> = Vec::new();
let mut images: Vec<String> = Vec::new();
let mut kind: Option<InputMessageKind> = None;
for content_item in content.iter() {
match content_item {
ContentItem::InputText { text } => {
if kind.is_none() {
let trimmed = text.trim_start();
kind = if trimmed.starts_with("<environment_context>") {
Some(InputMessageKind::EnvironmentContext)
} else if trimmed.starts_with("<user_instructions>") {
Some(InputMessageKind::UserInstructions)
} else {
Some(InputMessageKind::Plain)
};
}
message_parts.push(text.clone());
}
ContentItem::InputImage { image_url } => {
images.push(image_url.clone());
}
ContentItem::OutputText { text } => {
events.push(EventMsg::AgentMessage(AgentMessageEvent {
message: text.clone(),
}));
}
}
}
if !message_parts.is_empty() || !images.is_empty() {
let message = if message_parts.is_empty() {
String::new()
} else {
message_parts.join("")
};
let images = if images.is_empty() {
None
} else {
Some(images)
};
events.push(EventMsg::UserMessage(UserMessageEvent {
message,
kind,
images,
}));
}
events
}
ResponseItem::WebSearchCall {
id,
action: WebSearchAction::Search { query },
..
} => Some(TurnItem::WebSearch(WebSearchItem {
id: id.clone().unwrap_or_default(),
query: query.clone(),
})),
_ => None,
ResponseItem::Reasoning {
summary, content, ..
} => {
let mut events = Vec::new();
for ReasoningItemReasoningSummary::SummaryText { text } in summary {
events.push(EventMsg::AgentReasoning(AgentReasoningEvent {
text: text.clone(),
}));
}
if let Some(items) = content.as_ref().filter(|_| show_raw_agent_reasoning) {
for c in items {
let text = match c {
ReasoningItemContent::ReasoningText { text }
| ReasoningItemContent::Text { text } => text,
};
events.push(EventMsg::AgentReasoningRawContent(
AgentReasoningRawContentEvent { text: text.clone() },
));
}
}
events
}
ResponseItem::WebSearchCall { id, action, .. } => match action {
WebSearchAction::Search { query } => {
let call_id = id.clone().unwrap_or_else(|| "".to_string());
vec![EventMsg::WebSearchEnd(WebSearchEndEvent {
call_id,
query: query.clone(),
})]
}
WebSearchAction::Other => Vec::new(),
},
// Variants that require side effects are handled by higher layers and do not emit events here.
ResponseItem::FunctionCall { .. }
| ResponseItem::FunctionCallOutput { .. }
| ResponseItem::LocalShellCall { .. }
| ResponseItem::CustomToolCall { .. }
| ResponseItem::CustomToolCallOutput { .. }
| ResponseItem::Other => Vec::new(),
}
}
#[cfg(test)]
mod tests {
use super::parse_turn_item;
use codex_protocol::items::AgentMessageContent;
use codex_protocol::items::TurnItem;
use super::map_response_item_to_event_messages;
use crate::protocol::EventMsg;
use crate::protocol::InputMessageKind;
use assert_matches::assert_matches;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ReasoningItemContent;
use codex_protocol::models::ReasoningItemReasoningSummary;
use codex_protocol::models::ResponseItem;
use codex_protocol::models::WebSearchAction;
use codex_protocol::user_input::UserInput;
use pretty_assertions::assert_eq;
#[test]
fn parses_user_message_with_text_and_two_images() {
fn maps_user_message_with_text_and_two_images() {
let img1 = "https://example.com/one.png".to_string();
let img2 = "https://example.com/two.jpg".to_string();
@@ -145,128 +153,16 @@ mod tests {
],
};
let turn_item = parse_turn_item(&item).expect("expected user message turn item");
let events = map_response_item_to_event_messages(&item, false);
assert_eq!(events.len(), 1, "expected a single user message event");
match turn_item {
TurnItem::UserMessage(user) => {
let expected_content = vec![
UserInput::Text {
text: "Hello world".to_string(),
},
UserInput::Image { image_url: img1 },
UserInput::Image { image_url: img2 },
];
assert_eq!(user.content, expected_content);
match &events[0] {
EventMsg::UserMessage(user) => {
assert_eq!(user.message, "Hello world");
assert_matches!(user.kind, Some(InputMessageKind::Plain));
assert_eq!(user.images, Some(vec![img1, img2]));
}
other => panic!("expected TurnItem::UserMessage, got {other:?}"),
}
}
/// An assistant `Message` holding a single `OutputText` entry must parse into
/// `TurnItem::AgentMessage` whose first content entry carries the same text.
#[test]
fn parses_agent_message() {
    let content = vec![ContentItem::OutputText {
        text: "Hello from Codex".to_string(),
    }];
    let item = ResponseItem::Message {
        id: Some("msg-1".to_string()),
        role: "assistant".to_string(),
        content,
    };

    let message = match parse_turn_item(&item).expect("expected agent message turn item") {
        TurnItem::AgentMessage(message) => message,
        other => panic!("expected TurnItem::AgentMessage, got {other:?}"),
    };

    let Some(AgentMessageContent::Text { text }) = message.content.first() else {
        panic!("expected agent message text content");
    };
    assert_eq!(text, "Hello from Codex");
}
/// A `Reasoning` item with two summary entries and one raw `ReasoningText` entry
/// must parse into `TurnItem::Reasoning` exposing both lists in order.
#[test]
fn parses_reasoning_summary_and_raw_content() {
    let summary = vec![
        ReasoningItemReasoningSummary::SummaryText {
            text: "Step 1".to_string(),
        },
        ReasoningItemReasoningSummary::SummaryText {
            text: "Step 2".to_string(),
        },
    ];
    let content = Some(vec![ReasoningItemContent::ReasoningText {
        text: "raw details".to_string(),
    }]);
    let item = ResponseItem::Reasoning {
        id: "reasoning_1".to_string(),
        summary,
        content,
        encrypted_content: None,
    };

    let reasoning = match parse_turn_item(&item).expect("expected reasoning turn item") {
        TurnItem::Reasoning(reasoning) => reasoning,
        other => panic!("expected TurnItem::Reasoning, got {other:?}"),
    };

    assert_eq!(
        reasoning.summary_text,
        vec!["Step 1".to_string(), "Step 2".to_string()]
    );
    assert_eq!(reasoning.raw_content, vec!["raw details".to_string()]);
}
/// Raw reasoning content may mix `ReasoningText` and `Text` entries; both kinds
/// must end up in `raw_content`, in their original order.
#[test]
fn parses_reasoning_including_raw_content() {
    let summary = vec![ReasoningItemReasoningSummary::SummaryText {
        text: "Summarized step".to_string(),
    }];
    let content = Some(vec![
        ReasoningItemContent::ReasoningText {
            text: "raw step".to_string(),
        },
        ReasoningItemContent::Text {
            text: "final thought".to_string(),
        },
    ]);
    let item = ResponseItem::Reasoning {
        id: "reasoning_2".to_string(),
        summary,
        content,
        encrypted_content: None,
    };

    let reasoning = match parse_turn_item(&item).expect("expected reasoning turn item") {
        TurnItem::Reasoning(reasoning) => reasoning,
        other => panic!("expected TurnItem::Reasoning, got {other:?}"),
    };

    assert_eq!(reasoning.summary_text, vec!["Summarized step".to_string()]);
    assert_eq!(
        reasoning.raw_content,
        vec!["raw step".to_string(), "final thought".to_string()]
    );
}
#[test]
fn parses_web_search_call() {
let item = ResponseItem::WebSearchCall {
id: Some("ws_1".to_string()),
status: Some("completed".to_string()),
action: WebSearchAction::Search {
query: "weather".to_string(),
},
};
let turn_item = parse_turn_item(&item).expect("expected web search turn item");
match turn_item {
TurnItem::WebSearch(search) => {
assert_eq!(search.id, "ws_1");
assert_eq!(search.query, "weather");
}
other => panic!("expected TurnItem::WebSearch, got {other:?}"),
other => panic!("expected UserMessage, got {other:?}"),
}
}
}

View File

@@ -31,7 +31,7 @@ pub enum Feature {
UnifiedExec,
/// Use the streamable exec-command/write-stdin tool pair.
StreamableShell,
/// Enable experimental RMCP features such as OAuth login.
/// Use the official Rust MCP client (rmcp).
RmcpClient,
/// Include the freeform apply_patch tool.
ApplyPatchFreeform,
@@ -39,6 +39,8 @@ pub enum Feature {
ViewImageTool,
/// Allow the model to request web searches.
WebSearchRequest,
/// Automatically approve all approval requests from the harness.
ApproveAll,
}
impl Feature {
@@ -236,4 +238,10 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Stable,
default_enabled: false,
},
FeatureSpec {
id: Feature::ApproveAll,
key: "approve_all",
stage: Stage::Experimental,
default_enabled: false,
},
];

View File

@@ -14,7 +14,6 @@ mod client_common;
pub mod codex;
mod codex_conversation;
pub use codex_conversation::CodexConversation;
mod codex_delegate;
mod command_safety;
pub mod config;
pub mod config_edit;
@@ -37,7 +36,6 @@ mod mcp_tool_call;
mod message_history;
mod model_provider_info;
pub mod parse_command;
mod response_processing;
pub mod sandboxing;
pub mod token_data;
mod truncate;
@@ -100,10 +98,11 @@ pub use client_common::REVIEW_PROMPT;
pub use client_common::ResponseEvent;
pub use client_common::ResponseStream;
pub use codex::compact::content_items_to_text;
pub use codex::compact::is_session_prefix_message;
pub use codex_protocol::models::ContentItem;
pub use codex_protocol::models::LocalShellAction;
pub use codex_protocol::models::LocalShellExecAction;
pub use codex_protocol::models::LocalShellStatus;
pub use codex_protocol::models::ReasoningItemContent;
pub use codex_protocol::models::ResponseItem;
pub use event_mapping::parse_turn_item;
pub mod otel_init;

View File

@@ -1,6 +1,6 @@
//! Connection manager for Model Context Protocol (MCP) servers.
//!
//! The [`McpConnectionManager`] owns one [`codex_rmcp_client::RmcpClient`] per
//! The [`McpConnectionManager`] owns one [`codex_mcp_client::McpClient`] per
//! configured server (keyed by the *server name*). It offers convenience
//! helpers to query the available tools across *all* servers and returns them
//! in a single aggregated map using the fully-qualified tool name
@@ -10,12 +10,14 @@ use std::collections::HashMap;
use std::collections::HashSet;
use std::env;
use std::ffi::OsString;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use anyhow::Context;
use anyhow::Result;
use anyhow::anyhow;
use codex_mcp_client::McpClient;
use codex_rmcp_client::OAuthCredentialsStoreMode;
use codex_rmcp_client::RmcpClient;
use mcp_types::ClientCapabilities;
@@ -97,12 +99,134 @@ struct ToolInfo {
}
struct ManagedClient {
client: Arc<RmcpClient>,
client: McpClientAdapter,
startup_timeout: Duration,
tool_timeout: Option<Duration>,
}
/// A thin wrapper around a set of running [`RmcpClient`] instances.
/// One running MCP client, backed by either of the two client implementations.
///
/// Cloning copies only the `Arc` handle, so clones share the same underlying client.
#[derive(Clone)]
enum McpClientAdapter {
/// Client from the `codex_mcp_client` crate. Through this adapter it serves tool
/// listing and tool calls only; resource operations are not forwarded to it.
Legacy(Arc<McpClient>),
/// Client from the `codex_rmcp_client` crate; every adapter operation is forwarded to it.
Rmcp(Arc<RmcpClient>),
}
impl McpClientAdapter {
#[allow(clippy::too_many_arguments)]
async fn new_stdio_client(
use_rmcp_client: bool,
program: OsString,
args: Vec<OsString>,
env: Option<HashMap<String, String>>,
env_vars: Vec<String>,
cwd: Option<PathBuf>,
params: mcp_types::InitializeRequestParams,
startup_timeout: Duration,
) -> Result<Self> {
if use_rmcp_client {
let client =
Arc::new(RmcpClient::new_stdio_client(program, args, env, &env_vars, cwd).await?);
client.initialize(params, Some(startup_timeout)).await?;
Ok(McpClientAdapter::Rmcp(client))
} else {
let client =
Arc::new(McpClient::new_stdio_client(program, args, env, &env_vars, cwd).await?);
client.initialize(params, Some(startup_timeout)).await?;
Ok(McpClientAdapter::Legacy(client))
}
}
#[allow(clippy::too_many_arguments)]
async fn new_streamable_http_client(
server_name: String,
url: String,
bearer_token: Option<String>,
http_headers: Option<HashMap<String, String>>,
env_http_headers: Option<HashMap<String, String>>,
params: mcp_types::InitializeRequestParams,
startup_timeout: Duration,
store_mode: OAuthCredentialsStoreMode,
) -> Result<Self> {
let client = Arc::new(
RmcpClient::new_streamable_http_client(
&server_name,
&url,
bearer_token,
http_headers,
env_http_headers,
store_mode,
)
.await?,
);
client.initialize(params, Some(startup_timeout)).await?;
Ok(McpClientAdapter::Rmcp(client))
}
async fn list_tools(
&self,
params: Option<mcp_types::ListToolsRequestParams>,
timeout: Option<Duration>,
) -> Result<mcp_types::ListToolsResult> {
match self {
McpClientAdapter::Legacy(client) => client.list_tools(params, timeout).await,
McpClientAdapter::Rmcp(client) => client.list_tools(params, timeout).await,
}
}
async fn list_resources(
&self,
params: Option<mcp_types::ListResourcesRequestParams>,
timeout: Option<Duration>,
) -> Result<mcp_types::ListResourcesResult> {
match self {
McpClientAdapter::Legacy(_) => Ok(ListResourcesResult {
next_cursor: None,
resources: Vec::new(),
}),
McpClientAdapter::Rmcp(client) => client.list_resources(params, timeout).await,
}
}
async fn read_resource(
&self,
params: mcp_types::ReadResourceRequestParams,
timeout: Option<Duration>,
) -> Result<mcp_types::ReadResourceResult> {
match self {
McpClientAdapter::Legacy(_) => Err(anyhow!(
"resources/read is not supported by legacy MCP clients"
)),
McpClientAdapter::Rmcp(client) => client.read_resource(params, timeout).await,
}
}
async fn list_resource_templates(
&self,
params: Option<mcp_types::ListResourceTemplatesRequestParams>,
timeout: Option<Duration>,
) -> Result<mcp_types::ListResourceTemplatesResult> {
match self {
McpClientAdapter::Legacy(_) => Ok(ListResourceTemplatesResult {
next_cursor: None,
resource_templates: Vec::new(),
}),
McpClientAdapter::Rmcp(client) => client.list_resource_templates(params, timeout).await,
}
}
async fn call_tool(
&self,
name: String,
arguments: Option<serde_json::Value>,
timeout: Option<Duration>,
) -> Result<mcp_types::CallToolResult> {
match self {
McpClientAdapter::Legacy(client) => client.call_tool(name, arguments, timeout).await,
McpClientAdapter::Rmcp(client) => client.call_tool(name, arguments, timeout).await,
}
}
}
/// A thin wrapper around a set of running [`McpClient`] instances.
#[derive(Default)]
pub(crate) struct McpConnectionManager {
/// Server-name -> client instance.
@@ -119,7 +243,7 @@ pub(crate) struct McpConnectionManager {
}
impl McpConnectionManager {
/// Spawn a [`RmcpClient`] for each configured server.
/// Spawn a [`McpClient`] for each configured server.
///
/// * `mcp_servers` - Map loaded from the user configuration where *keys*
/// are human-readable server identifiers and *values* are the spawn
@@ -129,6 +253,7 @@ impl McpConnectionManager {
/// user should be informed about these errors.
pub async fn new(
mcp_servers: HashMap<String, McpServerConfig>,
use_rmcp_client: bool,
store_mode: OAuthCredentialsStoreMode,
) -> Result<(Self, ClientStartErrors)> {
// Early exit if no servers are configured.
@@ -191,8 +316,7 @@ impl McpConnectionManager {
protocol_version: mcp_types::MCP_SCHEMA_VERSION.to_owned(),
};
let resolved_bearer_token = resolved_bearer_token.unwrap_or_default();
let client_result = match transport {
let client = match transport {
McpServerTransportConfig::Stdio {
command,
args,
@@ -202,18 +326,17 @@ impl McpConnectionManager {
} => {
let command_os: OsString = command.into();
let args_os: Vec<OsString> = args.into_iter().map(Into::into).collect();
match RmcpClient::new_stdio_client(command_os, args_os, env, &env_vars, cwd)
.await
{
Ok(client) => {
let client = Arc::new(client);
client
.initialize(params.clone(), Some(startup_timeout))
.await
.map(|_| client)
}
Err(err) => Err(err.into()),
}
McpClientAdapter::new_stdio_client(
use_rmcp_client,
command_os,
args_os,
env,
env_vars,
cwd,
params,
startup_timeout,
)
.await
}
McpServerTransportConfig::StreamableHttp {
url,
@@ -221,32 +344,22 @@ impl McpConnectionManager {
env_http_headers,
..
} => {
match RmcpClient::new_streamable_http_client(
&server_name,
&url,
resolved_bearer_token.clone(),
McpClientAdapter::new_streamable_http_client(
server_name.clone(),
url,
resolved_bearer_token.unwrap_or_default(),
http_headers,
env_http_headers,
params,
startup_timeout,
store_mode,
)
.await
{
Ok(client) => {
let client = Arc::new(client);
client
.initialize(params.clone(), Some(startup_timeout))
.await
.map(|_| client)
}
Err(err) => Err(err),
}
}
};
}
.map(|c| (c, startup_timeout));
(
(server_name, tool_timeout),
client_result.map(|client| (client, startup_timeout)),
)
((server_name, tool_timeout), client)
});
}

View File

@@ -1,105 +0,0 @@
use crate::codex::Session;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use tracing::warn;
/// Process streamed `ResponseItem`s from the model into the pair of:
/// - items we should record in conversation history; and
/// - `ResponseInputItem`s to send back to the model on the next turn.
///
/// Each processed item pairs a model-emitted `ResponseItem` with the optional
/// response produced for it. Recognized pairs are added to the history list (a
/// call followed by its output); unrecognized pairs are logged with `warn!` and
/// not recorded. Every `Some` response is returned regardless of whether its
/// pair was recognized.
///
/// When at least one item is to be recorded, the items are passed to
/// `Session::record_conversation_items` before returning.
///
/// Returns `(responses, recorded_items)`.
pub(crate) async fn process_items(
    processed_items: Vec<crate::codex::ProcessedResponseItem>,
    sess: &Session,
) -> (Vec<ResponseInputItem>, Vec<ResponseItem>) {
    let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
    let mut responses = Vec::<ResponseInputItem>::new();
    for processed_response_item in processed_items {
        let crate::codex::ProcessedResponseItem { item, response } = processed_response_item;
        match (&item, &response) {
            (ResponseItem::Message { role, .. }, None) if role == "assistant" => {
                // If the model returned a message, we need to record it.
                items_to_record_in_conversation_history.push(item);
            }
            (ResponseItem::Reasoning { .. }, None) => {
                // `item` is owned here, so move it instead of rebuilding it from
                // clones of each field.
                items_to_record_in_conversation_history.push(item);
            }
            // Shell calls and function calls answered with a plain function-call
            // output are recorded the same way: the call, then its output.
            (
                ResponseItem::LocalShellCall { .. } | ResponseItem::FunctionCall { .. },
                Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
            ) => {
                // The response itself is still needed for `responses` below, so
                // its fields are cloned for the history entry.
                let recorded_output = ResponseItem::FunctionCallOutput {
                    call_id: call_id.clone(),
                    output: output.clone(),
                };
                items_to_record_in_conversation_history.push(item);
                items_to_record_in_conversation_history.push(recorded_output);
            }
            (
                ResponseItem::CustomToolCall { .. },
                Some(ResponseInputItem::CustomToolCallOutput { call_id, output }),
            ) => {
                let recorded_output = ResponseItem::CustomToolCallOutput {
                    call_id: call_id.clone(),
                    output: output.clone(),
                };
                items_to_record_in_conversation_history.push(item);
                items_to_record_in_conversation_history.push(recorded_output);
            }
            (
                ResponseItem::FunctionCall { .. },
                Some(ResponseInputItem::McpToolCallOutput { call_id, result }),
            ) => {
                // MCP results are recorded in history as function-call outputs;
                // an error is stored as the output content with `success: false`.
                let output = match result {
                    Ok(call_tool_result) => {
                        crate::codex::convert_call_tool_result_to_function_call_output_payload(
                            call_tool_result,
                        )
                    }
                    Err(err) => FunctionCallOutputPayload {
                        content: err.clone(),
                        success: Some(false),
                    },
                };
                items_to_record_in_conversation_history.push(item);
                items_to_record_in_conversation_history.push(ResponseItem::FunctionCallOutput {
                    call_id: call_id.clone(),
                    output,
                });
            }
            _ => {
                warn!("Unexpected response item: {item:?} with response: {response:?}");
            }
        };
        if let Some(response) = response {
            responses.push(response);
        }
    }

    // Only attempt to take the lock if there is something to record.
    if !items_to_record_in_conversation_history.is_empty() {
        sess.record_conversation_items(&items_to_record_in_conversation_history)
            .await;
    }
    (responses, items_to_record_in_conversation_history)
}

View File

@@ -24,6 +24,7 @@ use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::CompactedItem;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::InputMessageKind;
use codex_protocol::protocol::RolloutItem;
use codex_protocol::protocol::RolloutLine;
use codex_protocol::protocol::SessionMeta;
@@ -542,6 +543,7 @@ async fn test_tail_includes_last_response_items() -> Result<()> {
timestamp: ts.to_string(),
item: RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent {
message: "hello".into(),
kind: Some(InputMessageKind::Plain),
images: None,
})),
};
@@ -625,6 +627,7 @@ async fn test_tail_handles_short_sessions() -> Result<()> {
timestamp: ts.to_string(),
item: RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent {
message: "hi".into(),
kind: Some(InputMessageKind::Plain),
images: None,
})),
};
@@ -709,6 +712,7 @@ async fn test_tail_skips_trailing_non_responses() -> Result<()> {
timestamp: ts.to_string(),
item: RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent {
message: "hello".into(),
kind: Some(InputMessageKind::Plain),
images: None,
})),
};

View File

@@ -0,0 +1,68 @@
use async_channel::Sender;
use codex_protocol::ConversationId;
use codex_protocol::items::TurnItem;
use codex_protocol::protocol::Event;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::ItemCompletedEvent;
use codex_protocol::protocol::ItemStartedEvent;
use tracing::error;
/// Sends `ItemStarted` / `ItemCompleted` events for the items of one turn over
/// an event channel.
#[derive(Debug)]
pub(crate) struct ItemCollector {
/// Conversation id copied into the `thread_id` field of every emitted event.
thread_id: ConversationId,
/// Turn id; used both as the enclosing `Event::id` and as the `turn_id` of the payload.
turn_id: String,
/// Channel on which the events are sent.
tx_event: Sender<Event>,
}
impl ItemCollector {
    /// Creates a collector that reports items of turn `turn_id` in conversation
    /// `thread_id` on `tx_event`.
    pub fn new(tx_event: Sender<Event>, thread_id: ConversationId, turn_id: String) -> Self {
        Self {
            thread_id,
            turn_id,
            tx_event,
        }
    }

    /// Sends an `ItemStarted` event for `item`.
    ///
    /// A send failure is logged with `error!` and otherwise ignored.
    pub async fn started(&self, item: TurnItem) {
        let payload = ItemStartedEvent {
            thread_id: self.thread_id,
            turn_id: self.turn_id.clone(),
            item,
        };
        let event = Event {
            id: self.turn_id.clone(),
            msg: EventMsg::ItemStarted(payload),
        };
        if let Err(e) = self.tx_event.send(event).await {
            error!("failed to send item started event: {e}");
        }
    }

    /// Sends an `ItemCompleted` event for `item`.
    ///
    /// A send failure is logged with `error!` and otherwise ignored.
    pub async fn completed(&self, item: TurnItem) {
        let payload = ItemCompletedEvent {
            thread_id: self.thread_id,
            turn_id: self.turn_id.clone(),
            item,
        };
        let event = Event {
            id: self.turn_id.clone(),
            msg: EventMsg::ItemCompleted(payload),
        };
        if let Err(e) = self.tx_event.send(event).await {
            error!("failed to send item completed event: {e}");
        }
    }

    /// Sends `ItemStarted` immediately followed by `ItemCompleted` for the same item.
    pub async fn started_completed(&self, item: TurnItem) {
        let start_copy = item.clone();
        self.started(start_copy).await;
        self.completed(item).await;
    }
}

View File

@@ -1,7 +1,9 @@
mod item_collector;
mod service;
mod session;
mod turn;
pub(crate) use item_collector::ItemCollector;
pub(crate) use service::SessionServices;
pub(crate) use session::SessionState;
pub(crate) use turn::ActiveTurn;

View File

@@ -36,12 +36,8 @@ impl SessionState {
self.history.record_items(items)
}
pub(crate) fn history_snapshot(&mut self) -> Vec<ResponseItem> {
self.history.get_history()
}
pub(crate) fn clone_history(&self) -> ConversationHistory {
self.history.clone()
pub(crate) fn history_snapshot(&self) -> Vec<ResponseItem> {
self.history.contents()
}
pub(crate) fn replace_history(&mut self, items: Vec<ResponseItem>) {

View File

@@ -13,10 +13,8 @@ use tokio_util::task::AbortOnDropHandle;
use tracing::trace;
use tracing::warn;
use crate::AuthManager;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::config::Config;
use crate::protocol::EventMsg;
use crate::protocol::TaskCompleteEvent;
use crate::protocol::TurnAbortReason;
@@ -46,14 +44,6 @@ impl SessionTaskContext {
/// Returns a new `Arc` handle to the session held by this task context.
pub(crate) fn clone_session(&self) -> Arc<Session> {
Arc::clone(&self.session)
}
/// Returns a new `Arc` handle to the auth manager stored in the session's services.
pub(crate) fn auth_manager(&self) -> Arc<AuthManager> {
Arc::clone(&self.session.services.auth_manager)
}
/// Returns the configuration reported by `Session::base_config` for the wrapped session.
pub(crate) async fn base_config(&self) -> Arc<Config> {
self.session.base_config().await
}
}
#[async_trait]

View File

@@ -1,18 +1,11 @@
use std::sync::Arc;
use async_trait::async_trait;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::ReviewOutputEvent;
use codex_protocol::protocol::TaskCompleteEvent;
use tokio_util::sync::CancellationToken;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::codex_delegate::run_codex_conversation;
// use crate::config::Config; // no longer needed directly; use session.base_config()
use crate::review_format::format_review_findings_block;
use crate::codex::exit_review_mode;
use crate::codex::run_task;
use crate::state::TaskKind;
use codex_protocol::user_input::UserInput;
@@ -35,108 +28,11 @@ impl SessionTask for ReviewTask {
input: Vec<UserInput>,
cancellation_token: CancellationToken,
) -> Option<String> {
// let sess = session.clone_session();
// run_task(sess, ctx, input, TaskKind::Review, cancellation_token).await
let config = session.base_config().await.as_ref().clone();
let receiver =
match run_codex_conversation(config, session.auth_manager(), input, cancellation_token)
.await
{
Ok(r) => r,
Err(_) => return None,
};
while let Ok(event) = receiver.recv().await {
session
.clone_session()
.send_event(ctx.as_ref(), event.clone())
.await;
if let EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) = event {
exit_review_mode(
session.clone_session(),
last_agent_message.as_deref().map(parse_review_output_event),
)
.await;
}
}
Some("".to_string())
let sess = session.clone_session();
run_task(sess, ctx, input, TaskKind::Review, cancellation_token).await
}
async fn abort(&self, session: Arc<SessionTaskContext>, _ctx: Arc<TurnContext>) {
exit_review_mode(session.clone_session(), None).await;
}
}
/// Records the outcome of a review task in the conversation history.
///
/// Builds a `<user_action>` text block and records it through
/// `Session::record_conversation_items` as a `ResponseItem::Message` with role
/// `"user"`:
/// - `Some(output)`: the `<results>` section holds the trimmed
///   `overall_explanation`, followed on a new line by the formatted findings
///   block when `findings` is non-empty.
/// - `None`: the message says the review was interrupted and reports `None.`
///   as its results.
///
/// This function does not emit an `ExitedReviewMode` event itself; see the
/// note at the top of the body.
pub(crate) async fn exit_review_mode(
session: Arc<Session>,
review_output: Option<ReviewOutputEvent>,
) {
// ExitedReviewMode event can be emitted by the caller if needed.
let mut user_message = String::new();
if let Some(out) = review_output {
// Assemble the text that goes inside `<results>`: explanation first, then findings.
let mut findings_str = String::new();
let text = out.overall_explanation.trim();
if !text.is_empty() {
findings_str.push_str(text);
}
if !out.findings.is_empty() {
// NOTE(review): the meaning of the second argument (`None`) is defined by
// `format_review_findings_block`, which is not visible here.
let block = format_review_findings_block(&out.findings, None);
findings_str.push_str(&format!("\n{block}"));
}
// The raw string below is runtime text recorded in the conversation; keep it verbatim.
user_message.push_str(&format!(
r#"<user_action>
<context>User initiated a review task. Here's the full review output from reviewer model. User may select one or more comments to resolve.</context>
<action>review</action>
<results>
{findings_str}
</results>
</user_action>
"#));
} else {
// No review output: record that the review was interrupted.
user_message.push_str(r#"<user_action>
<context>User initiated a review task, but was interrupted. If user asks about this, tell them to re-initiate a review with `/review` and wait for it to complete.</context>
<action>review</action>
<results>
None.
</results>
</user_action>
"#);
}
// Recorded with role "user" so the outcome appears in history as user input.
session
.record_conversation_items(&[ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText { text: user_message }],
}])
.await;
}
/// Parse the review output; when not valid JSON, build a structured
/// fallback that carries the plain text as the overall explanation.
///
/// Returns: a ReviewOutputEvent parsed from JSON or a fallback populated from text.
fn parse_review_output_event(text: &str) -> ReviewOutputEvent {
// Try direct parse first
if let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(text) {
return ev;
}
// If wrapped in markdown fences or extra prose, attempt to extract the first JSON object
if let (Some(start), Some(end)) = (text.find('{'), text.rfind('}'))
&& start < end
&& let Some(slice) = text.get(start..=end)
&& let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(slice)
{
return ev;
}
// Not JSON: return a structured ReviewOutputEvent that carries
// the plain text as the overall explanation.
ReviewOutputEvent {
overall_explanation: text.to_string(),
..Default::default()
async fn abort(&self, session: Arc<SessionTaskContext>, ctx: Arc<TurnContext>) {
exit_review_mode(session.clone_session(), ctx, None).await;
}
}

View File

@@ -2,7 +2,6 @@ use std::sync::Arc;
use tokio::sync::RwLock;
use tokio_util::either::Either;
use tokio_util::sync::CancellationToken;
use tokio_util::task::AbortOnDropHandle;
use crate::codex::Session;
@@ -10,10 +9,8 @@ use crate::codex::TurnContext;
use crate::error::CodexErr;
use crate::function_tool::FunctionCallError;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::context::ToolPayload;
use crate::tools::router::ToolCall;
use crate::tools::router::ToolRouter;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
pub(crate) struct ToolCallRuntime {
@@ -43,7 +40,6 @@ impl ToolCallRuntime {
pub(crate) fn handle_tool_call(
&self,
call: ToolCall,
cancellation_token: CancellationToken,
) -> impl std::future::Future<Output = Result<ResponseInputItem, CodexErr>> {
let supports_parallel = self.router.tool_supports_parallel(&call.tool_name);
@@ -52,24 +48,18 @@ impl ToolCallRuntime {
let turn = Arc::clone(&self.turn_context);
let tracker = Arc::clone(&self.tracker);
let lock = Arc::clone(&self.parallel_execution);
let aborted_response = Self::aborted_response(&call);
let handle: AbortOnDropHandle<Result<ResponseInputItem, FunctionCallError>> =
AbortOnDropHandle::new(tokio::spawn(async move {
tokio::select! {
_ = cancellation_token.cancelled() => Ok(aborted_response),
res = async {
let _guard = if supports_parallel {
Either::Left(lock.read().await)
} else {
Either::Right(lock.write().await)
};
let _guard = if supports_parallel {
Either::Left(lock.read().await)
} else {
Either::Right(lock.write().await)
};
router
.dispatch_tool_call(session, turn, tracker, call)
.await
} => res,
}
router
.dispatch_tool_call(session, turn, tracker, call)
.await
}));
async move {
@@ -84,25 +74,3 @@ impl ToolCallRuntime {
}
}
}
impl ToolCallRuntime {
    /// Builds the placeholder response for a tool call that was aborted.
    ///
    /// The response variant follows the call's payload kind (custom tool, MCP
    /// tool, or anything else as a function call); each carries the call's id
    /// and the text `"aborted"`.
    fn aborted_response(call: &ToolCall) -> ResponseInputItem {
        let call_id = call.call_id.clone();
        let aborted = "aborted".to_string();
        match &call.payload {
            ToolPayload::Custom { .. } => ResponseInputItem::CustomToolCallOutput {
                call_id,
                output: aborted,
            },
            ToolPayload::Mcp { .. } => ResponseInputItem::McpToolCallOutput {
                call_id,
                result: Err(aborted),
            },
            _ => {
                let output = FunctionCallOutputPayload {
                    content: aborted,
                    success: None,
                };
                ResponseInputItem::FunctionCallOutput { call_id, output }
            }
        }
    }
}

View File

@@ -1019,24 +1019,21 @@ mod tests {
}
}
// Avoid order-based assertions; compare via set containment instead.
fn assert_contains_tool_names(tools: &[ConfiguredToolSpec], expected_subset: &[&str]) {
use std::collections::HashSet;
let mut names = HashSet::new();
let mut duplicates = Vec::new();
for name in tools.iter().map(|t| tool_name(&t.spec)) {
if !names.insert(name) {
duplicates.push(name);
}
}
assert!(
duplicates.is_empty(),
"duplicate tool entries detected: {duplicates:?}"
fn assert_eq_tool_names(tools: &[ConfiguredToolSpec], expected_names: &[&str]) {
let tool_names = tools
.iter()
.map(|tool| tool_name(&tool.spec))
.collect::<Vec<_>>();
assert_eq!(
tool_names.len(),
expected_names.len(),
"tool_name mismatch, {tool_names:?}, {expected_names:?}",
);
for expected in expected_subset {
assert!(
names.contains(expected),
"expected tool {expected} to be present; had: {names:?}"
for (name, expected_name) in tool_names.iter().zip(expected_names.iter()) {
assert_eq!(
name, expected_name,
"tool_name mismatch, {name:?}, {expected_name:?}"
);
}
}
@@ -1059,105 +1056,8 @@ mod tests {
.unwrap_or_else(|| panic!("expected tool {expected_name}"))
}
/// Recursively clears every `description` in `schema`, so two schemas can be
/// compared without regard to their human-readable text.
fn strip_descriptions_schema(schema: &mut JsonSchema) {
    match schema {
        // Objects have no description field in this pattern; only recurse into children.
        JsonSchema::Object {
            properties,
            required: _,
            additional_properties,
        } => {
            for property in properties.values_mut() {
                strip_descriptions_schema(property);
            }
            if let Some(AdditionalProperties::Schema(extra)) = additional_properties {
                strip_descriptions_schema(extra);
            }
        }
        JsonSchema::Array { items, description } => {
            *description = None;
            strip_descriptions_schema(items);
        }
        JsonSchema::Boolean { description }
        | JsonSchema::String { description }
        | JsonSchema::Number { description } => {
            *description = None;
        }
    }
}
/// Clears the descriptions inside a tool's parameter schema.
///
/// Only function tools are modified; the other variants are left unchanged.
fn strip_descriptions_tool(spec: &mut ToolSpec) {
    match spec {
        ToolSpec::Freeform(_) | ToolSpec::LocalShell {} | ToolSpec::WebSearch {} => {}
        ToolSpec::Function(ResponsesApiTool { parameters, .. }) => {
            strip_descriptions_schema(parameters);
        }
    }
}
/// Pins the full tool set built for `gpt-5-codex` with unified exec, web search
/// and view-image enabled: no duplicate names, the exact expected name set, and
/// specs equal to those from the `create_*` helpers once descriptions are stripped.
#[test]
fn test_full_toolset_specs_for_gpt5_codex() {
    use std::collections::BTreeMap;
    use std::collections::HashSet;

    let model_family = find_family_for_model("gpt-5-codex")
        .expect("gpt-5-codex should be a valid model family");
    let mut features = Features::with_defaults();
    features.enable(Feature::UnifiedExec);
    features.enable(Feature::WebSearchRequest);
    features.enable(Feature::ViewImageTool);
    let params = ToolsConfigParams {
        model_family: &model_family,
        features: &features,
    };
    let config = ToolsConfig::new(&params);
    let (tools, _) = build_specs(&config, None).build();

    // Build actual map name -> spec, collecting any name that shows up twice.
    let mut actual: BTreeMap<String, ToolSpec> = BTreeMap::new();
    let duplicate_names: Vec<String> = tools
        .iter()
        .filter_map(|tool| {
            let name = tool_name(&tool.spec).to_string();
            actual
                .insert(name.clone(), tool.spec.clone())
                .map(|_| name)
        })
        .collect();
    assert!(
        duplicate_names.is_empty(),
        "duplicate tool entries detected: {duplicate_names:?}"
    );

    // Build expected from the same helpers used by the builder.
    let expected: BTreeMap<String, ToolSpec> = [
        create_exec_command_tool(),
        create_write_stdin_tool(),
        create_shell_tool(),
        create_list_mcp_resources_tool(),
        create_list_mcp_resource_templates_tool(),
        create_read_mcp_resource_tool(),
        PLAN_TOOL.clone(),
        create_apply_patch_freeform_tool(),
        ToolSpec::WebSearch {},
        create_view_image_tool(),
    ]
    .into_iter()
    .map(|spec| (tool_name(&spec).to_string(), spec))
    .collect();

    // Exact name set match — this is the only test allowed to fail when tools change.
    let actual_names = actual.keys().cloned().collect::<HashSet<_>>();
    let expected_names = expected.keys().cloned().collect::<HashSet<_>>();
    assert_eq!(actual_names, expected_names, "tool name set mismatch");

    // Compare specs ignoring human-readable descriptions.
    for (name, expected_spec) in &expected {
        let mut a = actual.get(name).expect("present").clone();
        let mut e = expected_spec.clone();
        strip_descriptions_tool(&mut a);
        strip_descriptions_tool(&mut e);
        assert_eq!(a, e, "spec mismatch for {name}");
    }
}
#[test]
fn test_build_specs_contains_expected_basics() {
fn test_build_specs() {
let model_family = find_family_for_model("codex-mini-latest")
.expect("codex-mini-latest should be a valid model family");
let mut features = Features::with_defaults();
@@ -1168,25 +1068,25 @@ mod tests {
features: &features,
});
let (tools, _) = build_specs(&config, Some(HashMap::new())).build();
let tool_names = tools.iter().map(|t| t.spec.name()).collect::<Vec<_>>();
assert_eq!(
&tool_names,
&[
"exec_command",
"write_stdin",
"local_shell",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"web_search",
"view_image",
]
);
let mut expected = vec!["exec_command", "write_stdin"];
if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
}
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"web_search",
"view_image",
]);
assert_eq_tool_names(&tools, &expected);
}
#[test]
fn test_build_specs_default_shell_present() {
fn test_build_specs_default_shell() {
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
let mut features = Features::with_defaults();
features.enable(Feature::WebSearchRequest);
@@ -1197,12 +1097,20 @@ mod tests {
});
let (tools, _) = build_specs(&config, Some(HashMap::new())).build();
// Only check the shell variant and a couple of core tools.
let mut subset = vec!["exec_command", "write_stdin", "update_plan"];
let mut expected = vec!["exec_command", "write_stdin"];
if let Some(shell_tool) = shell_tool_name(&config) {
subset.push(shell_tool);
expected.push(shell_tool);
}
assert_contains_tool_names(&tools, &subset);
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"web_search",
"view_image",
]);
assert_eq_tool_names(&tools, &expected);
}
#[test]
@@ -1257,7 +1165,7 @@ mod tests {
}
#[test]
fn test_build_specs_mcp_tools_converted() {
fn test_build_specs_mcp_tools() {
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
let mut features = Features::with_defaults();
features.enable(Feature::UnifiedExec);
@@ -1305,6 +1213,22 @@ mod tests {
)
.build();
let mut expected = vec!["exec_command", "write_stdin"];
if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
}
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"web_search",
"view_image",
"test_server/do_something_cool",
]);
assert_eq_tool_names(&tools, &expected);
let tool = find_tool(&tools, "test_server/do_something_cool");
assert_eq!(
&tool.spec,
@@ -1410,19 +1334,23 @@ mod tests {
]);
let (tools, _) = build_specs(&config, Some(tools_map)).build();
// Expect exec_command/write_stdin first, followed by MCP tools sorted by fully-qualified name.
let mut expected = vec!["exec_command", "write_stdin"];
if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
}
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"view_image",
"test_server/cool",
"test_server/do",
"test_server/something",
]);
// Only assert that the MCP tools themselves are sorted by fully-qualified name.
let mcp_names: Vec<_> = tools
.iter()
.map(|t| tool_name(&t.spec).to_string())
.filter(|n| n.starts_with("test_server/"))
.collect();
let expected = vec![
"test_server/cool".to_string(),
"test_server/do".to_string(),
"test_server/something".to_string(),
];
assert_eq!(mcp_names, expected);
assert_eq_tool_names(&tools, &expected);
}
#[test]
@@ -1461,9 +1389,28 @@ mod tests {
)
.build();
let tool = find_tool(&tools, "dash/search");
let mut expected = vec!["exec_command", "write_stdin"];
let has_shell = if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
true
} else {
false
};
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"web_search",
"view_image",
"dash/search",
]);
assert_eq_tool_names(&tools, &expected);
assert_eq!(
tool.spec,
tools[if has_shell { 10 } else { 9 }].spec,
ToolSpec::Function(ResponsesApiTool {
name: "dash/search".to_string(),
parameters: JsonSchema::Object {
@@ -1516,9 +1463,27 @@ mod tests {
)
.build();
let tool = find_tool(&tools, "dash/paginate");
let mut expected = vec!["exec_command", "write_stdin"];
let has_shell = if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
true
} else {
false
};
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"web_search",
"view_image",
"dash/paginate",
]);
assert_eq_tool_names(&tools, &expected);
assert_eq!(
tool.spec,
tools[if has_shell { 10 } else { 9 }].spec,
ToolSpec::Function(ResponsesApiTool {
name: "dash/paginate".to_string(),
parameters: JsonSchema::Object {
@@ -1570,9 +1535,26 @@ mod tests {
)
.build();
let tool = find_tool(&tools, "dash/tags");
let mut expected = vec!["exec_command", "write_stdin"];
let has_shell = if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
true
} else {
false
};
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"web_search",
"view_image",
"dash/tags",
]);
assert_eq_tool_names(&tools, &expected);
assert_eq!(
tool.spec,
tools[if has_shell { 10 } else { 9 }].spec,
ToolSpec::Function(ResponsesApiTool {
name: "dash/tags".to_string(),
parameters: JsonSchema::Object {
@@ -1626,9 +1608,26 @@ mod tests {
)
.build();
let tool = find_tool(&tools, "dash/value");
let mut expected = vec!["exec_command", "write_stdin"];
let has_shell = if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
true
} else {
false
};
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"web_search",
"view_image",
"dash/value",
]);
assert_eq_tool_names(&tools, &expected);
assert_eq!(
tool.spec,
tools[if has_shell { 10 } else { 9 }].spec,
ToolSpec::Function(ResponsesApiTool {
name: "dash/value".to_string(),
parameters: JsonSchema::Object {
@@ -1719,9 +1718,28 @@ mod tests {
)
.build();
let tool = find_tool(&tools, "test_server/do_something_cool");
let mut expected = vec!["exec_command", "write_stdin"];
let has_shell = if let Some(shell_tool) = shell_tool_name(&config) {
expected.push(shell_tool);
true
} else {
false
};
expected.extend([
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"web_search",
"view_image",
"test_server/do_something_cool",
]);
assert_eq_tool_names(&tools, &expected);
assert_eq!(
tool.spec,
tools[if has_shell { 10 } else { 9 }].spec,
ToolSpec::Function(ResponsesApiTool {
name: "test_server/do_something_cool".to_string(),
parameters: JsonSchema::Object {

View File

@@ -8,12 +8,12 @@ use codex_core::LocalShellStatus;
use codex_core::ModelClient;
use codex_core::ModelProviderInfo;
use codex_core::Prompt;
use codex_core::ReasoningItemContent;
use codex_core::ResponseItem;
use codex_core::WireApi;
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::ConversationId;
use codex_protocol::models::ReasoningItemContent;
use core_test_support::load_default_config_for_test;
use futures::StreamExt;
use serde_json::Value;

View File

@@ -13,7 +13,6 @@ use codex_core::WireApi;
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::ConversationId;
use codex_protocol::models::ReasoningItemContent;
use core_test_support::load_default_config_for_test;
use futures::StreamExt;
use tempfile::TempDir;
@@ -144,8 +143,8 @@ fn assert_reasoning(item: &ResponseItem, expected: &str) {
let mut combined = String::new();
for part in parts {
match part {
ReasoningItemContent::ReasoningText { text }
| ReasoningItemContent::Text { text } => combined.push_str(text),
codex_core::ReasoningItemContent::ReasoningText { text }
| codex_core::ReasoningItemContent::Text { text } => combined.push_str(text),
}
}
assert_eq!(combined, expected);

View File

@@ -35,22 +35,6 @@ impl ResponseMock {
pub fn requests(&self) -> Vec<ResponsesRequest> {
self.requests.lock().unwrap().clone()
}
/// Reports whether at least one request captured by this mock carried a
/// `function_call` item whose `call_id` equals `call_id`.
pub fn saw_function_call(&self, call_id: &str) -> bool {
    let captured = self.requests();
    captured
        .iter()
        .any(|request| request.has_function_call(call_id))
}
/// Looks through every captured request, in capture order, for a
/// `function_call_output` item with the given `call_id` and yields the
/// `output` string from the first request that has one.
pub fn function_call_output_text(&self, call_id: &str) -> Option<String> {
    let captured = self.requests();
    captured
        .iter()
        .find_map(|request| request.function_call_output_text(call_id))
}
}
#[derive(Debug, Clone)]
@@ -86,28 +70,6 @@ impl ResponsesRequest {
.unwrap_or_else(|| panic!("function call output {call_id} item not found in request"))
}
/// Checks this request's `input` for a `function_call` entry whose `call_id`
/// is exactly `call_id`.
pub fn has_function_call(&self, call_id: &str) -> bool {
    let inputs = self.input();
    inputs.iter().any(|entry| {
        let kind = entry.get("type").and_then(Value::as_str);
        let id = entry.get("call_id").and_then(Value::as_str);
        kind == Some("function_call") && id == Some(call_id)
    })
}
/// Finds the first `function_call_output` entry in this request's `input`
/// whose `call_id` matches and returns its `output` as an owned string.
///
/// Yields `None` when no such entry exists or when its `output` field is
/// absent or not a JSON string.
pub fn function_call_output_text(&self, call_id: &str) -> Option<String> {
    let inputs = self.input();
    let matching = inputs.iter().find(|entry| {
        let kind = entry.get("type").and_then(Value::as_str);
        let id = entry.get("call_id").and_then(Value::as_str);
        kind == Some("function_call_output") && id == Some(call_id)
    });
    matching
        .and_then(|entry| entry.get("output"))
        .and_then(Value::as_str)
        .map(str::to_string)
}
pub fn header(&self, name: &str) -> Option<String> {
self.0
.headers
@@ -135,10 +97,6 @@ impl Match for ResponseMock {
.lock()
.unwrap()
.push(ResponsesRequest(request.clone()));
// Enforce invariant checks on every request body captured by the mock.
// Panic on orphan tool outputs or calls to catch regressions early.
validate_request_body_invariants(request);
true
}
}
@@ -209,56 +167,6 @@ pub fn ev_assistant_message(id: &str, text: &str) -> Value {
})
}
/// Builds a `response.output_item.done` event carrying a `reasoning` item.
///
/// Every entry of `summary` becomes a `summary_text` object under
/// `item.summary`. When `raw_content` is non-empty, each entry becomes a
/// `reasoning_text` object under `item.content`; otherwise the `content` key
/// is left out entirely.
pub fn ev_reasoning_item(id: &str, summary: &[&str], raw_content: &[&str]) -> Value {
    // Wraps each text in a `{"type": kind, "text": text}` object.
    fn tagged(kind: &str, texts: &[&str]) -> Vec<Value> {
        texts
            .iter()
            .map(|text| serde_json::json!({"type": kind, "text": text}))
            .collect()
    }

    let mut item = serde_json::json!({
        "type": "reasoning",
        "id": id,
        "summary": tagged("summary_text", summary),
    });
    if !raw_content.is_empty() {
        item["content"] = Value::Array(tagged("reasoning_text", raw_content));
    }

    serde_json::json!({
        "type": "response.output_item.done",
        "item": item,
    })
}
/// Builds a `response.output_item.added` event for a `web_search_call` item
/// with the given `id` and `status`, whose action is a `search` for `query`.
pub fn ev_web_search_call_added(id: &str, status: &str, query: &str) -> Value {
    let action = serde_json::json!({"type": "search", "query": query});
    let item = serde_json::json!({
        "type": "web_search_call",
        "id": id,
        "status": status,
        "action": action,
    });
    serde_json::json!({
        "type": "response.output_item.added",
        "item": item,
    })
}
/// Builds a `response.output_item.done` event for a `web_search_call` item
/// with the given `id` and `status`, whose action is a `search` for `query`.
pub fn ev_web_search_call_done(id: &str, status: &str, query: &str) -> Value {
    let action = serde_json::json!({"type": "search", "query": query});
    let item = serde_json::json!({
        "type": "web_search_call",
        "id": id,
        "status": status,
        "action": action,
    });
    serde_json::json!({
        "type": "response.output_item.done",
        "item": item,
    })
}
pub fn ev_function_call(call_id: &str, name: &str, arguments: &str) -> Value {
serde_json::json!({
"type": "response.output_item.done",
@@ -428,90 +336,3 @@ pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) -> Res
response_mock
}
/// Validate invariants on the request body sent to `/v1/responses`.
///
/// Requests whose body does not parse as JSON are skipped without any check.
///
/// # Panics
///
/// - If the JSON body has no `input` array.
/// - If a `function_call_output`/`custom_tool_call_output` has a missing,
///   non-string, or empty `call_id`.
/// - If a `function_call_output` has no `function_call` or `local_shell_call`
///   with the same `call_id` in the same `input`.
/// - If a `custom_tool_call_output` has no `custom_tool_call` with the same
///   `call_id`.
/// - Symmetrically, if a `function_call`/`custom_tool_call` in the `input` has
///   no matching output entry. `local_shell_call` entries are not required to
///   have an output.
///
/// Matching is by `call_id` set membership only; the relative position of a
/// call and its output within `input` is not checked.
fn validate_request_body_invariants(request: &wiremock::Request) {
// Non-JSON bodies are not validated at all.
let Ok(body): Result<Value, _> = request.body_json() else {
return;
};
let Some(items) = body.get("input").and_then(Value::as_array) else {
panic!("input array not found in request");
};
use std::collections::HashSet;
// Returns the item's `call_id` only when it is a non-empty string.
fn get_call_id(item: &Value) -> Option<&str> {
item.get("call_id")
.and_then(Value::as_str)
.filter(|id| !id.is_empty())
}
// Collects the `call_id`s of all items of type `kind`; items without a
// usable `call_id` are silently ignored.
fn gather_ids(items: &[Value], kind: &str) -> HashSet<String> {
items
.iter()
.filter(|item| item.get("type").and_then(Value::as_str) == Some(kind))
.filter_map(get_call_id)
.map(str::to_string)
.collect()
}
// Like `gather_ids`, but an item of type `kind` without a usable `call_id`
// is a hard failure: panics with `missing_msg`.
fn gather_output_ids(items: &[Value], kind: &str, missing_msg: &str) -> HashSet<String> {
items
.iter()
.filter(|item| item.get("type").and_then(Value::as_str) == Some(kind))
.map(|item| {
let Some(id) = get_call_id(item) else {
panic!("{missing_msg}");
};
id.to_string()
})
.collect()
}
let function_calls = gather_ids(items, "function_call");
let custom_tool_calls = gather_ids(items, "custom_tool_call");
let local_shell_calls = gather_ids(items, "local_shell_call");
let function_call_outputs = gather_output_ids(
items,
"function_call_output",
"orphan function_call_output with empty call_id should be dropped",
);
let custom_tool_call_outputs = gather_output_ids(
items,
"custom_tool_call_output",
"orphan custom_tool_call_output with empty call_id should be dropped",
);
// Every output must belong to a call present in the same `input`.
for cid in &function_call_outputs {
assert!(
function_calls.contains(cid) || local_shell_calls.contains(cid),
"function_call_output without matching call in input: {cid}",
);
}
for cid in &custom_tool_call_outputs {
assert!(
custom_tool_calls.contains(cid),
"custom_tool_call_output without matching call in input: {cid}",
);
}
// Symmetry: every function/custom tool call must have an output.
for cid in &function_calls {
assert!(
function_call_outputs.contains(cid),
"Function call output is missing for call id: {cid}",
);
}
for cid in &custom_tool_calls {
assert!(
custom_tool_call_outputs.contains(cid),
"Custom tool call output is missing for call id: {cid}",
);
}
}

View File

@@ -1,4 +1,3 @@
use std::sync::Arc;
use std::time::Duration;
use codex_core::protocol::EventMsg;
@@ -6,9 +5,7 @@ use codex_core::protocol::Op;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex::test_codex;
@@ -70,98 +67,3 @@ async fn interrupt_long_running_tool_emits_turn_aborted() {
)
.await;
}
/// After an interrupt we expect the next request to the model to include both
/// the original tool call and an `"aborted"` `function_call_output`. This test
/// exercises the follow-up flow: it sends another user turn, inspects the mock
/// responses server, and ensures the model receives the synthesized abort.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn interrupt_tool_records_history_entries() {
// Long-running command (`sleep 60`) so the tool call is still in flight
// when `Op::Interrupt` is submitted below.
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"sleep 60".to_string(),
];
let call_id = "call-history";
let args = json!({
"command": command,
"timeout_ms": 60_000
})
.to_string();
// First response: the model asks for a `shell` call with the args above.
let first_body = sse(vec![
ev_response_created("resp-history"),
ev_function_call(call_id, "shell", &args),
ev_completed("resp-history"),
]);
// Second response: completes right away with no output items.
let follow_up_body = sse(vec![
ev_response_created("resp-followup"),
ev_completed("resp-followup"),
]);
let server = start_mock_server().await;
let response_mock = mount_sse_sequence(&server, vec![first_body, follow_up_body]).await;
let fixture = test_codex().build(&server).await.unwrap();
let codex = Arc::clone(&fixture.codex);
let wait_timeout = Duration::from_millis(100);
// Turn 1: start the tool call.
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "start history recording".into(),
}],
})
.await
.unwrap();
// Only interrupt once the command has actually begun executing.
wait_for_event_with_timeout(
&codex,
|ev| matches!(ev, EventMsg::ExecCommandBegin(_)),
wait_timeout,
)
.await;
codex.submit(Op::Interrupt).await.unwrap();
wait_for_event_with_timeout(
&codex,
|ev| matches!(ev, EventMsg::TurnAborted(_)),
wait_timeout,
)
.await;
// Turn 2: the follow-up request is the one whose payload is inspected.
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "follow up".into(),
}],
})
.await
.unwrap();
wait_for_event_with_timeout(
&codex,
|ev| matches!(ev, EventMsg::TaskComplete(_)),
wait_timeout,
)
.await;
let requests = response_mock.requests();
assert!(
requests.len() == 2,
"expected two calls to the responses API, got {}",
requests.len()
);
// Both the original call and its synthesized "aborted" output must have
// been sent to the model in some captured request.
assert!(
response_mock.saw_function_call(call_id),
"function call not recorded in responses payload"
);
assert_eq!(
response_mock.function_call_output_text(call_id).as_deref(),
Some("aborted"),
"aborted function call output not recorded in responses payload"
);
}

View File

@@ -9,6 +9,7 @@ use codex_core::ModelClient;
use codex_core::ModelProviderInfo;
use codex_core::NewConversation;
use codex_core::Prompt;
use codex_core::ReasoningItemContent;
use codex_core::ResponseEvent;
use codex_core::ResponseItem;
use codex_core::WireApi;
@@ -20,7 +21,6 @@ use codex_core::protocol::Op;
use codex_core::protocol::SessionSource;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::ConversationId;
use codex_protocol::models::ReasoningItemContent;
use codex_protocol::models::ReasoningItemReasoningSummary;
use codex_protocol::models::WebSearchAction;
use codex_protocol::user_input::UserInput;

View File

@@ -1,15 +1,17 @@
use codex_core::CodexAuth;
use codex_core::ContentItem;
use codex_core::ConversationManager;
use codex_core::ModelProviderInfo;
use codex_core::NewConversation;
use codex_core::ResponseItem;
use codex_core::built_in_model_providers;
use codex_core::parse_turn_item;
use codex_core::content_items_to_text;
use codex_core::is_session_prefix_message;
use codex_core::protocol::ConversationPathResponseEvent;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::RolloutItem;
use codex_core::protocol::RolloutLine;
use codex_protocol::items::TurnItem;
use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
use core_test_support::skip_if_no_network;
@@ -113,12 +115,19 @@ async fn fork_conversation_twice_drops_to_first_message() {
let find_user_input_positions = |items: &[RolloutItem]| -> Vec<usize> {
let mut pos = Vec::new();
for (i, it) in items.iter().enumerate() {
if let RolloutItem::ResponseItem(response_item) = it
&& let Some(TurnItem::UserMessage(_)) = parse_turn_item(response_item)
if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = it
&& role == "user"
&& content_items_to_text(content)
.is_some_and(|text| !is_session_prefix_message(&text))
{
// Consider any user message as an input boundary; recorder stores both EventMsg and ResponseItem.
// We specifically look for input items, which are represented as ContentItem::InputText.
pos.push(i);
if content
.iter()
.any(|c| matches!(c, ContentItem::InputText { .. }))
{
pos.push(i);
}
}
}
pos

View File

@@ -2,18 +2,12 @@
use anyhow::Ok;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ItemCompletedEvent;
use codex_core::protocol::ItemStartedEvent;
use codex_core::protocol::Op;
use codex_protocol::items::TurnItem;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_reasoning_item;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::ev_web_search_call_added;
use core_test_support::responses::ev_web_search_call_done;
use core_test_support::responses::mount_sse_once_match;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
@@ -32,7 +26,7 @@ async fn user_message_item_is_emitted() -> anyhow::Result<()> {
let TestCodex { codex, .. } = test_codex().build(&server).await?;
let first_response = sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]);
mount_sse_once_match(&server, any(), first_response).await;
responses::mount_sse_once_match(&server, any(), first_response).await;
codex
.submit(Op::UserInput {
@@ -42,23 +36,21 @@ async fn user_message_item_is_emitted() -> anyhow::Result<()> {
})
.await?;
let started_item = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemStarted(ItemStartedEvent {
item: TurnItem::UserMessage(item),
..
}) => Some(item.clone()),
let started = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemStarted(e) => Some(e.clone()),
_ => None,
})
.await;
let completed_item = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::UserMessage(item),
..
}) => Some(item.clone()),
let completed = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemCompleted(e) => Some(e.clone()),
_ => None,
})
.await;
let TurnItem::UserMessage(started_item) = started.item;
let TurnItem::UserMessage(completed_item) = completed.item;
assert_eq!(started_item.id, completed_item.id);
assert_eq!(
started_item.content,
@@ -74,163 +66,3 @@ async fn user_message_item_is_emitted() -> anyhow::Result<()> {
);
Ok(())
}
/// A streamed assistant message should surface as an `ItemStarted` and an
/// `ItemCompleted` event for the same `TurnItem::AgentMessage`, with the
/// completed item carrying the message text.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn assistant_message_item_is_emitted() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
    let TestCodex { codex, .. } = test_codex().build(&server).await?;

    let stream = sse(vec![
        ev_response_created("resp-1"),
        ev_assistant_message("msg-1", "all done"),
        ev_completed("resp-1"),
    ]);
    mount_sse_once_match(&server, any(), stream).await;

    let prompt = UserInput::Text {
        text: "please summarize results".into(),
    };
    codex.submit(Op::UserInput { items: vec![prompt] }).await?;

    let started = wait_for_event_match(&codex, |event| {
        if let EventMsg::ItemStarted(ItemStartedEvent {
            item: TurnItem::AgentMessage(message),
            ..
        }) = event
        {
            Some(message.clone())
        } else {
            None
        }
    })
    .await;

    let completed = wait_for_event_match(&codex, |event| {
        if let EventMsg::ItemCompleted(ItemCompletedEvent {
            item: TurnItem::AgentMessage(message),
            ..
        }) = event
        {
            Some(message.clone())
        } else {
            None
        }
    })
    .await;

    assert_eq!(started.id, completed.id);
    match completed.content.first() {
        Some(codex_protocol::items::AgentMessageContent::Text { text }) => {
            assert_eq!(text, "all done");
        }
        _ => panic!("expected agent message text content"),
    }

    Ok(())
}
/// A streamed `reasoning` output item should surface as an `ItemStarted` and
/// an `ItemCompleted` event for the same `TurnItem::Reasoning`, with the
/// completed item carrying both the summary texts and the raw content.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn reasoning_item_is_emitted() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
    let TestCodex { codex, .. } = test_codex().build(&server).await?;

    let stream = sse(vec![
        ev_response_created("resp-1"),
        ev_reasoning_item(
            "reasoning-1",
            &["Consider inputs", "Compute output"],
            &["Detailed reasoning trace"],
        ),
        ev_completed("resp-1"),
    ]);
    mount_sse_once_match(&server, any(), stream).await;

    let prompt = UserInput::Text {
        text: "explain your reasoning".into(),
    };
    codex.submit(Op::UserInput { items: vec![prompt] }).await?;

    let started = wait_for_event_match(&codex, |event| {
        if let EventMsg::ItemStarted(ItemStartedEvent {
            item: TurnItem::Reasoning(reasoning),
            ..
        }) = event
        {
            Some(reasoning.clone())
        } else {
            None
        }
    })
    .await;

    let completed = wait_for_event_match(&codex, |event| {
        if let EventMsg::ItemCompleted(ItemCompletedEvent {
            item: TurnItem::Reasoning(reasoning),
            ..
        }) = event
        {
            Some(reasoning.clone())
        } else {
            None
        }
    })
    .await;

    assert_eq!(started.id, completed.id);

    let expected_summary = vec![
        String::from("Consider inputs"),
        String::from("Compute output"),
    ];
    assert_eq!(completed.summary_text, expected_summary);

    let expected_raw = vec![String::from("Detailed reasoning trace")];
    assert_eq!(completed.raw_content, expected_raw);

    Ok(())
}
/// A `web_search_call` item that is first added and then completed in the
/// stream should surface as an `ItemStarted` and an `ItemCompleted` event for
/// the same `TurnItem::WebSearch`, with the completed item carrying the query.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn web_search_item_is_emitted() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
    let TestCodex { codex, .. } = test_codex().build(&server).await?;

    let stream = sse(vec![
        ev_response_created("resp-1"),
        ev_web_search_call_added("web-search-1", "in_progress", "weather seattle"),
        ev_web_search_call_done("web-search-1", "completed", "weather seattle"),
        ev_completed("resp-1"),
    ]);
    mount_sse_once_match(&server, any(), stream).await;

    let prompt = UserInput::Text {
        text: "find the weather".into(),
    };
    codex.submit(Op::UserInput { items: vec![prompt] }).await?;

    let started = wait_for_event_match(&codex, |event| {
        if let EventMsg::ItemStarted(ItemStartedEvent {
            item: TurnItem::WebSearch(search),
            ..
        }) = event
        {
            Some(search.clone())
        } else {
            None
        }
    })
    .await;

    let completed = wait_for_event_match(&codex, |event| {
        if let EventMsg::ItemCompleted(ItemCompletedEvent {
            item: TurnItem::WebSearch(search),
            ..
        }) = event
        {
            Some(search.clone())
        } else {
            None
        }
    })
    .await;

    assert_eq!(started.id, completed.id);
    assert_eq!(completed.query, "weather seattle");

    Ok(())
}

View File

@@ -4,7 +4,6 @@ use codex_core::protocol::Op;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_reasoning_item;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once_match;
use core_test_support::responses::sse;
@@ -63,59 +62,3 @@ async fn resume_includes_initial_messages_from_rollout_events() -> Result<()> {
Ok(())
}
/// Records a turn containing a reasoning item and an assistant message, then
/// resumes the session from its rollout file and checks that the resumed
/// session's `initial_messages` replay the user message, the reasoning summary,
/// the raw reasoning content and the assistant message in the expected order.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn resume_includes_initial_messages_from_reasoning_events() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
// Raw reasoning is switched on; the expected sequence below includes an
// `AgentReasoningRawContent` event.
let mut builder = test_codex().with_config(|config| {
config.show_raw_agent_reasoning = true;
});
let initial = builder.build(&server).await?;
let codex = Arc::clone(&initial.codex);
// Keep the home directory and rollout path so the session can be resumed.
let home = initial.home.clone();
let rollout_path = initial.session_configured.rollout_path.clone();
let initial_sse = sse(vec![
ev_response_created("resp-initial"),
ev_reasoning_item("reason-1", &["Summarized step"], &["raw detail"]),
ev_assistant_message("msg-1", "Completed reasoning turn"),
ev_completed("resp-initial"),
]);
mount_sse_once_match(&server, any(), initial_sse).await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "Record reasoning messages".into(),
}],
})
.await?;
// Wait for the turn to finish before resuming from the rollout.
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
let resumed = builder.resume(&server, home, rollout_path).await?;
let initial_messages = resumed
.session_configured
.initial_messages
.expect("expected initial messages to be present for resumed session");
// The replayed events must match this exact sequence; anything else fails.
match initial_messages.as_slice() {
[
EventMsg::UserMessage(first_user),
EventMsg::TokenCount(_),
EventMsg::AgentReasoning(reasoning),
EventMsg::AgentReasoningRawContent(raw),
EventMsg::AgentMessage(assistant_message),
EventMsg::TokenCount(_),
] => {
assert_eq!(first_user.message, "Record reasoning messages");
assert_eq!(reasoning.text, "Summarized step");
assert_eq!(raw.text, "raw detail");
assert_eq!(assistant_message.message, "Completed reasoning turn");
}
other => panic!("unexpected initial messages after resume: {other:#?}"),
}
Ok(())
}

View File

@@ -227,6 +227,62 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
Ok(())
}
/// A `local_shell_call` item that carries neither `call_id` nor `id` is
/// expected to be reported back to the model, in the next request, as a
/// `function_call_output` with an empty `call_id` and an error message.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn local_shell_missing_ids_maps_to_function_output_error() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex();
let test = builder.build(&server).await?;
// Hand-built event: deliberately omits both `call_id` and `id`.
let local_shell_event = json!({
"type": "response.output_item.done",
"item": {
"type": "local_shell_call",
"status": "completed",
"action": {
"type": "exec",
"command": ["/bin/echo", "hi"],
}
}
});
// First response delivers the malformed shell call.
mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
local_shell_event,
ev_completed("resp-1"),
]),
)
.await;
// Second response finishes the turn; its captured request is inspected below.
let second_mock = mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
submit_turn(
&test,
"check shell output",
AskForApproval::Never,
SandboxPolicy::DangerFullAccess,
)
.await?;
// The output is looked up under the empty call id.
let item = second_mock.single_request().function_call_output("");
assert_eq!(item.get("call_id").and_then(Value::as_str), Some(""));
assert_eq!(
item.get("output").and_then(Value::as_str),
Some("LocalShellCall without call_id or id"),
);
Ok(())
}
async fn collect_tools(use_unified_exec: bool) -> Result<Vec<String>> {
let server = start_mock_server().await;

View File

@@ -18,6 +18,7 @@ use codex_core::NewConversation;
use codex_core::auth::enforce_login_restrictions;
use codex_core::config::Config;
use codex_core::config::ConfigOverrides;
use codex_core::features::Feature;
use codex_core::git_info::get_git_repo_root;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::Event;
@@ -191,6 +192,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
};
let config = Config::load_with_cli_overrides(cli_kv_overrides, overrides).await?;
let approve_all_enabled = config.features.enabled(Feature::ApproveAll);
if let Err(err) = enforce_login_restrictions(&config).await {
eprintln!("{err}");
@@ -364,6 +366,34 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
if matches!(event.msg, EventMsg::Error(_)) {
error_seen = true;
}
// Auto-approve requests when the approve_all feature is enabled.
if approve_all_enabled {
match &event.msg {
EventMsg::ExecApprovalRequest(_) => {
if let Err(e) = conversation
.submit(Op::ExecApproval {
id: event.id.clone(),
decision: codex_core::protocol::ReviewDecision::Approved,
})
.await
{
error!("failed to auto-approve exec: {e}");
}
}
EventMsg::ApplyPatchApprovalRequest(_) => {
if let Err(e) = conversation
.submit(Op::PatchApproval {
id: event.id.clone(),
decision: codex_core::protocol::ReviewDecision::Approved,
})
.await
{
error!("failed to auto-approve patch: {e}");
}
}
_ => {}
}
}
let shutdown: CodexStatus = event_processor.process_event(event);
match shutdown {
CodexStatus::Running => continue,

View File

@@ -0,0 +1,81 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used, clippy::unwrap_used)]
use anyhow::Result;
use core_test_support::responses;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex_exec::test_codex_exec;
use serde_json::Value;
use serde_json::json;
/// Runs the exec command built by `test_codex_exec` with `args` against a mock
/// responses server and returns the `output` string of the resulting
/// `function_call_output`.
///
/// The mock serves two streams in sequence: the first asks for a `shell` call
/// (`echo approve-all-ok`) with `with_escalated_permissions: true`; the second
/// replies with an assistant message and completes. The returned text is read
/// from the second captured request.
///
/// # Errors
///
/// Returns an error if the shell-call arguments cannot be serialized to JSON.
///
/// # Panics
///
/// Panics if the command does not exit successfully, if fewer than two
/// requests reach the mock server, or if the `function_call_output` for the
/// call has no string `output`.
async fn run_exec_with_args(args: &[&str]) -> Result<String> {
    let test = test_codex_exec();
    let call_id = "exec-approve";

    // This file is compiled out on Windows (`#![cfg(not(target_os = "windows"))]`),
    // so only the POSIX shell invocation is needed here.
    let exec_args = json!({
        "command": ["/bin/sh", "-lc", "echo approve-all-ok"],
        "timeout_ms": 1500,
        "with_escalated_permissions": true
    });

    let response_streams = vec![
        sse(vec![
            ev_response_created("resp-1"),
            ev_function_call(call_id, "shell", &serde_json::to_string(&exec_args)?),
            ev_completed("resp-1"),
        ]),
        sse(vec![
            ev_assistant_message("msg-1", "done"),
            ev_completed("resp-2"),
        ]),
    ];
    let server = responses::start_mock_server().await;
    let mock = mount_sse_sequence(&server, response_streams).await;

    test.cmd_with_server(&server).args(args).assert().success();

    let requests = mock.requests();
    assert!(requests.len() >= 2, "expected at least two responses POSTs");
    // The second POST carries the result of the shell call back to the model.
    let item = requests[1].function_call_output(call_id);
    let output_str = item
        .get("output")
        .and_then(Value::as_str)
        .expect("function_call_output.output should be a string");
    Ok(output_str.to_string())
}
/// With `features.approve_all=true` passed through `-c`, the escalated shell
/// call should run without manual approval: the output sent back to the model
/// must report exit code 0 and contain the echoed marker.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn approve_all_auto_accepts_exec() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let cli_args = [
        "--skip-git-repo-check",
        "-c",
        "features.approve_all=true",
        "train",
    ];
    let output = run_exec_with_args(&cli_args).await?;

    let reported_success = output.contains("Exit code: 0");
    assert!(
        reported_success,
        "expected Exit code: 0 in output: {output}"
    );

    let echoed_marker = output.contains("approve-all-ok");
    assert!(
        echoed_marker,
        "expected command output in response: {output}"
    );

    Ok(())
}

View File

@@ -1,5 +1,6 @@
// Aggregates all former standalone integration tests as modules.
mod apply_patch;
mod approve_all;
mod auth_env;
mod originator;
mod output_schema;

View File

@@ -0,0 +1,23 @@
[package]
name = "codex-mcp-client"
version = { workspace = true }
edition = "2024"
[lints]
workspace = true
[dependencies]
anyhow = { workspace = true }
mcp-types = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
tracing = { workspace = true, features = ["log"] }
tracing-subscriber = { workspace = true, features = ["fmt", "env-filter"] }
tokio = { workspace = true, features = [
"io-util",
"macros",
"process",
"rt-multi-thread",
"sync",
"time",
] }

View File

@@ -0,0 +1,3 @@
mod mcp_client;
pub use mcp_client::McpClient;

View File

@@ -0,0 +1,88 @@
//! Simple command-line utility to exercise `McpClient`.
//!
//! Example usage:
//!
//! ```bash
//! cargo run -p codex-mcp-client -- codex-mcp-server
//! ```
//!
//! Any additional arguments after the first one are forwarded to the spawned
//! program. The utility connects, issues a `tools/list` request and prints the
//! server's response as pretty JSON.
use std::ffi::OsString;
use std::time::Duration;
use anyhow::Context;
use anyhow::Result;
use codex_mcp_client::McpClient;
use mcp_types::ClientCapabilities;
use mcp_types::Implementation;
use mcp_types::InitializeRequestParams;
use mcp_types::ListToolsRequestParams;
use mcp_types::MCP_SCHEMA_VERSION;
use tracing_subscriber::EnvFilter;
/// Spawns the program named on the command line as an MCP server, performs the
/// `initialize` handshake, and prints the server's `tools/list` result to
/// stdout as pretty JSON. Logs and diagnostics go to stderr.
///
/// Exit status: `0` when `--help`/`-h` is the first argument, `1` when no
/// program is supplied. Any other failure is reported through the returned
/// error.
#[tokio::main]
async fn main() -> Result<()> {
    let default_level = "debug";
    let _ = tracing_subscriber::fmt()
        // Fallback to the `default_level` log filter if the environment
        // variable is not set _or_ contains an invalid value
        .with_env_filter(
            EnvFilter::try_from_default_env()
                .or_else(|_| EnvFilter::try_new(default_level))
                .unwrap_or_else(|_| EnvFilter::new(default_level)),
        )
        .with_writer(std::io::stderr)
        .try_init();

    // Collect command-line arguments excluding the program name itself.
    let mut args: Vec<OsString> = std::env::args_os().skip(1).collect();
    let help_requested = args
        .first()
        .is_some_and(|arg| matches!(arg.to_str(), Some("--help" | "-h")));
    if args.is_empty() || help_requested {
        eprintln!("Usage: mcp-client <program> [args..]\n\nExample: mcp-client codex-mcp-server");
        // Asking for help is not a failure; omitting the program is.
        std::process::exit(if help_requested { 0 } else { 1 });
    }
    // Kept only for the error message below; `args` is consumed by the spawn.
    let original_args = args.clone();

    // Spawn the subprocess and connect the client.
    let program = args.remove(0);
    let env = None;
    let client = McpClient::new_stdio_client(program, args, env, &[], None)
        .await
        .with_context(|| format!("failed to spawn subprocess: {original_args:?}"))?;

    let params = InitializeRequestParams {
        capabilities: ClientCapabilities {
            experimental: None,
            roots: None,
            sampling: None,
            elicitation: None,
        },
        client_info: Implementation {
            name: "codex-mcp-client".to_owned(),
            version: env!("CARGO_PKG_VERSION").to_owned(),
            title: Some("Codex".to_string()),
            // This field is used by Codex when it is an MCP server: it should
            // not be used when Codex is an MCP client.
            user_agent: None,
        },
        protocol_version: MCP_SCHEMA_VERSION.to_owned(),
    };
    let timeout = Some(Duration::from_secs(10));
    let response = client.initialize(params, timeout).await?;
    eprintln!("initialize response: {response:?}");

    // Issue `tools/list` request (no params).
    let timeout = None;
    let tools = client
        .list_tools(None::<ListToolsRequestParams>, timeout)
        .await
        .context("tools/list request failed")?;

    // Print the result in a human readable form.
    println!("{}", serde_json::to_string_pretty(&tools)?);
    Ok(())
}

View File

@@ -0,0 +1,509 @@
//! A minimal async client for the Model Context Protocol (MCP).
//!
//! The client is intentionally lightweight; it is only capable of:
//! 1. Spawning a subprocess that launches a conforming MCP server that
//! communicates over stdio.
//! 2. Sending MCP requests and pairing them with their corresponding
//! responses.
//! 3. Offering a convenience helper for the common `tools/list` request.
//!
//! The crate hides all JSON-RPC framing details behind a typed API. Users
//! interact with the [`ModelContextProtocolRequest`] trait from `mcp-types` to
//! issue requests and receive strongly-typed results.
use std::collections::HashMap;
use std::ffi::OsString;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::atomic::AtomicI64;
use std::sync::atomic::Ordering;
use std::time::Duration;
use anyhow::Context;
use anyhow::Result;
use anyhow::anyhow;
use mcp_types::CallToolRequest;
use mcp_types::CallToolRequestParams;
use mcp_types::InitializeRequest;
use mcp_types::InitializeRequestParams;
use mcp_types::InitializedNotification;
use mcp_types::JSONRPC_VERSION;
use mcp_types::JSONRPCMessage;
use mcp_types::JSONRPCNotification;
use mcp_types::JSONRPCRequest;
use mcp_types::JSONRPCResponse;
use mcp_types::ListToolsRequest;
use mcp_types::ListToolsRequestParams;
use mcp_types::ListToolsResult;
use mcp_types::ModelContextProtocolNotification;
use mcp_types::ModelContextProtocolRequest;
use mcp_types::RequestId;
use serde::Serialize;
use serde::de::DeserializeOwned;
use tokio::io::AsyncBufReadExt;
use tokio::io::AsyncWriteExt;
use tokio::io::BufReader;
use tokio::process::Command;
use tokio::sync::Mutex;
use tokio::sync::mpsc;
use tokio::sync::oneshot;
use tokio::time;
use tracing::debug;
use tracing::error;
use tracing::info;
use tracing::warn;
/// Capacity of the bounded channels used for transporting messages between the
/// client API and the IO tasks.
const CHANNEL_CAPACITY: usize = 128;
/// Internal representation of a pending request sender.
type PendingSender = oneshot::Sender<JSONRPCMessage>;
/// A running MCP client instance.
///
/// Owns the spawned server process together with the state needed to pair
/// responses read from the server with the requests that are waiting on them.
pub struct McpClient {
    /// Retain this child process until the client is dropped. The Tokio runtime
    /// will make a "best effort" to reap the process after it exits, but it is
    /// not a guarantee. See the `kill_on_drop` documentation for details.
    ///
    /// The handle is polled in the `Drop` impl, so the field is in use and
    /// needs no `dead_code` allowance.
    child: tokio::process::Child,
    /// Channel for sending JSON-RPC messages *to* the background writer task.
    outgoing_tx: mpsc::Sender<JSONRPCMessage>,
    /// Map of `request.id -> oneshot::Sender` used to dispatch responses back
    /// to the originating caller.
    pending: Arc<Mutex<HashMap<i64, PendingSender>>>,
    /// Monotonically increasing counter used to generate request IDs.
    id_counter: AtomicI64,
}
impl McpClient {
/// Spawn the given command and establish an MCP session over its STDIO.
/// Caller is responsible for sending the `initialize` request. See
/// [`initialize`](Self::initialize) for details.
pub async fn new_stdio_client(
program: OsString,
args: Vec<OsString>,
env: Option<HashMap<String, String>>,
env_vars: &[String],
cwd: Option<PathBuf>,
) -> std::io::Result<Self> {
let mut command = Command::new(program);
command
.args(args)
.env_clear()
.envs(create_env_for_mcp_server(env, env_vars))
.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::null())
// As noted in the `kill_on_drop` documentation, the Tokio runtime makes
// a "best effort" to reap-after-exit to avoid zombie processes, but it
// is not a guarantee.
.kill_on_drop(true);
if let Some(cwd) = cwd {
command.current_dir(cwd);
}
let mut child = command.spawn()?;
let stdin = child
.stdin
.take()
.ok_or_else(|| std::io::Error::other("failed to capture child stdin"))?;
let stdout = child
.stdout
.take()
.ok_or_else(|| std::io::Error::other("failed to capture child stdout"))?;
let (outgoing_tx, mut outgoing_rx) = mpsc::channel::<JSONRPCMessage>(CHANNEL_CAPACITY);
let pending: Arc<Mutex<HashMap<i64, PendingSender>>> = Arc::new(Mutex::new(HashMap::new()));
// Spawn writer task. It listens on the `outgoing_rx` channel and
// writes messages to the child's STDIN.
let writer_handle = {
let mut stdin = stdin;
tokio::spawn(async move {
while let Some(msg) = outgoing_rx.recv().await {
match serde_json::to_string(&msg) {
Ok(json) => {
debug!("MCP message to server: {json}");
if stdin.write_all(json.as_bytes()).await.is_err() {
error!("failed to write message to child stdin");
break;
}
if stdin.write_all(b"\n").await.is_err() {
error!("failed to write newline to child stdin");
break;
}
// No explicit flush needed on a pipe; write_all is sufficient.
}
Err(e) => error!("failed to serialize JSONRPCMessage: {e}"),
}
}
})
};
// Spawn reader task. It reads line-delimited JSON from the child's
// STDOUT and dispatches responses to the pending map.
let reader_handle = {
let pending = pending.clone();
let mut lines = BufReader::new(stdout).lines();
tokio::spawn(async move {
while let Ok(Some(line)) = lines.next_line().await {
debug!("MCP message from server: {line}");
match serde_json::from_str::<JSONRPCMessage>(&line) {
Ok(JSONRPCMessage::Response(resp)) => {
Self::dispatch_response(resp, &pending).await;
}
Ok(JSONRPCMessage::Error(err)) => {
Self::dispatch_error(err, &pending).await;
}
Ok(JSONRPCMessage::Notification(JSONRPCNotification { .. })) => {
// For now we only log server-initiated notifications.
info!("<- notification: {}", line);
}
Ok(other) => {
// Batch responses and requests are currently not
// expected from the server log and ignore.
info!("<- unhandled message: {:?}", other);
}
Err(e) => {
error!("failed to deserialize JSONRPCMessage: {e}; line = {}", line)
}
}
}
})
};
// We intentionally *detach* the tasks. They will keep running in the
// background as long as their respective resources (channels/stdin/
// stdout) are alive. Dropping `McpClient` cancels the tasks due to
// dropped resources.
let _ = (writer_handle, reader_handle);
Ok(Self {
child,
outgoing_tx,
pending,
id_counter: AtomicI64::new(1),
})
}
/// Send an arbitrary MCP request and await the typed result.
///
/// If `timeout` is `None` the call waits indefinitely. If `Some(duration)`
/// is supplied and no response is received within the given period, a
/// timeout error is returned.
pub async fn send_request<R>(
&self,
params: R::Params,
timeout: Option<Duration>,
) -> Result<R::Result>
where
R: ModelContextProtocolRequest,
R::Params: Serialize,
R::Result: DeserializeOwned,
{
// Create a new unique ID.
let id = self.id_counter.fetch_add(1, Ordering::SeqCst);
let request_id = RequestId::Integer(id);
// Serialize params -> JSON. For many request types `Params` is
// `Option<T>` and `None` should be encoded as *absence* of the field.
let params_json = serde_json::to_value(&params)?;
let params_field = if params_json.is_null() {
None
} else {
Some(params_json)
};
let jsonrpc_request = JSONRPCRequest {
id: request_id.clone(),
jsonrpc: JSONRPC_VERSION.to_string(),
method: R::METHOD.to_string(),
params: params_field,
};
let message = JSONRPCMessage::Request(jsonrpc_request);
// oneshot channel for the response.
let (tx, rx) = oneshot::channel();
// Register in pending map *before* sending the message so a race where
// the response arrives immediately cannot be lost.
{
let mut guard = self.pending.lock().await;
guard.insert(id, tx);
}
// Send to writer task.
if self.outgoing_tx.send(message).await.is_err() {
return Err(anyhow!(
"failed to send message to writer task - channel closed"
));
}
// Await the response, optionally bounded by a timeout.
let msg = match timeout {
Some(duration) => {
match time::timeout(duration, rx).await {
Ok(Ok(msg)) => msg,
Ok(Err(_)) => {
// Channel closed without a reply remove the pending entry.
let mut guard = self.pending.lock().await;
guard.remove(&id);
return Err(anyhow!(
"response channel closed before a reply was received"
));
}
Err(_) => {
// Timed out. Remove the pending entry so we don't leak.
let mut guard = self.pending.lock().await;
guard.remove(&id);
return Err(anyhow!("request timed out"));
}
}
}
None => rx
.await
.map_err(|_| anyhow!("response channel closed before a reply was received"))?,
};
match msg {
JSONRPCMessage::Response(JSONRPCResponse { result, .. }) => {
let typed: R::Result = serde_json::from_value(result)?;
Ok(typed)
}
JSONRPCMessage::Error(err) => Err(anyhow!(format!(
"server returned JSON-RPC error: code = {}, message = {}",
err.error.code, err.error.message
))),
other => Err(anyhow!(format!(
"unexpected message variant received in reply path: {other:?}"
))),
}
}
/// Queue an MCP notification for delivery to the server.
///
/// Notifications carry no request ID and no reply is awaited; the call
/// returns as soon as the message has been handed to the writer task.
///
/// # Errors
///
/// Fails when the params cannot be serialized or when the writer task's
/// channel is closed.
pub async fn send_notification<N>(&self, params: N::Params) -> Result<()>
where
    N: ModelContextProtocolNotification,
    N::Params: Serialize,
{
    let method = N::METHOD.to_string();

    // `Params` is frequently an `Option<T>`; a `None` serializes to JSON
    // `null`, which is sent as an *absent* `params` field.
    let params = match serde_json::to_value(&params)? {
        serde_json::Value::Null => None,
        value => Some(value),
    };

    let notification = JSONRPCMessage::Notification(JSONRPCNotification {
        jsonrpc: JSONRPC_VERSION.to_string(),
        method: method.clone(),
        params,
    });

    self.outgoing_tx
        .send(notification)
        .await
        .with_context(|| format!("failed to send notification `{method}` to writer task"))
}
/// Performs the MCP initialization handshake.
///
/// First sends an `initialize` request carrying `initialize_params` (bounded
/// by `timeout`, if any). Once the server has answered, sends the
/// `notifications/initialized` notification. The server's answer to the
/// `initialize` request is returned; an error from either step is propagated.
pub async fn initialize(
    &self,
    initialize_params: InitializeRequestParams,
    timeout: Option<Duration>,
) -> Result<mcp_types::InitializeResult> {
    let init_result = self
        .send_request::<InitializeRequest>(initialize_params, timeout)
        .await?;

    // The notification is only sent after a successful `initialize` reply.
    self.send_notification::<InitializedNotification>(None)
        .await
        .map(|()| init_result)
}
/// Issues a `tools/list` request and returns the server's typed result.
///
/// `params` and `timeout` are forwarded unchanged to
/// [`send_request`](Self::send_request).
pub async fn list_tools(
    &self,
    params: Option<ListToolsRequestParams>,
    timeout: Option<Duration>,
) -> Result<ListToolsResult> {
    let listing = self
        .send_request::<ListToolsRequest>(params, timeout)
        .await?;
    Ok(listing)
}
/// Issues a `tools/call` request for the tool `name` with the given
/// `arguments` and returns the server's typed result.
///
/// The request parameters are logged at debug level before being sent.
pub async fn call_tool(
    &self,
    name: String,
    arguments: Option<serde_json::Value>,
    timeout: Option<Duration>,
) -> Result<mcp_types::CallToolResult> {
    let params = CallToolRequestParams { name, arguments };
    debug!("MCP tool call: {params:?}");
    let outcome = self
        .send_request::<CallToolRequest>(params, timeout)
        .await?;
    Ok(outcome)
}
/// Internal helper: hand a JSON-RPC *response* to the caller that registered
/// the matching request ID in `pending`.
///
/// The entry is removed from the map before the response is forwarded.
/// Responses with a string ID, or with an ID nobody is waiting on, are logged
/// and discarded.
async fn dispatch_response(
    resp: JSONRPCResponse,
    pending: &Arc<Mutex<HashMap<i64, PendingSender>>>,
) {
    // Only integer IDs are ever generated by this client, so a string ID
    // cannot correspond to an entry in `pending`.
    let RequestId::Integer(id) = resp.id else {
        error!("response with string ID - no matching pending request");
        return;
    };

    // The lock guard is a temporary of this statement, so it is released
    // before the response is forwarded.
    let waiter = pending.lock().await.remove(&id);

    match waiter {
        Some(tx) => {
            // A send error only means the receiver was already dropped.
            let _ = tx.send(JSONRPCMessage::Response(resp));
        }
        None => warn!(id, "no pending request found for response"),
    }
}
/// Internal helper: route a JSON-RPC *error* object to the pending map.
async fn dispatch_error(
err: mcp_types::JSONRPCError,
pending: &Arc<Mutex<HashMap<i64, PendingSender>>>,
) {
let id = match err.id {
RequestId::Integer(i) => i,
RequestId::String(_) => return, // see comment above
};
let tx_opt = {
let mut guard = pending.lock().await;
guard.remove(&id)
};
if let Some(tx) = tx_opt {
let _ = tx.send(JSONRPCMessage::Error(err));
}
}
}
impl Drop for McpClient {
    /// Polls the child's exit status once while the client is being dropped.
    fn drop(&mut self) {
        // The process is already tagged with `kill_on_drop(true)`. Polling it
        // here as well means that a child which has already exited is reaped
        // right away rather than whenever the Tokio runtime gets around to
        // it. The outcome of the poll is deliberately ignored.
        let _exit_status = self.child.try_wait();
    }
}
/// Environment variables that are always included when spawning a new MCP
/// server.
#[rustfmt::skip]
#[cfg(unix)]
const DEFAULT_ENV_VARS: &[&str] = &[
    // https://modelcontextprotocol.io/docs/tools/debugging#environment-variables
    // states:
    //
    // > MCP servers inherit only a subset of environment variables automatically,
    // > like `USER`, `HOME`, and `PATH`.
    //
    // But it does not fully enumerate the list. Empirically, when spawning
    // an MCP server via Claude Desktop on macOS, it reports the following
    // environment variables:
    "HOME",
    "LOGNAME",
    "PATH",
    "SHELL",
    "USER",
    "__CF_USER_TEXT_ENCODING",

    // Additional environment variables Codex chooses to include by default:
    "LANG",
    "LC_ALL",
    "TERM",
    "TMPDIR",
    "TZ",
];
/// Environment variables that are always included when spawning a new MCP
/// server (Windows builds).
#[cfg(windows)]
const DEFAULT_ENV_VARS: &[&str] = &[
    // TODO: More research is necessary to curate this list.
    "PATH",
    "PATHEXT",
    "USERNAME",
    "USERDOMAIN",
    "USERPROFILE",
    "TEMP",
    "TMP",
];
/// Builds the environment handed to a spawned MCP server.
///
/// Every name in `DEFAULT_ENV_VARS` followed by every name in `env_vars` is
/// looked up in the current process environment; names whose lookup fails are
/// skipped. The entries of `extra_env` (which comes from the config for an
/// entry in `mcp_servers` in `config.toml`) are applied last, so they replace
/// any inherited value with the same key.
fn create_env_for_mcp_server(
    extra_env: Option<HashMap<String, String>>,
    env_vars: &[String],
) -> HashMap<String, String> {
    let mut server_env = HashMap::new();

    let inherited_names = DEFAULT_ENV_VARS
        .iter()
        .copied()
        .chain(env_vars.iter().map(String::as_str));
    for name in inherited_names {
        if let Ok(value) = std::env::var(name) {
            server_env.insert(name.to_string(), value);
        }
    }

    // Explicit configuration wins over anything inherited above.
    server_env.extend(extra_env.unwrap_or_default());
    server_env
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Sets `key` to `value` in the environment of the test process.
    fn set_env_var(key: &str, value: &str) {
        // NOTE(review): mutating the process environment is only sound while
        // no other thread accesses it concurrently; confirm that tests touching
        // the environment do not race with each other.
        unsafe { std::env::set_var(key, value) };
    }

    /// Removes `key` from the environment of the test process.
    fn remove_env_var(key: &str) {
        // NOTE(review): same caveat as `set_env_var`.
        unsafe { std::env::remove_var(key) };
    }

    /// A value supplied through `extra_env` replaces the inherited one, and
    /// the default variable `PATH` is still present.
    #[test]
    fn test_create_env_for_mcp_server() {
        let env_var = "USER";
        let env_var_existing_value = std::env::var(env_var).unwrap_or_default();
        let overridden = format!("{env_var_existing_value}-extra");

        let mut extra_env = HashMap::new();
        extra_env.insert(env_var.to_owned(), overridden.clone());

        let mcp_server_env = create_env_for_mcp_server(Some(extra_env), &[]);

        assert!(mcp_server_env.contains_key("PATH"));
        assert_eq!(Some(&overridden), mcp_server_env.get(env_var));
    }

    /// A variable named in `env_vars` is forwarded when it is set in the
    /// current process.
    #[test]
    fn test_create_env_for_mcp_server_includes_extra_whitelisted_vars() {
        let custom_var = "CUSTOM_TEST_VAR";
        let expected = "value".to_string();
        set_env_var(custom_var, &expected);

        let whitelist = [custom_var.to_string()];
        let mcp_server_env = create_env_for_mcp_server(None, &whitelist);

        assert_eq!(Some(&expected), mcp_server_env.get(custom_var));
        remove_env_var(custom_var);
    }
}

View File

@@ -1,9 +1,3 @@
use crate::protocol::AgentMessageEvent;
use crate::protocol::AgentReasoningEvent;
use crate::protocol::AgentReasoningRawContentEvent;
use crate::protocol::EventMsg;
use crate::protocol::UserMessageEvent;
use crate::protocol::WebSearchEndEvent;
use crate::user_input::UserInput;
use schemars::JsonSchema;
use serde::Deserialize;
@@ -13,9 +7,6 @@ use ts_rs::TS;
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
pub enum TurnItem {
UserMessage(UserMessageItem),
AgentMessage(AgentMessageItem),
Reasoning(ReasoningItem),
WebSearch(WebSearchItem),
}
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
@@ -24,31 +15,6 @@ pub struct UserMessageItem {
pub content: Vec<UserInput>,
}
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
pub enum AgentMessageContent {
Text { text: String },
}
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
pub struct AgentMessageItem {
pub id: String,
pub content: Vec<AgentMessageContent>,
}
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
pub struct ReasoningItem {
pub id: String,
pub summary_text: Vec<String>,
#[serde(default)]
pub raw_content: Vec<String>,
}
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
pub struct WebSearchItem {
pub id: String,
pub query: String,
}
impl UserMessageItem {
pub fn new(content: &[UserInput]) -> Self {
Self {
@@ -56,104 +22,12 @@ impl UserMessageItem {
content: content.to_vec(),
}
}
pub fn as_legacy_event(&self) -> EventMsg {
EventMsg::UserMessage(UserMessageEvent {
message: self.message(),
images: Some(self.image_urls()),
})
}
pub fn message(&self) -> String {
self.content
.iter()
.map(|c| match c {
UserInput::Text { text } => text.clone(),
_ => String::new(),
})
.collect::<Vec<String>>()
.join("")
}
pub fn image_urls(&self) -> Vec<String> {
self.content
.iter()
.filter_map(|c| match c {
UserInput::Image { image_url } => Some(image_url.clone()),
_ => None,
})
.collect()
}
}
impl AgentMessageItem {
pub fn new(content: &[AgentMessageContent]) -> Self {
Self {
id: uuid::Uuid::new_v4().to_string(),
content: content.to_vec(),
}
}
pub fn as_legacy_events(&self) -> Vec<EventMsg> {
self.content
.iter()
.map(|c| match c {
AgentMessageContent::Text { text } => EventMsg::AgentMessage(AgentMessageEvent {
message: text.clone(),
}),
})
.collect()
}
}
impl ReasoningItem {
pub fn as_legacy_events(&self, show_raw_agent_reasoning: bool) -> Vec<EventMsg> {
let mut events = Vec::new();
for summary in &self.summary_text {
events.push(EventMsg::AgentReasoning(AgentReasoningEvent {
text: summary.clone(),
}));
}
if show_raw_agent_reasoning {
for entry in &self.raw_content {
events.push(EventMsg::AgentReasoningRawContent(
AgentReasoningRawContentEvent {
text: entry.clone(),
},
));
}
}
events
}
}
impl WebSearchItem {
pub fn as_legacy_event(&self) -> EventMsg {
EventMsg::WebSearchEnd(WebSearchEndEvent {
call_id: self.id.clone(),
query: self.query.clone(),
})
}
}
impl TurnItem {
pub fn id(&self) -> String {
match self {
TurnItem::UserMessage(item) => item.id.clone(),
TurnItem::AgentMessage(item) => item.id.clone(),
TurnItem::Reasoning(item) => item.id.clone(),
TurnItem::WebSearch(item) => item.id.clone(),
}
}
pub fn as_legacy_events(&self, show_raw_agent_reasoning: bool) -> Vec<EventMsg> {
match self {
TurnItem::UserMessage(item) => vec![item.as_legacy_event()],
TurnItem::AgentMessage(item) => item.as_legacy_events(),
TurnItem::WebSearch(item) => vec![item.as_legacy_event()],
TurnItem::Reasoning(item) => item.as_legacy_events(show_raw_agent_reasoning),
}
}
}

View File

@@ -770,13 +770,69 @@ pub struct AgentMessageEvent {
pub message: String,
}
// Classification of an input message according to the wrapper tags around its
// text; see the `From<(T, U)>` impl for how a message is classified. Variant
// names are serialized in snake_case.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum InputMessageKind {
    /// Plain user text (default)
    Plain,
    /// XML-wrapped user instructions (<user_instructions>...)
    UserInstructions,
    /// XML-wrapped environment context (<environment_context>...)
    EnvironmentContext,
}
// Event carrying a user message together with optional metadata about it.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct UserMessageEvent {
    // The message text.
    pub message: String,
    // How the message was classified; omitted from the serialized form when
    // `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub kind: Option<InputMessageKind>,
    // Images attached to the message (presumably image URLs; confirm against
    // the producers of this event); omitted from the serialized form when
    // `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<String>>,
}
impl<T, U> From<(T, U)> for InputMessageKind
where
    T: AsRef<str>,
    U: AsRef<str>,
{
    /// Classifies a `(role, message)` pair by the tags wrapping the message.
    ///
    /// The role is ignored. The message is trimmed and then checked, ignoring
    /// ASCII case, first for the environment-context open/close tags and then
    /// for the user-instructions open/close tags. Anything else is `Plain`.
    fn from(value: (T, U)) -> Self {
        let (_, message) = value;
        let body = message.as_ref().trim();

        // True when `body` begins with `open` and ends with `close`.
        let wrapped_in = |open: &str, close: &str| {
            starts_with_ignore_ascii_case(body, open) && ends_with_ignore_ascii_case(body, close)
        };

        if wrapped_in(ENVIRONMENT_CONTEXT_OPEN_TAG, ENVIRONMENT_CONTEXT_CLOSE_TAG) {
            return InputMessageKind::EnvironmentContext;
        }
        if wrapped_in(USER_INSTRUCTIONS_OPEN_TAG, USER_INSTRUCTIONS_CLOSE_TAG) {
            return InputMessageKind::UserInstructions;
        }
        InputMessageKind::Plain
    }
}
/// Returns `true` when `text` begins with `prefix`, comparing byte-wise and
/// treating ASCII letters case-insensitively. Non-ASCII bytes must match
/// exactly. An empty `prefix` always matches.
fn starts_with_ignore_ascii_case(text: &str, prefix: &str) -> bool {
    // `get` yields `None` when `text` is shorter than `prefix`.
    match text.as_bytes().get(..prefix.len()) {
        Some(head) => head.eq_ignore_ascii_case(prefix.as_bytes()),
        None => false,
    }
}
/// Returns `true` when `text` ends with `suffix`, comparing byte-wise and
/// treating ASCII letters case-insensitively. Non-ASCII bytes must match
/// exactly. An empty `suffix` always matches.
fn ends_with_ignore_ascii_case(text: &str, suffix: &str) -> bool {
    let haystack = text.as_bytes();
    let needle = suffix.as_bytes();
    // `checked_sub` yields `None` when `text` is shorter than `suffix`.
    haystack
        .len()
        .checked_sub(needle.len())
        .map_or(false, |start| haystack[start..].eq_ignore_ascii_case(needle))
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct AgentMessageDeltaEvent {
pub delta: String,
@@ -919,7 +975,6 @@ pub enum SessionSource {
VSCode,
Exec,
Mcp,
SubAgent,
#[serde(other)]
Unknown,
}

View File

@@ -37,7 +37,6 @@ pub async fn perform_oauth_login(
store_mode: OAuthCredentialsStoreMode,
http_headers: Option<HashMap<String, String>>,
env_http_headers: Option<HashMap<String, String>>,
scopes: &[String],
) -> Result<()> {
let server = Arc::new(Server::http("127.0.0.1:0").map_err(|err| anyhow!(err))?);
let guard = CallbackServerGuard {
@@ -62,9 +61,8 @@ pub async fn perform_oauth_login(
let http_client = apply_default_headers(ClientBuilder::new(), &default_headers).build()?;
let mut oauth_state = OAuthState::new(server_url, Some(http_client)).await?;
let scope_refs: Vec<&str> = scopes.iter().map(String::as_str).collect();
oauth_state
.start_authorization(&scope_refs, &redirect_uri, Some("Codex"))
.start_authorization(&[], &redirect_uri, Some("Codex"))
.await?;
let auth_url = oauth_state.get_authorization_url().await?;

View File

@@ -23,6 +23,7 @@ use codex_core::protocol::ExecApprovalRequestEvent;
use codex_core::protocol::ExecCommandBeginEvent;
use codex_core::protocol::ExecCommandEndEvent;
use codex_core::protocol::ExitedReviewModeEvent;
use codex_core::protocol::InputMessageKind;
use codex_core::protocol::ListCustomPromptsResponseEvent;
use codex_core::protocol::McpListToolsResponseEvent;
use codex_core::protocol::McpToolCallBeginEvent;
@@ -1552,9 +1553,17 @@ impl ChatWidget {
}
fn on_user_message_event(&mut self, event: UserMessageEvent) {
let message = event.message.trim();
if !message.is_empty() {
self.add_to_history(history_cell::new_user_prompt(message.to_string()));
match event.kind {
Some(InputMessageKind::EnvironmentContext)
| Some(InputMessageKind::UserInstructions) => {
// Skip XML-wrapped context blocks in the transcript.
}
Some(InputMessageKind::Plain) | None => {
let message = event.message.trim();
if !message.is_empty() {
self.add_to_history(history_cell::new_user_prompt(message.to_string()));
}
}
}
}

View File

@@ -23,6 +23,7 @@ use codex_core::protocol::ExecCommandBeginEvent;
use codex_core::protocol::ExecCommandEndEvent;
use codex_core::protocol::ExitedReviewModeEvent;
use codex_core::protocol::FileChange;
use codex_core::protocol::InputMessageKind;
use codex_core::protocol::Op;
use codex_core::protocol::PatchApplyBeginEvent;
use codex_core::protocol::PatchApplyEndEvent;
@@ -103,6 +104,7 @@ fn resumed_initial_messages_render_history() {
initial_messages: Some(vec![
EventMsg::UserMessage(UserMessageEvent {
message: "hello from user".to_string(),
kind: Some(InputMessageKind::Plain),
images: None,
}),
EventMsg::AgentMessage(AgentMessageEvent {

View File

@@ -10,7 +10,6 @@ use codex_core::ConversationsPage;
use codex_core::Cursor;
use codex_core::INTERACTIVE_SESSION_SOURCES;
use codex_core::RolloutRecorder;
use codex_protocol::items::TurnItem;
use color_eyre::eyre::Result;
use crossterm::event::KeyCode;
use crossterm::event::KeyEvent;
@@ -31,7 +30,10 @@ use crate::text_formatting::truncate_text;
use crate::tui::FrameRequester;
use crate::tui::Tui;
use crate::tui::TuiEvent;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::InputMessageKind;
use codex_protocol::protocol::USER_MESSAGE_BEGIN;
const PAGE_SIZE: usize = 25;
const LOAD_NEAR_THRESHOLD: usize = 5;
@@ -614,8 +616,37 @@ fn extract_timestamp(value: &serde_json::Value) -> Option<DateTime<Utc>> {
fn preview_from_head(head: &[serde_json::Value]) -> Option<String> {
head.iter()
.filter_map(|value| serde_json::from_value::<ResponseItem>(value.clone()).ok())
.find_map(|item| match codex_core::parse_turn_item(&item) {
Some(TurnItem::UserMessage(user)) => Some(user.message()),
.find_map(|item| match item {
ResponseItem::Message { content, .. } => {
// Find the actual user message (as opposed to user instructions or ide context)
let preview = content
.into_iter()
.filter_map(|content| match content {
ContentItem::InputText { text }
if matches!(
InputMessageKind::from(("user", text.as_str())),
InputMessageKind::Plain
) =>
{
// Strip ide context.
let text = match text.find(USER_MESSAGE_BEGIN) {
Some(idx) => {
text[idx + USER_MESSAGE_BEGIN.len()..].trim().to_string()
}
None => text,
};
Some(text)
}
_ => None,
})
.collect::<String>();
if preview.is_empty() {
None
} else {
Some(preview)
}
}
_ => None,
})
}
@@ -968,19 +999,6 @@ mod tests {
"role": "user",
"content": [
{ "type": "input_text", "text": "<user_instructions>hi</user_instructions>" },
]
}),
json!({
"type": "message",
"role": "user",
"content": [
{ "type": "input_text", "text": "<environment_context>...</environment_context>" },
]
}),
json!({
"type": "message",
"role": "user",
"content": [
{ "type": "input_text", "text": "real question" },
{ "type": "input_image", "image_url": "ignored" }
]

View File

@@ -44,6 +44,7 @@ If the repository root contains `TEAM_GUIDE.md` and the `backend/` directory con
You can configure those fallbacks in `~/.codex/config.toml` (or another profile) like this:
```toml
[project]
project_doc_fallback_filenames = ["TEAM_GUIDE.md", ".agents.md"]
```

View File

@@ -451,7 +451,11 @@ When both `enabled_tools` and `disabled_tools` are specified, Codex first restri
#### Experimental RMCP client
This flag enables OAuth support for streamable HTTP servers.
Codex is transitioning to the [official Rust MCP SDK](https://github.com/modelcontextprotocol/rust-sdk).
This flag enables OAuth support for streamable HTTP servers and switches STDIO servers over to the new client implementation.
Please try it out and report any issues with the new client. To enable it, add this to the top level of your `config.toml`:
```toml
experimental_use_rmcp_client = true