Merge branch 'main' into codex/add-process-id-to-logging

marina-oai
2026-01-20 17:02:55 +09:00
committed by GitHub
121 changed files with 7165 additions and 1060 deletions

View File

@@ -18,7 +18,7 @@ workspace = true
[dependencies]
anyhow = { workspace = true }
arc-swap = "1.7.1"
arc-swap = "1.8.0"
async-channel = { workspace = true }
async-trait = { workspace = true }
base64 = { workspace = true }

View File

@@ -58,7 +58,7 @@ impl AgentControl {
Op::UserInput {
items: vec![UserInput::Text {
text: prompt,
// Plain text conversion has no UI element ranges.
// Agent control prompts are plain text with no UI text elements.
text_elements: Vec::new(),
}],
final_output_json_schema: None,
@@ -85,7 +85,6 @@ impl AgentControl {
result
}
#[allow(dead_code)] // Will be used for collab tools.
/// Fetch the last known status for `agent_id`, returning `NotFound` when unavailable.
pub(crate) async fn get_status(&self, agent_id: ThreadId) -> AgentStatus {
let Ok(state) = self.upgrade() else {

View File

@@ -217,9 +217,7 @@ impl ModelClient {
let client = ApiCompactClient::new(transport, api_provider, api_auth)
.with_telemetry(Some(request_telemetry));
let instructions = prompt
.get_full_instructions(&self.state.model_info)
.into_owned();
let instructions = prompt.base_instructions.text.clone();
let payload = ApiCompactionInput {
model: &self.state.model_info.slug,
input: &prompt.input,
@@ -276,8 +274,7 @@ impl ModelClientSession {
}
fn build_responses_request(&self, prompt: &Prompt) -> Result<ApiPrompt> {
let model_info = self.state.model_info.clone();
let instructions = prompt.get_full_instructions(&model_info).into_owned();
let instructions = prompt.base_instructions.text.clone();
let tools_json: Vec<Value> = create_tools_json_for_responses_api(&prompt.tools)?;
Ok(build_api_prompt(prompt, instructions, tools_json))
}
@@ -448,8 +445,7 @@ impl ModelClientSession {
}
let auth_manager = self.state.auth_manager.clone();
let model_info = self.state.model_info.clone();
let instructions = prompt.get_full_instructions(&model_info).into_owned();
let instructions = prompt.base_instructions.text.clone();
let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?;
let api_prompt = build_api_prompt(prompt, instructions, tools_json);
let conversation_id = self.state.conversation_id.to_string();

View File

@@ -1,12 +1,11 @@
use crate::client_common::tools::ToolSpec;
use crate::error::Result;
pub use codex_api::common::ResponseEvent;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ResponseItem;
use codex_protocol::openai_models::ModelInfo;
use futures::Stream;
use serde::Deserialize;
use serde_json::Value;
use std::borrow::Cow;
use std::collections::HashSet;
use std::pin::Pin;
use std::task::Context;
@@ -34,22 +33,13 @@ pub struct Prompt {
/// Whether parallel tool calls are permitted for this prompt.
pub(crate) parallel_tool_calls: bool,
/// Optional override for the built-in BASE_INSTRUCTIONS.
pub base_instructions_override: Option<String>,
pub base_instructions: BaseInstructions,
/// Optional output schema for the model's response.
pub output_schema: Option<Value>,
}
impl Prompt {
pub(crate) fn get_full_instructions<'a>(&'a self, model: &'a ModelInfo) -> Cow<'a, str> {
Cow::Borrowed(
self.base_instructions_override
.as_deref()
.unwrap_or(model.base_instructions.as_str()),
)
}
pub(crate) fn get_formatted_input(&self) -> Vec<ResponseItem> {
let mut input = self.input.clone();
@@ -245,76 +235,8 @@ mod tests {
use codex_api::create_text_param_for_request;
use pretty_assertions::assert_eq;
use crate::config::test_config;
use crate::models_manager::manager::ModelsManager;
use super::*;
struct InstructionsTestCase {
pub slug: &'static str,
pub expects_apply_patch_instructions: bool,
}
#[test]
fn get_full_instructions_no_user_content() {
let prompt = Prompt {
..Default::default()
};
let prompt_with_apply_patch_instructions =
include_str!("../prompt_with_apply_patch_instructions.md");
let test_cases = vec![
InstructionsTestCase {
slug: "gpt-3.5",
expects_apply_patch_instructions: true,
},
InstructionsTestCase {
slug: "gpt-4.1",
expects_apply_patch_instructions: true,
},
InstructionsTestCase {
slug: "gpt-4o",
expects_apply_patch_instructions: true,
},
InstructionsTestCase {
slug: "gpt-5",
expects_apply_patch_instructions: true,
},
InstructionsTestCase {
slug: "gpt-5.1",
expects_apply_patch_instructions: false,
},
InstructionsTestCase {
slug: "codex-mini-latest",
expects_apply_patch_instructions: true,
},
InstructionsTestCase {
slug: "gpt-oss:120b",
expects_apply_patch_instructions: false,
},
InstructionsTestCase {
slug: "gpt-5.1-codex",
expects_apply_patch_instructions: false,
},
InstructionsTestCase {
slug: "gpt-5.1-codex-max",
expects_apply_patch_instructions: false,
},
];
for test_case in test_cases {
let config = test_config();
let model_info = ModelsManager::construct_model_info_offline(test_case.slug, &config);
if test_case.expects_apply_patch_instructions {
assert_eq!(
model_info.base_instructions.as_str(),
prompt_with_apply_patch_instructions
);
}
let expected = model_info.base_instructions.as_str();
let full = prompt.get_full_instructions(&model_info);
assert_eq!(full, expected);
}
}
#[test]
fn serializes_text_verbosity_when_set() {
let input: Vec<ResponseItem> = vec![];

View File

@@ -12,7 +12,6 @@ use crate::SandboxState;
use crate::agent::AgentControl;
use crate::agent::AgentStatus;
use crate::agent::agent_status_from_event;
use crate::client_common::REVIEW_PROMPT;
use crate::compact;
use crate::compact::run_inline_auto_compact_task;
use crate::compact::should_use_remote_compact_task;
@@ -34,9 +33,11 @@ use async_channel::Receiver;
use async_channel::Sender;
use codex_protocol::ThreadId;
use codex_protocol::approvals::ExecPolicyAmendment;
use codex_protocol::config_types::Settings;
use codex_protocol::config_types::WebSearchMode;
use codex_protocol::items::TurnItem;
use codex_protocol::items::UserMessageItem;
use codex_protocol::models::BaseInstructions;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::protocol::FileChange;
use codex_protocol::protocol::HasLegacyEvent;
@@ -49,6 +50,8 @@ use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::TurnAbortReason;
use codex_protocol::protocol::TurnContextItem;
use codex_protocol::protocol::TurnStartedEvent;
use codex_protocol::request_user_input::RequestUserInputArgs;
use codex_protocol::request_user_input::RequestUserInputResponse;
use codex_rmcp_client::ElicitationResponse;
use codex_rmcp_client::OAuthCredentialsStoreMode;
use futures::future::BoxFuture;
@@ -118,6 +121,7 @@ use crate::protocol::Op;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::ReasoningContentDeltaEvent;
use crate::protocol::ReasoningRawContentDeltaEvent;
use crate::protocol::RequestUserInputEvent;
use crate::protocol::ReviewDecision;
use crate::protocol::SandboxPolicy;
use crate::protocol::SessionConfiguredEvent;
@@ -162,7 +166,6 @@ use codex_async_utils::OrCancelExt;
use codex_otel::OtelManager;
use codex_protocol::config_types::CollaborationMode;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
use codex_protocol::config_types::Settings;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DeveloperInstructions;
use codex_protocol::models::ResponseInputItem;
@@ -272,6 +275,18 @@ impl Codex {
crate::models_manager::manager::RefreshStrategy::OnlineIfUncached,
)
.await;
// Resolve base instructions for the session. Priority order:
// 1. config.base_instructions override
// 2. conversation history => session_meta.base_instructions
// 3. base_instructions for the current model
let model_info = models_manager.get_model_info(model.as_str(), &config).await;
let base_instructions = config
.base_instructions
.clone()
.or_else(|| conversation_history.get_base_instructions().map(|s| s.text))
.unwrap_or_else(|| model_info.base_instructions.clone());
// TODO (aibrahim): Consolidate config.model and config.model_reasoning_effort into config.collaboration_mode
// to avoid extracting these fields separately and constructing CollaborationMode here.
let collaboration_mode = CollaborationMode::Custom(Settings {
@@ -285,7 +300,7 @@ impl Codex {
model_reasoning_summary: config.model_reasoning_summary,
developer_instructions: config.developer_instructions.clone(),
user_instructions,
base_instructions: config.base_instructions.clone(),
base_instructions,
compact_prompt: config.compact_prompt.clone(),
approval_policy: config.approval_policy.clone(),
sandbox_policy: config.sandbox_policy.clone(),
@@ -397,7 +412,6 @@ pub(crate) struct TurnContext {
/// instead of `std::env::current_dir()`.
pub(crate) cwd: PathBuf,
pub(crate) developer_instructions: Option<String>,
pub(crate) base_instructions: Option<String>,
pub(crate) compact_prompt: Option<String>,
pub(crate) user_instructions: Option<String>,
pub(crate) approval_policy: AskForApproval,
@@ -439,8 +453,8 @@ pub(crate) struct SessionConfiguration {
/// Model instructions that are appended to the base instructions.
user_instructions: Option<String>,
/// Base instructions override.
base_instructions: Option<String>,
/// Base instructions for the session.
base_instructions: String,
/// Compact prompt override.
compact_prompt: Option<String>,
@@ -525,7 +539,6 @@ impl Session {
session_configuration.collaboration_mode.model(),
model_info.slug.as_str(),
);
let per_turn_config = Arc::new(per_turn_config);
let client = ModelClient::new(
per_turn_config.clone(),
@@ -550,7 +563,6 @@ impl Session {
client,
cwd: session_configuration.cwd.clone(),
developer_instructions: session_configuration.developer_instructions.clone(),
base_instructions: session_configuration.base_instructions.clone(),
compact_prompt: session_configuration.compact_prompt.clone(),
user_instructions: session_configuration.user_instructions.clone(),
approval_policy: session_configuration.approval_policy.value(),
@@ -598,7 +610,14 @@ impl Session {
let conversation_id = ThreadId::default();
(
conversation_id,
RolloutRecorderParams::new(conversation_id, forked_from_id, session_source),
RolloutRecorderParams::new(
conversation_id,
forked_from_id,
session_source,
BaseInstructions {
text: session_configuration.base_instructions.clone(),
},
),
)
}
InitialHistory::Resumed(resumed_history) => (
@@ -807,7 +826,14 @@ impl Session {
async fn get_total_token_usage(&self) -> i64 {
let state = self.state.lock().await;
state.get_total_token_usage()
state.get_total_token_usage(state.server_reasoning_included())
}
pub(crate) async fn get_base_instructions(&self) -> BaseInstructions {
let state = self.state.lock().await;
BaseInstructions {
text: state.session_configuration.base_instructions.clone(),
}
}
async fn record_initial_history(&self, conversation_history: InitialHistory) {
@@ -1297,6 +1323,63 @@ impl Session {
rx_approve
}
pub async fn request_user_input(
&self,
turn_context: &TurnContext,
call_id: String,
args: RequestUserInputArgs,
) -> Option<RequestUserInputResponse> {
let sub_id = turn_context.sub_id.clone();
let (tx_response, rx_response) = oneshot::channel();
let event_id = sub_id.clone();
let prev_entry = {
let mut active = self.active_turn.lock().await;
match active.as_mut() {
Some(at) => {
let mut ts = at.turn_state.lock().await;
ts.insert_pending_user_input(sub_id, tx_response)
}
None => None,
}
};
if prev_entry.is_some() {
warn!("Overwriting existing pending user input for sub_id: {event_id}");
}
let event = EventMsg::RequestUserInput(RequestUserInputEvent {
call_id,
turn_id: turn_context.sub_id.clone(),
questions: args.questions,
});
self.send_event(turn_context, event).await;
rx_response.await.ok()
}
pub async fn notify_user_input_response(
&self,
sub_id: &str,
response: RequestUserInputResponse,
) {
let entry = {
let mut active = self.active_turn.lock().await;
match active.as_mut() {
Some(at) => {
let mut ts = at.turn_state.lock().await;
ts.remove_pending_user_input(sub_id)
}
None => None,
}
};
match entry {
Some(tx_response) => {
tx_response.send(response).ok();
}
None => {
warn!("No pending user input found for sub_id: {sub_id}");
}
}
}
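The pair above is a oneshot round-trip keyed by `sub_id`: `request_user_input` parks a sender and awaits the receiver, and `notify_user_input_response` removes the sender and fires it. A minimal standalone sketch of that pattern, with types simplified (not the actual `Session` code):

use std::collections::HashMap;
use tokio::sync::oneshot;

async fn pending_response_sketch() {
    let mut pending: HashMap<String, oneshot::Sender<String>> = HashMap::new();
    // Requester side: register a sender, hold on to the receiver.
    let (tx, rx) = oneshot::channel();
    pending.insert("sub-1".to_string(), tx);
    // Responder side: take the sender out of the map and answer.
    if let Some(tx) = pending.remove("sub-1") {
        let _ = tx.send("answer".to_string());
    }
    // Resolves to Some("answer"); if the sender were dropped instead,
    // the await would yield an error and `ok()` would give None.
    assert_eq!(rx.await.ok().as_deref(), Some("answer"));
}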
pub async fn notify_approval(&self, sub_id: &str, decision: ReviewDecision) {
let entry = {
let mut active = self.active_turn.lock().await;
@@ -1391,6 +1474,9 @@ impl Session {
}
pub(crate) async fn record_model_warning(&self, message: impl Into<String>, ctx: &TurnContext) {
self.services
.otel_manager
.counter("codex.model_warning", 1, &[]);
let item = ResponseItem::Message {
id: None,
role: "user".to_string(),
@@ -1556,6 +1642,11 @@ impl Session {
self.send_token_count_event(turn_context).await;
}
pub(crate) async fn set_server_reasoning_included(&self, included: bool) {
let mut state = self.state.lock().await;
state.set_server_reasoning_included(included);
}
async fn send_token_count_event(&self, turn_context: &TurnContext) {
let (info, rate_limits) = {
let state = self.state.lock().await;
@@ -1931,6 +2022,9 @@ async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiv
Op::PatchApproval { id, decision } => {
handlers::patch_approval(&sess, id, decision).await;
}
Op::UserInputAnswer { id, response } => {
handlers::request_user_input_response(&sess, id, response).await;
}
Op::AddToHistory { text } => {
handlers::add_to_history(&sess, &config, text).await;
}
@@ -2020,6 +2114,7 @@ mod handlers {
use codex_protocol::protocol::ThreadRolledBackEvent;
use codex_protocol::protocol::TurnAbortReason;
use codex_protocol::protocol::WarningEvent;
use codex_protocol::request_user_input::RequestUserInputResponse;
use crate::context_manager::is_user_turn_boundary;
use codex_protocol::config_types::CollaborationMode;
@@ -2248,6 +2343,14 @@ mod handlers {
}
}
pub async fn request_user_input_response(
sess: &Arc<Session>,
id: String,
response: RequestUserInputResponse,
) {
sess.notify_user_input_response(&id, response).await;
}
pub async fn add_to_history(sess: &Arc<Session>, config: &Arc<Config>, text: String) {
let id = sess.conversation_id;
let config = Arc::clone(config);
@@ -2382,6 +2485,7 @@ mod handlers {
Arc::clone(&turn_context),
vec![UserInput::Text {
text: turn_context.compact_prompt().to_string(),
// Compaction prompt is synthesized; no UI element ranges to preserve.
text_elements: Vec::new(),
}],
CompactTask,
@@ -2541,7 +2645,6 @@ async fn spawn_review_thread(
web_search_mode: Some(review_web_search_mode),
});
let base_instructions = REVIEW_PROMPT.to_string();
let review_prompt = resolved.prompt.clone();
let provider = parent_turn_context.client.get_provider();
let auth_manager = parent_turn_context.client.get_auth_manager();
@@ -2578,7 +2681,6 @@ async fn spawn_review_thread(
ghost_snapshot: parent_turn_context.ghost_snapshot.clone(),
developer_instructions: None,
user_instructions: None,
base_instructions: Some(base_instructions.clone()),
compact_prompt: parent_turn_context.compact_prompt.clone(),
approval_policy: parent_turn_context.approval_policy,
sandbox_policy: parent_turn_context.sandbox_policy.clone(),
@@ -2593,6 +2695,7 @@ async fn spawn_review_thread(
// Seed the child task with the review prompt as the initial user message.
let input: Vec<UserInput> = vec![UserInput::Text {
text: review_prompt,
// Review prompt is synthesized; no UI element ranges to preserve.
text_elements: Vec::new(),
}];
let tc = Arc::new(review_turn_context);
@@ -2856,11 +2959,13 @@ async fn run_sampling_request(
.get_model_info()
.supports_parallel_tool_calls;
let base_instructions = sess.get_base_instructions().await;
let prompt = Prompt {
input,
tools: router.specs(),
parallel_tool_calls: model_supports_parallel,
base_instructions_override: turn_context.base_instructions.clone(),
base_instructions,
output_schema: turn_context.final_output_json_schema.clone(),
};
@@ -2976,7 +3081,6 @@ async fn try_run_sampling_request(
model: turn_context.client.get_model(),
effort: turn_context.client.get_reasoning_effort(),
summary: turn_context.client.get_reasoning_summary(),
base_instructions: turn_context.base_instructions.clone(),
user_instructions: turn_context.user_instructions.clone(),
developer_instructions: turn_context.developer_instructions.clone(),
final_output_json_schema: turn_context.final_output_json_schema.clone(),
@@ -3075,6 +3179,9 @@ async fn try_run_sampling_request(
active_item = Some(tracked_item);
}
}
ResponseEvent::ServerReasoningIncluded(included) => {
sess.set_server_reasoning_included(included).await;
}
ResponseEvent::RateLimits(snapshot) => {
// Update internal state with latest rate limits, but defer sending until
// token usage is available to avoid duplicate TokenCount events.
@@ -3218,6 +3325,7 @@ mod tests {
use super::*;
use crate::CodexAuth;
use crate::config::ConfigBuilder;
use crate::config::test_config;
use crate::exec::ExecToolCallOutput;
use crate::function_tool::FunctionCallError;
use crate::shell::default_user_shell;
@@ -3261,6 +3369,77 @@ mod tests {
use std::sync::Arc;
use std::time::Duration as StdDuration;
struct InstructionsTestCase {
slug: &'static str,
expects_apply_patch_instructions: bool,
}
#[tokio::test]
async fn get_base_instructions_no_user_content() {
let prompt_with_apply_patch_instructions =
include_str!("../prompt_with_apply_patch_instructions.md");
let test_cases = vec![
InstructionsTestCase {
slug: "gpt-3.5",
expects_apply_patch_instructions: true,
},
InstructionsTestCase {
slug: "gpt-4.1",
expects_apply_patch_instructions: true,
},
InstructionsTestCase {
slug: "gpt-4o",
expects_apply_patch_instructions: true,
},
InstructionsTestCase {
slug: "gpt-5",
expects_apply_patch_instructions: true,
},
InstructionsTestCase {
slug: "gpt-5.1",
expects_apply_patch_instructions: false,
},
InstructionsTestCase {
slug: "codex-mini-latest",
expects_apply_patch_instructions: true,
},
InstructionsTestCase {
slug: "gpt-oss:120b",
expects_apply_patch_instructions: false,
},
InstructionsTestCase {
slug: "gpt-5.1-codex",
expects_apply_patch_instructions: false,
},
InstructionsTestCase {
slug: "gpt-5.1-codex-max",
expects_apply_patch_instructions: false,
},
];
let (session, _turn_context) = make_session_and_context().await;
for test_case in test_cases {
let config = test_config();
let model_info = ModelsManager::construct_model_info_offline(test_case.slug, &config);
if test_case.expects_apply_patch_instructions {
assert_eq!(
model_info.base_instructions.as_str(),
prompt_with_apply_patch_instructions
);
}
{
let mut state = session.state.lock().await;
state.session_configuration.base_instructions =
model_info.base_instructions.clone();
}
let base_instructions = session.get_base_instructions().await;
assert_eq!(base_instructions.text, model_info.base_instructions);
}
}
#[tokio::test]
async fn reconstruct_history_matches_live_compactions() {
let (session, turn_context) = make_session_and_context().await;
@@ -3513,6 +3692,7 @@ mod tests {
let config = build_test_config(codex_home.path()).await;
let config = Arc::new(config);
let model = ModelsManager::get_model_offline(config.model.as_deref());
let model_info = ModelsManager::construct_model_info_offline(model.as_str(), &config);
let reasoning_effort = config.model_reasoning_effort;
let collaboration_mode = CollaborationMode::Custom(Settings {
model,
@@ -3525,7 +3705,10 @@ mod tests {
model_reasoning_summary: config.model_reasoning_summary,
developer_instructions: config.developer_instructions.clone(),
user_instructions: config.user_instructions.clone(),
base_instructions: config.base_instructions.clone(),
base_instructions: config
.base_instructions
.clone()
.unwrap_or_else(|| model_info.base_instructions.clone()),
compact_prompt: config.compact_prompt.clone(),
approval_policy: config.approval_policy.clone(),
sandbox_policy: config.sandbox_policy.clone(),
@@ -3584,6 +3767,7 @@ mod tests {
let config = build_test_config(codex_home.path()).await;
let config = Arc::new(config);
let model = ModelsManager::get_model_offline(config.model.as_deref());
let model_info = ModelsManager::construct_model_info_offline(model.as_str(), &config);
let reasoning_effort = config.model_reasoning_effort;
let collaboration_mode = CollaborationMode::Custom(Settings {
model,
@@ -3596,7 +3780,10 @@ mod tests {
model_reasoning_summary: config.model_reasoning_summary,
developer_instructions: config.developer_instructions.clone(),
user_instructions: config.user_instructions.clone(),
base_instructions: config.base_instructions.clone(),
base_instructions: config
.base_instructions
.clone()
.unwrap_or_else(|| model_info.base_instructions.clone()),
compact_prompt: config.compact_prompt.clone(),
approval_policy: config.approval_policy.clone(),
sandbox_policy: config.sandbox_policy.clone(),
@@ -3840,6 +4027,7 @@ mod tests {
let exec_policy = ExecPolicyManager::default();
let (agent_status_tx, _agent_status_rx) = watch::channel(AgentStatus::PendingInit);
let model = ModelsManager::get_model_offline(config.model.as_deref());
let model_info = ModelsManager::construct_model_info_offline(model.as_str(), &config);
let reasoning_effort = config.model_reasoning_effort;
let collaboration_mode = CollaborationMode::Custom(Settings {
model,
@@ -3852,7 +4040,10 @@ mod tests {
model_reasoning_summary: config.model_reasoning_summary,
developer_instructions: config.developer_instructions.clone(),
user_instructions: config.user_instructions.clone(),
base_instructions: config.base_instructions.clone(),
base_instructions: config
.base_instructions
.clone()
.unwrap_or_else(|| model_info.base_instructions.clone()),
compact_prompt: config.compact_prompt.clone(),
approval_policy: config.approval_policy.clone(),
sandbox_policy: config.sandbox_policy.clone(),
@@ -3940,6 +4131,7 @@ mod tests {
let exec_policy = ExecPolicyManager::default();
let (agent_status_tx, _agent_status_rx) = watch::channel(AgentStatus::PendingInit);
let model = ModelsManager::get_model_offline(config.model.as_deref());
let model_info = ModelsManager::construct_model_info_offline(model.as_str(), &config);
let reasoning_effort = config.model_reasoning_effort;
let collaboration_mode = CollaborationMode::Custom(Settings {
model,
@@ -3952,7 +4144,10 @@ mod tests {
model_reasoning_summary: config.model_reasoning_summary,
developer_instructions: config.developer_instructions.clone(),
user_instructions: config.user_instructions.clone(),
base_instructions: config.base_instructions.clone(),
base_instructions: config
.base_instructions
.clone()
.unwrap_or_else(|| model_info.base_instructions.clone()),
compact_prompt: config.compact_prompt.clone(),
approval_policy: config.approval_policy.clone(),
sandbox_policy: config.sandbox_policy.clone(),

View File

@@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::AtomicU64;
@@ -9,9 +10,12 @@ use codex_protocol::protocol::Event;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::ExecApprovalRequestEvent;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::RequestUserInputEvent;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
use codex_protocol::protocol::Submission;
use codex_protocol::request_user_input::RequestUserInputArgs;
use codex_protocol::request_user_input::RequestUserInputResponse;
use codex_protocol::user_input::UserInput;
use std::time::Duration;
use tokio::time::timeout;
@@ -229,6 +233,20 @@ async fn forward_events(
)
.await;
}
Event {
id,
msg: EventMsg::RequestUserInput(event),
} => {
handle_request_user_input(
&codex,
id,
&parent_session,
&parent_ctx,
event,
&cancel_token,
)
.await;
}
other => {
match tx_sub.send(other).or_cancel(&cancel_token).await {
Ok(Ok(())) => {}
@@ -334,6 +352,55 @@ async fn handle_patch_approval(
let _ = codex.submit(Op::PatchApproval { id, decision }).await;
}
async fn handle_request_user_input(
codex: &Codex,
id: String,
parent_session: &Session,
parent_ctx: &TurnContext,
event: RequestUserInputEvent,
cancel_token: &CancellationToken,
) {
let args = RequestUserInputArgs {
questions: event.questions,
};
let response_fut =
parent_session.request_user_input(parent_ctx, parent_ctx.sub_id.clone(), args);
let response = await_user_input_with_cancel(
response_fut,
parent_session,
&parent_ctx.sub_id,
cancel_token,
)
.await;
let _ = codex.submit(Op::UserInputAnswer { id, response }).await;
}
async fn await_user_input_with_cancel<F>(
fut: F,
parent_session: &Session,
sub_id: &str,
cancel_token: &CancellationToken,
) -> RequestUserInputResponse
where
F: core::future::Future<Output = Option<RequestUserInputResponse>>,
{
tokio::select! {
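// `biased` makes select! poll branches in source order rather than at
// random, so cancellation deterministically wins when it is ready at
// the same time as a response.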
biased;
_ = cancel_token.cancelled() => {
let empty = RequestUserInputResponse {
answers: HashMap::new(),
};
parent_session
.notify_user_input_response(sub_id, empty.clone())
.await;
empty
}
response = fut => response.unwrap_or_else(|| RequestUserInputResponse {
answers: HashMap::new(),
}),
}
}
/// Await an approval decision, aborting on cancellation.
async fn await_approval_with_cancel<F>(
fut: F,

View File

@@ -46,7 +46,7 @@ pub(crate) async fn run_inline_auto_compact_task(
let prompt = turn_context.compact_prompt().to_string();
let input = vec![UserInput::Text {
text: prompt,
// Plain text conversion has no UI element ranges.
// Compaction prompt is synthesized; no UI element ranges to preserve.
text_elements: Vec::new(),
}];
@@ -90,7 +90,6 @@ async fn run_compact_task_inner(
model: turn_context.client.get_model(),
effort: turn_context.client.get_reasoning_effort(),
summary: turn_context.client.get_reasoning_summary(),
base_instructions: turn_context.base_instructions.clone(),
user_instructions: turn_context.user_instructions.clone(),
developer_instructions: turn_context.developer_instructions.clone(),
final_output_json_schema: turn_context.final_output_json_schema.clone(),
@@ -104,6 +103,7 @@ async fn run_compact_task_inner(
let turn_input_len = turn_input.len();
let prompt = Prompt {
input: turn_input,
base_instructions: sess.get_base_instructions().await,
..Default::default()
};
let attempt_result = drain_to_completed(&sess, turn_context.as_ref(), &prompt).await;
@@ -316,6 +316,9 @@ async fn drain_to_completed(
sess.record_into_history(std::slice::from_ref(&item), turn_context)
.await;
}
Ok(ResponseEvent::ServerReasoningIncluded(included)) => {
sess.set_server_reasoning_included(included).await;
}
Ok(ResponseEvent::RateLimits(snapshot)) => {
sess.update_rate_limits(turn_context, snapshot).await;
}

View File

@@ -54,7 +54,7 @@ async fn run_remote_compact_task_inner_impl(
input: history.for_prompt(),
tools: vec![],
parallel_tool_calls: false,
base_instructions_override: turn_context.base_instructions.clone(),
base_instructions: sess.get_base_instructions().await,
output_schema: None,
};

View File

@@ -1,13 +1,14 @@
use crate::config::CONFIG_TOML_FILE;
use crate::config::types::McpServerConfig;
use crate::config::types::Notice;
use crate::path_utils::resolve_symlink_write_paths;
use crate::path_utils::write_atomically;
use anyhow::Context;
use codex_protocol::config_types::TrustLevel;
use codex_protocol::openai_models::ReasoningEffort;
use std::collections::BTreeMap;
use std::path::Path;
use std::path::PathBuf;
use tempfile::NamedTempFile;
use tokio::task;
use toml_edit::ArrayOfTables;
use toml_edit::DocumentMut;
@@ -625,10 +626,14 @@ pub fn apply_blocking(
}
let config_path = codex_home.join(CONFIG_TOML_FILE);
let serialized = match std::fs::read_to_string(&config_path) {
Ok(contents) => contents,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => String::new(),
Err(err) => return Err(err.into()),
let write_paths = resolve_symlink_write_paths(&config_path)?;
let serialized = match write_paths.read_path {
Some(path) => match std::fs::read_to_string(&path) {
Ok(contents) => contents,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => String::new(),
Err(err) => return Err(err.into()),
},
None => String::new(),
};
let doc = if serialized.is_empty() {
@@ -654,22 +659,13 @@ pub fn apply_blocking(
return Ok(());
}
std::fs::create_dir_all(codex_home).with_context(|| {
write_atomically(&write_paths.write_path, &document.doc.to_string()).with_context(|| {
format!(
"failed to create Codex home directory at {}",
codex_home.display()
"failed to persist config.toml at {}",
write_paths.write_path.display()
)
})?;
let tmp = NamedTempFile::new_in(codex_home)?;
std::fs::write(tmp.path(), document.doc.to_string()).with_context(|| {
format!(
"failed to write temporary config file at {}",
tmp.path().display()
)
})?;
tmp.persist(config_path)?;
Ok(())
}
@@ -813,6 +809,8 @@ mod tests {
use crate::config::types::McpServerTransportConfig;
use codex_protocol::openai_models::ReasoningEffort;
use pretty_assertions::assert_eq;
#[cfg(unix)]
use std::os::unix::fs::symlink;
use tempfile::tempdir;
use toml::Value as TomlValue;
@@ -952,6 +950,71 @@ profiles = { fast = { model = "gpt-4o", sandbox_mode = "strict" } }
);
}
#[cfg(unix)]
#[test]
fn blocking_set_model_writes_through_symlink_chain() {
let tmp = tempdir().expect("tmpdir");
let codex_home = tmp.path();
let target_dir = tempdir().expect("target dir");
let target_path = target_dir.path().join(CONFIG_TOML_FILE);
let link_path = codex_home.join("config-link.toml");
let config_path = codex_home.join(CONFIG_TOML_FILE);
symlink(&target_path, &link_path).expect("symlink link");
symlink("config-link.toml", &config_path).expect("symlink config");
apply_blocking(
codex_home,
None,
&[ConfigEdit::SetModel {
model: Some("gpt-5.1-codex".to_string()),
effort: Some(ReasoningEffort::High),
}],
)
.expect("persist");
let meta = std::fs::symlink_metadata(&config_path).expect("config metadata");
assert!(meta.file_type().is_symlink());
let contents = std::fs::read_to_string(&target_path).expect("read target");
let expected = r#"model = "gpt-5.1-codex"
model_reasoning_effort = "high"
"#;
assert_eq!(contents, expected);
}
#[cfg(unix)]
#[test]
fn blocking_set_model_replaces_symlink_on_cycle() {
let tmp = tempdir().expect("tmpdir");
let codex_home = tmp.path();
let link_a = codex_home.join("a.toml");
let link_b = codex_home.join("b.toml");
let config_path = codex_home.join(CONFIG_TOML_FILE);
symlink("b.toml", &link_a).expect("symlink a");
symlink("a.toml", &link_b).expect("symlink b");
symlink("a.toml", &config_path).expect("symlink config");
apply_blocking(
codex_home,
None,
&[ConfigEdit::SetModel {
model: Some("gpt-5.1-codex".to_string()),
effort: None,
}],
)
.expect("persist");
let meta = std::fs::symlink_metadata(&config_path).expect("config metadata");
assert!(!meta.file_type().is_symlink());
let contents = std::fs::read_to_string(&config_path).expect("read config");
let expected = r#"model = "gpt-5.1-codex"
"#;
assert_eq!(contents, expected);
}
#[test]
fn batch_write_table_upsert_preserves_inline_comments() {
let tmp = tempdir().expect("tmpdir");

View File

@@ -1,4 +1,6 @@
use crate::auth::AuthCredentialsStoreMode;
use crate::config::edit::ConfigEdit;
use crate::config::edit::ConfigEditsBuilder;
use crate::config::types::DEFAULT_OTEL_ENVIRONMENT;
use crate::config::types::History;
use crate::config::types::McpServerConfig;
@@ -751,30 +753,17 @@ pub fn set_default_oss_provider(codex_home: &Path, provider: &str) -> std::io::R
));
}
}
let config_path = codex_home.join(CONFIG_TOML_FILE);
// Read existing config or create empty string if file doesn't exist
let content = match std::fs::read_to_string(&config_path) {
Ok(content) => content,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => String::new(),
Err(e) => return Err(e),
};
// Parse as DocumentMut for editing while preserving structure
let mut doc = content.parse::<DocumentMut>().map_err(|e| {
std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!("failed to parse config.toml: {e}"),
)
})?;
// Set the default_oss_provider at root level
use toml_edit::value;
doc["oss_provider"] = value(provider);
// Write the modified document back
std::fs::write(&config_path, doc.to_string())?;
Ok(())
let edits = [ConfigEdit::SetPath {
segments: vec!["oss_provider".to_string()],
value: value(provider),
}];
ConfigEditsBuilder::new(codex_home)
.with_edits(edits)
.apply_blocking()
.map_err(|err| std::io::Error::other(format!("failed to persist config.toml: {err}")))
}
/// Base config deserialized from ~/.codex/config.toml.

View File

@@ -9,6 +9,9 @@ use crate::config_loader::LoaderOverrides;
use crate::config_loader::load_config_layers_state;
use crate::config_loader::merge_toml_values;
use crate::path_utils;
use crate::path_utils::SymlinkWritePaths;
use crate::path_utils::resolve_symlink_write_paths;
use crate::path_utils::write_atomically;
use codex_app_server_protocol::Config as ApiConfig;
use codex_app_server_protocol::ConfigBatchWriteParams;
use codex_app_server_protocol::ConfigLayerMetadata;
@@ -27,6 +30,7 @@ use std::borrow::Cow;
use std::path::Path;
use std::path::PathBuf;
use thiserror::Error;
use tokio::task;
use toml::Value as TomlValue;
use toml_edit::Item as TomlItem;
@@ -362,19 +366,30 @@ impl ConfigService {
async fn create_empty_user_layer(
config_toml: &AbsolutePathBuf,
) -> Result<ConfigLayerEntry, ConfigServiceError> {
let toml_value = match tokio::fs::read_to_string(config_toml).await {
Ok(contents) => toml::from_str(&contents).map_err(|e| {
ConfigServiceError::toml("failed to parse existing user config.toml", e)
})?,
Err(e) => {
if e.kind() == std::io::ErrorKind::NotFound {
tokio::fs::write(config_toml, "").await.map_err(|e| {
ConfigServiceError::io("failed to create empty user config.toml", e)
})?;
let SymlinkWritePaths {
read_path,
write_path,
} = resolve_symlink_write_paths(config_toml.as_path())
.map_err(|err| ConfigServiceError::io("failed to resolve user config path", err))?;
let toml_value = match read_path {
Some(path) => match tokio::fs::read_to_string(&path).await {
Ok(contents) => toml::from_str(&contents).map_err(|e| {
ConfigServiceError::toml("failed to parse existing user config.toml", e)
})?,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
write_empty_user_config(write_path.clone()).await?;
TomlValue::Table(toml::map::Map::new())
} else {
return Err(ConfigServiceError::io("failed to read user config.toml", e));
}
Err(err) => {
return Err(ConfigServiceError::io(
"failed to read user config.toml",
err,
));
}
},
None => {
write_empty_user_config(write_path).await?;
TomlValue::Table(toml::map::Map::new())
}
};
Ok(ConfigLayerEntry::new(
@@ -385,6 +400,13 @@ async fn create_empty_user_layer(
))
}
async fn write_empty_user_config(write_path: PathBuf) -> Result<(), ConfigServiceError> {
task::spawn_blocking(move || write_atomically(&write_path, ""))
.await
.map_err(|err| ConfigServiceError::anyhow("config persistence task panicked", err.into()))?
.map_err(|err| ConfigServiceError::io("failed to create empty user config.toml", err))
}
fn parse_value(value: JsonValue) -> Result<Option<TomlValue>, String> {
if value.is_null() {
return Ok(None);

View File

@@ -235,12 +235,19 @@ impl ContextManager {
token_estimate as usize
}
pub(crate) fn get_total_token_usage(&self) -> i64 {
self.token_info
/// When true, the server already accounted for past reasoning tokens and
/// the client should not re-estimate them.
pub(crate) fn get_total_token_usage(&self, server_reasoning_included: bool) -> i64 {
let last_tokens = self
.token_info
.as_ref()
.map(|info| info.last_token_usage.total_tokens)
.unwrap_or(0)
.saturating_add(self.get_non_last_reasoning_items_tokens() as i64)
.unwrap_or(0);
if server_reasoning_included {
last_tokens
} else {
last_tokens.saturating_add(self.get_non_last_reasoning_items_tokens() as i64)
}
}
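The flag only controls whether the client's own estimate of earlier reasoning items is stacked on top of the last reported usage. Illustrative arithmetic with hypothetical numbers:

let last_tokens: i64 = 1_200; // last_token_usage.total_tokens from the server
let reasoning_estimate: i64 = 300; // local estimate of retained reasoning items
// server_reasoning_included == true: report the server figure as-is (1_200).
// server_reasoning_included == false: add the local estimate on top (1_500).
assert_eq!(last_tokens.saturating_add(reasoning_estimate), 1_500);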
/// This function enforces a couple of invariants on the in-memory history:

View File

@@ -52,7 +52,7 @@ fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {
}
content.push(UserInput::Text {
text: text.clone(),
// Plain text conversion has no UI element ranges.
// Model input content does not carry UI element ranges.
text_elements: Vec::new(),
});
}

View File

@@ -21,28 +21,33 @@ pub(crate) use legacy::legacy_feature_keys;
/// High-level lifecycle stage for a feature.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Stage {
Experimental,
Beta {
/// Closed beta features for use during development or internally within the company.
Beta,
/// Experimental features made available to users through the `/experimental` menu.
Experimental {
name: &'static str,
menu_description: &'static str,
announcement: &'static str,
},
/// Stable features. The feature flag is kept for ad-hoc enabling/disabling.
Stable,
/// Deprecated feature that should not be used anymore.
Deprecated,
/// The feature flag no longer has any effect but is kept for backward compatibility.
Removed,
}
impl Stage {
pub fn beta_menu_name(self) -> Option<&'static str> {
match self {
Stage::Beta { name, .. } => Some(name),
Stage::Experimental { name, .. } => Some(name),
_ => None,
}
}
pub fn beta_menu_description(self) -> Option<&'static str> {
match self {
Stage::Beta {
Stage::Experimental {
menu_description, ..
} => Some(menu_description),
_ => None,
@@ -51,7 +56,7 @@ impl Stage {
pub fn beta_announcement(self) -> Option<&'static str> {
match self {
Stage::Beta { announcement, .. } => Some(announcement),
Stage::Experimental { announcement, .. } => Some(announcement),
_ => None,
}
}
@@ -336,14 +341,14 @@ pub const FEATURES: &[FeatureSpec] = &[
FeatureSpec {
id: Feature::WebSearchCached,
key: "web_search_cached",
stage: Stage::Experimental,
stage: Stage::Beta,
default_enabled: false,
},
// Beta program. Rendered in the `/experimental` menu for users.
FeatureSpec {
id: Feature::UnifiedExec,
key: "unified_exec",
stage: Stage::Beta {
stage: Stage::Experimental {
name: "Background terminal",
menu_description: "Run long-running terminal commands in the background.",
announcement: "NEW! Try Background terminals for long-running commands. Enable in /experimental!",
@@ -353,7 +358,7 @@ pub const FEATURES: &[FeatureSpec] = &[
FeatureSpec {
id: Feature::ShellSnapshot,
key: "shell_snapshot",
stage: Stage::Beta {
stage: Stage::Experimental {
name: "Shell snapshot",
menu_description: "Snapshot your shell environment to avoid re-running login scripts for every command.",
announcement: "NEW! Try shell snapshotting to make your Codex faster. Enable in /experimental!",
@@ -363,50 +368,50 @@ pub const FEATURES: &[FeatureSpec] = &[
FeatureSpec {
id: Feature::ChildAgentsMd,
key: "child_agents_md",
stage: Stage::Experimental,
stage: Stage::Beta,
default_enabled: false,
},
FeatureSpec {
id: Feature::ApplyPatchFreeform,
key: "apply_patch_freeform",
stage: Stage::Experimental,
stage: Stage::Beta,
default_enabled: false,
},
FeatureSpec {
id: Feature::ExecPolicy,
key: "exec_policy",
stage: Stage::Experimental,
stage: Stage::Beta,
default_enabled: true,
},
FeatureSpec {
id: Feature::WindowsSandbox,
key: "experimental_windows_sandbox",
stage: Stage::Experimental,
stage: Stage::Beta,
default_enabled: false,
},
FeatureSpec {
id: Feature::WindowsSandboxElevated,
key: "elevated_windows_sandbox",
stage: Stage::Experimental,
stage: Stage::Beta,
default_enabled: false,
},
FeatureSpec {
id: Feature::RemoteCompaction,
key: "remote_compaction",
stage: Stage::Experimental,
stage: Stage::Beta,
default_enabled: true,
},
FeatureSpec {
id: Feature::RemoteModels,
key: "remote_models",
stage: Stage::Experimental,
stage: Stage::Beta,
default_enabled: false,
},
FeatureSpec {
id: Feature::PowershellUtf8,
key: "powershell_utf8",
#[cfg(windows)]
stage: Stage::Beta {
stage: Stage::Experimental {
name: "Powershell UTF-8 support",
menu_description: "Enable UTF-8 output in Powershell.",
announcement: "Codex now supports UTF-8 output in Powershell. If you are seeing problems, disable in /experimental.",
@@ -414,32 +419,32 @@ pub const FEATURES: &[FeatureSpec] = &[
#[cfg(windows)]
default_enabled: true,
#[cfg(not(windows))]
stage: Stage::Experimental,
stage: Stage::Beta,
#[cfg(not(windows))]
default_enabled: false,
},
FeatureSpec {
id: Feature::EnableRequestCompression,
key: "enable_request_compression",
stage: Stage::Experimental,
stage: Stage::Beta,
default_enabled: false,
},
FeatureSpec {
id: Feature::Collab,
key: "collab",
stage: Stage::Experimental,
stage: Stage::Beta,
default_enabled: false,
},
FeatureSpec {
id: Feature::Tui2,
key: "tui2",
stage: Stage::Experimental,
stage: Stage::Beta,
default_enabled: false,
},
FeatureSpec {
id: Feature::Steer,
key: "steer",
stage: Stage::Beta {
stage: Stage::Experimental {
name: "Steer conversation",
menu_description: "Enter submits immediately; Tab queues messages when a task is running.",
announcement: "NEW! Try Steer mode: Enter submits immediately, Tab queues. Enable in /experimental!",
@@ -449,7 +454,7 @@ pub const FEATURES: &[FeatureSpec] = &[
FeatureSpec {
id: Feature::CollaborationModes,
key: "collaboration_modes",
stage: Stage::Experimental,
stage: Stage::Beta,
default_enabled: false,
},
];
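After the rename, only `Stage::Experimental` variants carry menu metadata, so building the `/experimental` menu is a filter over the table. A hedged sketch, assuming the `FeatureSpec` fields are visible to the caller:

fn list_experimental_menu() {
    for spec in FEATURES {
        // Only Stage::Experimental returns Some for the menu accessors.
        if let (Some(name), Some(description)) =
            (spec.stage.beta_menu_name(), spec.stage.beta_menu_description())
        {
            println!("{}: {name} - {description}", spec.key);
        }
    }
}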

View File

@@ -86,6 +86,9 @@ pub(crate) fn with_config_overrides(mut model: ModelInfo, config: &Config) -> Mo
}
};
}
if let Some(base_instructions) = &config.base_instructions {
model.base_instructions = base_instructions.clone();
}
model
}

View File

@@ -1,5 +1,9 @@
use codex_utils_absolute_path::AbsolutePathBuf;
use std::collections::HashSet;
use std::io;
use std::path::Path;
use std::path::PathBuf;
use tempfile::NamedTempFile;
use crate::env;
@@ -8,6 +12,106 @@ pub fn normalize_for_path_comparison(path: impl AsRef<Path>) -> std::io::Result<
Ok(normalize_for_wsl(canonical))
}
pub struct SymlinkWritePaths {
pub read_path: Option<PathBuf>,
pub write_path: PathBuf,
}
/// Resolve the final filesystem target for `path` while retaining a safe write path.
///
/// This follows symlink chains (including relative symlink targets) until it reaches a
/// non-symlink path. If the chain cycles or any metadata/link resolution fails, it
/// returns `read_path: None` and uses the original absolute path as `write_path`.
/// There is no fixed max-resolution count; cycles are detected via a visited set.
pub fn resolve_symlink_write_paths(path: &Path) -> io::Result<SymlinkWritePaths> {
let root = AbsolutePathBuf::from_absolute_path(path)
.map(AbsolutePathBuf::into_path_buf)
.unwrap_or_else(|_| path.to_path_buf());
let mut current = root.clone();
let mut visited = HashSet::new();
// Follow symlink chains while guarding against cycles.
loop {
let meta = match std::fs::symlink_metadata(&current) {
Ok(meta) => meta,
Err(err) if err.kind() == io::ErrorKind::NotFound => {
return Ok(SymlinkWritePaths {
read_path: Some(current.clone()),
write_path: current,
});
}
Err(_) => {
return Ok(SymlinkWritePaths {
read_path: None,
write_path: root,
});
}
};
if !meta.file_type().is_symlink() {
return Ok(SymlinkWritePaths {
read_path: Some(current.clone()),
write_path: current,
});
}
// If we've already seen this path, the chain cycles.
if !visited.insert(current.clone()) {
return Ok(SymlinkWritePaths {
read_path: None,
write_path: root,
});
}
let target = match std::fs::read_link(&current) {
Ok(target) => target,
Err(_) => {
return Ok(SymlinkWritePaths {
read_path: None,
write_path: root,
});
}
};
let next = if target.is_absolute() {
AbsolutePathBuf::from_absolute_path(&target)
} else if let Some(parent) = current.parent() {
AbsolutePathBuf::resolve_path_against_base(&target, parent)
} else {
return Ok(SymlinkWritePaths {
read_path: None,
write_path: root,
});
};
let next = match next {
Ok(path) => path.into_path_buf(),
Err(_) => {
return Ok(SymlinkWritePaths {
read_path: None,
write_path: root,
});
}
};
current = next;
}
}
pub fn write_atomically(write_path: &Path, contents: &str) -> io::Result<()> {
let parent = write_path.parent().ok_or_else(|| {
io::Error::new(
io::ErrorKind::InvalidInput,
format!("path {} has no parent directory", write_path.display()),
)
})?;
std::fs::create_dir_all(parent)?;
let tmp = NamedTempFile::new_in(parent)?;
std::fs::write(tmp.path(), contents)?;
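// `persist` renames the temp file over the target; on the same
// filesystem the rename is atomic, so readers never observe a
// partially written config.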
tmp.persist(write_path)?;
Ok(())
}
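Together, `resolve_symlink_write_paths` and `write_atomically` give a symlink-aware atomic save. A minimal usage sketch (the caller is illustrative; `Path` is already imported above):

fn save_config_sketch(path: &Path, contents: &str) -> std::io::Result<()> {
    let paths = resolve_symlink_write_paths(path)?;
    // On a healthy chain this writes through to the final target; on a
    // cycle or resolution failure it falls back to replacing `path` itself.
    write_atomically(&paths.write_path, contents)
}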
fn normalize_for_wsl(path: PathBuf) -> PathBuf {
normalize_for_wsl_with_flag(path, env::is_wsl())
}
@@ -84,6 +188,29 @@ fn lower_ascii_path(path: PathBuf) -> PathBuf {
#[cfg(test)]
mod tests {
#[cfg(unix)]
mod symlinks {
use super::super::resolve_symlink_write_paths;
use pretty_assertions::assert_eq;
use std::os::unix::fs::symlink;
#[test]
fn symlink_cycles_fall_back_to_root_write_path() -> std::io::Result<()> {
let dir = tempfile::tempdir()?;
let a = dir.path().join("a");
let b = dir.path().join("b");
symlink(&b, &a)?;
symlink(&a, &b)?;
let resolved = resolve_symlink_write_paths(&a)?;
assert_eq!(resolved.read_path, None);
assert_eq!(resolved.write_path, a);
Ok(())
}
}
#[cfg(target_os = "linux")]
mod wsl {
use super::super::normalize_for_wsl_with_flag;

View File

@@ -67,6 +67,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
| EventMsg::ExecCommandOutputDelta(_)
| EventMsg::ExecCommandEnd(_)
| EventMsg::ExecApprovalRequest(_)
| EventMsg::RequestUserInput(_)
| EventMsg::ElicitationRequest(_)
| EventMsg::ApplyPatchApprovalRequest(_)
| EventMsg::BackgroundEvent(_)

View File

@@ -9,6 +9,7 @@ use std::path::PathBuf;
use std::time::SystemTime;
use codex_protocol::ThreadId;
use codex_protocol::models::BaseInstructions;
use serde_json::Value;
use time::OffsetDateTime;
use time::format_description::FormatItem;
@@ -59,6 +60,7 @@ pub enum RolloutRecorderParams {
conversation_id: ThreadId,
forked_from_id: Option<ThreadId>,
source: SessionSource,
base_instructions: BaseInstructions,
},
Resume {
path: PathBuf,
@@ -81,11 +83,13 @@ impl RolloutRecorderParams {
conversation_id: ThreadId,
forked_from_id: Option<ThreadId>,
source: SessionSource,
base_instructions: BaseInstructions,
) -> Self {
Self::Create {
conversation_id,
forked_from_id,
source,
base_instructions,
}
}
@@ -160,6 +164,7 @@ impl RolloutRecorder {
conversation_id,
forked_from_id,
source,
base_instructions,
} => {
let LogFileInfo {
file,
@@ -188,6 +193,7 @@ impl RolloutRecorder {
cli_version: env!("CARGO_PKG_VERSION").to_string(),
source,
model_provider: Some(config.model_provider_id.clone()),
base_instructions: Some(base_instructions),
}),
)
}

View File

@@ -6,6 +6,7 @@ use std::fs::{self};
use std::io::Write;
use std::path::Path;
use pretty_assertions::assert_eq;
use tempfile::TempDir;
use time::Duration;
use time::OffsetDateTime;
@@ -89,6 +90,7 @@ fn write_session_file_with_provider(
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version",
"base_instructions": null,
});
if let Some(source) = source {
@@ -129,6 +131,48 @@ fn write_session_file_with_provider(
Ok((dt, uuid))
}
fn write_session_file_with_meta_payload(
root: &Path,
ts_str: &str,
uuid: Uuid,
payload: serde_json::Value,
) -> std::io::Result<()> {
let format: &[FormatItem] =
format_description!("[year]-[month]-[day]T[hour]-[minute]-[second]");
let dt = PrimitiveDateTime::parse(ts_str, format)
.unwrap()
.assume_utc();
let dir = root
.join("sessions")
.join(format!("{:04}", dt.year()))
.join(format!("{:02}", u8::from(dt.month())))
.join(format!("{:02}", dt.day()));
fs::create_dir_all(&dir)?;
let filename = format!("rollout-{ts_str}-{uuid}.jsonl");
let file_path = dir.join(filename);
let mut file = File::create(file_path)?;
let meta = serde_json::json!({
"timestamp": ts_str,
"type": "session_meta",
"payload": payload,
});
writeln!(file, "{meta}")?;
let user_event = serde_json::json!({
"timestamp": ts_str,
"type": "event_msg",
"payload": {"type": "user_message", "message": "Hello from user", "kind": "plain"}
});
writeln!(file, "{user_event}")?;
let times = FileTimes::new().set_modified(dt.into());
file.set_times(times)?;
Ok(())
}
#[tokio::test]
async fn test_list_conversations_latest_first() {
let temp = TempDir::new().unwrap();
@@ -206,6 +250,7 @@ async fn test_list_conversations_latest_first() {
"cli_version": "test_version",
"source": "vscode",
"model_provider": "test-provider",
"base_instructions": null,
})];
let head_2 = vec![serde_json::json!({
"id": u2,
@@ -215,6 +260,7 @@ async fn test_list_conversations_latest_first() {
"cli_version": "test_version",
"source": "vscode",
"model_provider": "test-provider",
"base_instructions": null,
})];
let head_1 = vec![serde_json::json!({
"id": u1,
@@ -224,6 +270,7 @@ async fn test_list_conversations_latest_first() {
"cli_version": "test_version",
"source": "vscode",
"model_provider": "test-provider",
"base_instructions": null,
})];
let updated_times: Vec<Option<String>> =
@@ -344,6 +391,7 @@ async fn test_pagination_cursor() {
"cli_version": "test_version",
"source": "vscode",
"model_provider": "test-provider",
"base_instructions": null,
})];
let head_4 = vec![serde_json::json!({
"id": u4,
@@ -353,6 +401,7 @@ async fn test_pagination_cursor() {
"cli_version": "test_version",
"source": "vscode",
"model_provider": "test-provider",
"base_instructions": null,
})];
let updated_page1: Vec<Option<String>> =
page1.items.iter().map(|i| i.updated_at.clone()).collect();
@@ -410,6 +459,7 @@ async fn test_pagination_cursor() {
"cli_version": "test_version",
"source": "vscode",
"model_provider": "test-provider",
"base_instructions": null,
})];
let head_2 = vec![serde_json::json!({
"id": u2,
@@ -419,6 +469,7 @@ async fn test_pagination_cursor() {
"cli_version": "test_version",
"source": "vscode",
"model_provider": "test-provider",
"base_instructions": null,
})];
let updated_page2: Vec<Option<String>> =
page2.items.iter().map(|i| i.updated_at.clone()).collect();
@@ -470,6 +521,7 @@ async fn test_pagination_cursor() {
"cli_version": "test_version",
"source": "vscode",
"model_provider": "test-provider",
"base_instructions": null,
})];
let updated_page3: Vec<Option<String>> =
page3.items.iter().map(|i| i.updated_at.clone()).collect();
@@ -527,6 +579,7 @@ async fn test_get_thread_contents() {
"cli_version": "test_version",
"source": "vscode",
"model_provider": "test-provider",
"base_instructions": null,
})];
let expected_page = ThreadsPage {
items: vec![ThreadItem {
@@ -551,6 +604,7 @@ async fn test_get_thread_contents() {
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version",
"base_instructions": null,
"source": "vscode",
"model_provider": "test-provider",
}
@@ -566,6 +620,93 @@ async fn test_get_thread_contents() {
assert_eq!(content, expected_content);
}
#[tokio::test]
async fn test_base_instructions_missing_in_meta_defaults_to_null() {
let temp = TempDir::new().unwrap();
let home = temp.path();
let ts = "2025-04-02T10-30-00";
let uuid = Uuid::from_u128(101);
let payload = serde_json::json!({
"id": uuid,
"timestamp": ts,
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version",
"source": "vscode",
"model_provider": "test-provider",
});
write_session_file_with_meta_payload(home, ts, uuid, payload).unwrap();
let provider_filter = provider_vec(&[TEST_PROVIDER]);
let page = get_threads(
home,
1,
None,
ThreadSortKey::CreatedAt,
INTERACTIVE_SESSION_SOURCES,
Some(provider_filter.as_slice()),
TEST_PROVIDER,
)
.await
.unwrap();
let head = page
.items
.first()
.and_then(|item| item.head.first())
.expect("session meta head");
assert_eq!(
head.get("base_instructions"),
Some(&serde_json::Value::Null)
);
}
#[tokio::test]
async fn test_base_instructions_present_in_meta_is_preserved() {
let temp = TempDir::new().unwrap();
let home = temp.path();
let ts = "2025-04-03T10-30-00";
let uuid = Uuid::from_u128(102);
let base_text = "Custom base instructions";
let payload = serde_json::json!({
"id": uuid,
"timestamp": ts,
"cwd": ".",
"originator": "test_originator",
"cli_version": "test_version",
"source": "vscode",
"model_provider": "test-provider",
"base_instructions": {"text": base_text},
});
write_session_file_with_meta_payload(home, ts, uuid, payload).unwrap();
let provider_filter = provider_vec(&[TEST_PROVIDER]);
let page = get_threads(
home,
1,
None,
ThreadSortKey::CreatedAt,
INTERACTIVE_SESSION_SOURCES,
Some(provider_filter.as_slice()),
TEST_PROVIDER,
)
.await
.unwrap();
let head = page
.items
.first()
.and_then(|item| item.head.first())
.expect("session meta head");
let base = head
.get("base_instructions")
.and_then(|value| value.get("text"))
.and_then(serde_json::Value::as_str);
assert_eq!(base, Some(base_text));
}
#[tokio::test]
async fn test_created_at_sort_uses_file_mtime_for_updated_at() -> Result<()> {
let temp = TempDir::new().unwrap();
@@ -637,6 +778,7 @@ async fn test_updated_at_uses_file_mtime() -> Result<()> {
cli_version: "test_version".into(),
source: SessionSource::VSCode,
model_provider: Some("test-provider".into()),
base_instructions: None,
},
git: None,
}),
@@ -744,6 +886,7 @@ async fn test_stable_ordering_same_second_pagination() {
"cli_version": "test_version",
"source": "vscode",
"model_provider": "test-provider",
"base_instructions": null,
})]
};
let updated_page1: Vec<Option<String>> =

View File

@@ -135,6 +135,13 @@ async fn run_shell_script_with_timeout(
// returns a ref to the handler.
let mut handler = Command::new(&args[0]);
handler.args(&args[1..]);
#[cfg(unix)]
unsafe {
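// pre_exec runs in the forked child just before exec, so the closure
// must stay async-signal-safe; detaching here keeps the snapshot
// subprocess from sharing the controlling terminal.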
handler.pre_exec(|| {
codex_utils_pty::process_group::detach_from_tty()?;
Ok(())
});
}
handler.kill_on_drop(true);
let output = timeout(snapshot_timeout, handler.output())
.await

View File

@@ -66,12 +66,12 @@ pub(crate) async fn spawn_child_async(
#[cfg(unix)]
unsafe {
let set_process_group = matches!(stdio_policy, StdioPolicy::RedirectForShellTool);
let detach_from_tty = matches!(stdio_policy, StdioPolicy::RedirectForShellTool);
#[cfg(target_os = "linux")]
let parent_pid = libc::getpid();
cmd.pre_exec(move || {
if set_process_group {
codex_utils_pty::process_group::set_process_group()?;
if detach_from_tty {
codex_utils_pty::process_group::detach_from_tty()?;
}
// This relies on prctl(2), so it only works on Linux.
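`detach_from_tty` itself lives in `codex_utils_pty`; a hedged sketch of what such a helper typically does (not necessarily the actual implementation):

fn detach_from_tty_sketch() -> std::io::Result<()> {
    // setsid(2) moves the child into a fresh session with no controlling
    // terminal; after fork the child is not a group leader, so this succeeds.
    if unsafe { libc::setsid() } == -1 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}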

View File

@@ -14,6 +14,7 @@ pub(crate) struct SessionState {
pub(crate) session_configuration: SessionConfiguration,
pub(crate) history: ContextManager,
pub(crate) latest_rate_limits: Option<RateLimitSnapshot>,
pub(crate) server_reasoning_included: bool,
}
impl SessionState {
@@ -24,6 +25,7 @@ impl SessionState {
session_configuration,
history,
latest_rate_limits: None,
server_reasoning_included: false,
}
}
@@ -78,8 +80,17 @@ impl SessionState {
self.history.set_token_usage_full(context_window);
}
pub(crate) fn get_total_token_usage(&self) -> i64 {
self.history.get_total_token_usage()
pub(crate) fn get_total_token_usage(&self, server_reasoning_included: bool) -> i64 {
self.history
.get_total_token_usage(server_reasoning_included)
}
pub(crate) fn set_server_reasoning_included(&mut self, included: bool) {
self.server_reasoning_included = included;
}
pub(crate) fn server_reasoning_included(&self) -> bool {
self.server_reasoning_included
}
}

View File

@@ -9,6 +9,7 @@ use tokio_util::sync::CancellationToken;
use tokio_util::task::AbortOnDropHandle;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::request_user_input::RequestUserInputResponse;
use tokio::sync::oneshot;
use crate::codex::TurnContext;
@@ -37,7 +38,6 @@ pub(crate) enum TaskKind {
Compact,
}
#[derive(Clone)]
pub(crate) struct RunningTask {
pub(crate) done: Arc<Notify>,
pub(crate) kind: TaskKind,
@@ -45,6 +45,8 @@ pub(crate) struct RunningTask {
pub(crate) cancellation_token: CancellationToken,
pub(crate) handle: Arc<AbortOnDropHandle<()>>,
pub(crate) turn_context: Arc<TurnContext>,
// Timer that records the full turn duration when the task is dropped.
pub(crate) _timer: Option<codex_otel::Timer>,
}
impl ActiveTurn {
@@ -67,6 +69,7 @@ impl ActiveTurn {
#[derive(Default)]
pub(crate) struct TurnState {
pending_approvals: HashMap<String, oneshot::Sender<ReviewDecision>>,
pending_user_input: HashMap<String, oneshot::Sender<RequestUserInputResponse>>,
pending_input: Vec<ResponseInputItem>,
}
@@ -88,9 +91,25 @@ impl TurnState {
pub(crate) fn clear_pending(&mut self) {
self.pending_approvals.clear();
self.pending_user_input.clear();
self.pending_input.clear();
}
pub(crate) fn insert_pending_user_input(
&mut self,
key: String,
tx: oneshot::Sender<RequestUserInputResponse>,
) -> Option<oneshot::Sender<RequestUserInputResponse>> {
self.pending_user_input.insert(key, tx)
}
pub(crate) fn remove_pending_user_input(
&mut self,
key: &str,
) -> Option<oneshot::Sender<RequestUserInputResponse>> {
self.pending_user_input.remove(key)
}
pub(crate) fn push_pending_input(&mut self, input: ResponseInputItem) {
self.pending_input.push(input);
}

View File

@@ -144,6 +144,12 @@ impl Session {
})
};
let timer = turn_context
.client
.get_otel_manager()
.start_timer("codex.turn.e2e_duration_ms", &[])
.ok();
let running_task = RunningTask {
done,
handle: Arc::new(AbortOnDropHandle::new(handle)),
@@ -151,6 +157,7 @@ impl Session {
task,
cancellation_token,
turn_context: Arc::clone(&turn_context),
_timer: timer,
};
self.register_new_active_task(running_task).await;
}

View File

@@ -30,6 +30,7 @@ impl SessionTask for RegularTask {
) -> Option<String> {
let sess = session.clone_session();
let run_turn_span = trace_span!("run_turn");
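// Reset the per-turn flag; the response stream re-asserts it via
// ResponseEvent::ServerReasoningIncluded when the server reports usage.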
sess.set_server_reasoning_included(false).await;
sess.services
.otel_manager
.apply_traceparent_parent(&run_turn_span);

View File

@@ -190,8 +190,8 @@ pub(crate) async fn exit_review_mode(
review_output: Option<ReviewOutputEvent>,
ctx: Arc<TurnContext>,
) {
const REVIEW_USER_MESSAGE_ID: &str = "review:rollout:user";
const REVIEW_ASSISTANT_MESSAGE_ID: &str = "review:rollout:assistant";
const REVIEW_USER_MESSAGE_ID: &str = "review_rollout_user";
const REVIEW_ASSISTANT_MESSAGE_ID: &str = "review_rollout_assistant";
let (user_message, assistant_message) = if let Some(out) = review_output.clone() {
let mut findings_str = String::new();
let text = out.overall_explanation.trim();

View File

@@ -235,6 +235,15 @@ impl ThreadManager {
self.state.threads.write().await.remove(thread_id)
}
/// Closes all threads open in this ThreadManager
pub async fn remove_and_close_all_threads(&self) -> CodexResult<()> {
for thread in self.state.threads.read().await.values() {
thread.submit(Op::Shutdown).await?;
}
self.state.threads.write().await.clear();
Ok(())
}
/// Fork an existing thread by taking messages up to the given position (not including
/// the message at the given position) and starting a new thread with identical
/// configuration (unless overridden by the caller's `config`). The new thread will have

View File

@@ -12,6 +12,7 @@ use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use async_trait::async_trait;
use codex_protocol::ThreadId;
use codex_protocol::models::BaseInstructions;
use codex_protocol::protocol::CollabAgentInteractionBeginEvent;
use codex_protocol::protocol::CollabAgentInteractionEndEvent;
use codex_protocol::protocol::CollabAgentSpawnBeginEvent;
@@ -115,10 +116,12 @@ mod spawn {
.into(),
)
.await;
let mut config = build_agent_spawn_config(turn.as_ref())?;
let mut config =
build_agent_spawn_config(&session.get_base_instructions().await, turn.as_ref())?;
agent_role
.apply_to_config(&mut config)
.map_err(FunctionCallError::RespondToModel)?;
let result = session
.services
.agent_control
@@ -557,15 +560,18 @@ fn collab_agent_error(agent_id: ThreadId, err: CodexErr) -> FunctionCallError {
}
}
fn build_agent_spawn_config(turn: &TurnContext) -> Result<Config, FunctionCallError> {
fn build_agent_spawn_config(
base_instructions: &BaseInstructions,
turn: &TurnContext,
) -> Result<Config, FunctionCallError> {
let base_config = turn.client.config();
let mut config = (*base_config).clone();
config.base_instructions = Some(base_instructions.text.clone());
config.model = Some(turn.client.get_model());
config.model_provider = turn.client.get_provider();
config.model_reasoning_effort = turn.client.get_reasoning_effort();
config.model_reasoning_summary = turn.client.get_reasoning_summary();
config.developer_instructions = turn.developer_instructions.clone();
config.base_instructions = turn.base_instructions.clone();
config.compact_prompt = turn.compact_prompt.clone();
config.user_instructions = turn.user_instructions.clone();
config.shell_environment_policy = turn.shell_environment_policy.clone();
@@ -1062,8 +1068,10 @@ mod tests {
#[tokio::test]
async fn build_agent_spawn_config_uses_turn_context_values() {
let (_session, mut turn) = make_session_and_context().await;
let base_instructions = BaseInstructions {
text: "base".to_string(),
};
turn.developer_instructions = Some("dev".to_string());
turn.base_instructions = Some("base".to_string());
turn.compact_prompt = Some("compact".to_string());
turn.user_instructions = Some("user".to_string());
turn.shell_environment_policy = ShellEnvironmentPolicy {
@@ -1076,14 +1084,14 @@ mod tests {
turn.approval_policy = AskForApproval::Never;
turn.sandbox_policy = SandboxPolicy::DangerFullAccess;
let config = build_agent_spawn_config(&turn).expect("spawn config");
let config = build_agent_spawn_config(&base_instructions, &turn).expect("spawn config");
let mut expected = (*turn.client.config()).clone();
expected.base_instructions = Some(base_instructions.text);
expected.model = Some(turn.client.get_model());
expected.model_provider = turn.client.get_provider();
expected.model_reasoning_effort = turn.client.get_reasoning_effort();
expected.model_reasoning_summary = turn.client.get_reasoning_summary();
expected.developer_instructions = turn.developer_instructions.clone();
expected.base_instructions = turn.base_instructions.clone();
expected.compact_prompt = turn.compact_prompt.clone();
expected.user_instructions = turn.user_instructions.clone();
expected.shell_environment_policy = turn.shell_environment_policy.clone();

View File

@@ -6,6 +6,7 @@ mod mcp;
mod mcp_resource;
mod plan;
mod read_file;
mod request_user_input;
mod shell;
mod test_sync;
mod unified_exec;
@@ -23,6 +24,7 @@ pub use mcp::McpHandler;
pub use mcp_resource::McpResourceHandler;
pub use plan::PlanHandler;
pub use read_file::ReadFileHandler;
pub use request_user_input::RequestUserInputHandler;
pub use shell::ShellCommandHandler;
pub use shell::ShellHandler;
pub use test_sync::TestSyncHandler;

View File

@@ -0,0 +1,60 @@
use async_trait::async_trait;
use crate::function_tool::FunctionCallError;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
use crate::tools::handlers::parse_arguments;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use codex_protocol::request_user_input::RequestUserInputArgs;
pub struct RequestUserInputHandler;
#[async_trait]
impl ToolHandler for RequestUserInputHandler {
fn kind(&self) -> ToolKind {
ToolKind::Function
}
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
let ToolInvocation {
session,
turn,
call_id,
payload,
..
} = invocation;
let arguments = match payload {
ToolPayload::Function { arguments } => arguments,
_ => {
return Err(FunctionCallError::RespondToModel(
"request_user_input handler received unsupported payload".to_string(),
));
}
};
let args: RequestUserInputArgs = parse_arguments(&arguments)?;
let response = session
.request_user_input(turn.as_ref(), call_id, args)
.await
.ok_or_else(|| {
FunctionCallError::RespondToModel(
"request_user_input was cancelled before receiving a response".to_string(),
)
})?;
let content = serde_json::to_string(&response).map_err(|err| {
FunctionCallError::Fatal(format!(
"failed to serialize request_user_input response: {err}"
))
})?;
Ok(ToolOutput::Function {
content,
content_items: None,
success: Some(true),
})
}
}
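
For reference, the `content` string the handler hands back to the model is just the serialized response keyed by question id; a sketch of the shape, mirroring the round-trip test later in this change:

```rust
use serde_json::json;

fn main() {
    // Shape taken from the round-trip test below; the question id
    // ("confirm_path") is whatever the model supplied in its args.
    let content = json!({
        "answers": {
            "confirm_path": { "selected": ["yes"], "other": null }
        }
    });
    println!("{content}");
}
```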

View File

@@ -27,6 +27,7 @@ pub(crate) struct ToolsConfig {
pub apply_patch_tool_type: Option<ApplyPatchToolType>,
pub web_search_mode: Option<WebSearchMode>,
pub collab_tools: bool,
pub collaboration_modes_tools: bool,
pub experimental_supported_tools: Vec<String>,
}
@@ -45,6 +46,7 @@ impl ToolsConfig {
} = params;
let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform);
let include_collab_tools = features.enabled(Feature::Collab);
let include_collaboration_modes_tools = features.enabled(Feature::CollaborationModes);
let shell_type = if !features.enabled(Feature::ShellTool) {
ConfigShellToolType::Disabled
@@ -76,6 +78,7 @@ impl ToolsConfig {
apply_patch_tool_type,
web_search_mode: *web_search_mode,
collab_tools: include_collab_tools,
collaboration_modes_tools: include_collaboration_modes_tools,
experimental_supported_tools: model_info.experimental_supported_tools.clone(),
}
}
@@ -532,6 +535,88 @@ fn create_wait_tool() -> ToolSpec {
})
}
fn create_request_user_input_tool() -> ToolSpec {
let mut option_props = BTreeMap::new();
option_props.insert(
"label".to_string(),
JsonSchema::String {
description: Some("User-facing label (1-5 words).".to_string()),
},
);
option_props.insert(
"description".to_string(),
JsonSchema::String {
description: Some(
"One short sentence explaining impact/tradeoff if selected.".to_string(),
),
},
);
let options_schema = JsonSchema::Array {
description: Some(
"Optional 2-3 mutually exclusive choices. Put the recommended option first and suffix its label with \"(Recommended)\". Only include \"Other\" option if we want to include a free form option. If the question is free form in nature, please do not have any option."
.to_string(),
),
items: Box::new(JsonSchema::Object {
properties: option_props,
required: Some(vec!["label".to_string(), "description".to_string()]),
additional_properties: Some(false.into()),
}),
};
let mut question_props = BTreeMap::new();
question_props.insert(
"id".to_string(),
JsonSchema::String {
description: Some("Stable identifier for mapping answers (snake_case).".to_string()),
},
);
question_props.insert(
"header".to_string(),
JsonSchema::String {
description: Some(
"Short header label shown in the UI (12 or fewer chars).".to_string(),
),
},
);
question_props.insert(
"question".to_string(),
JsonSchema::String {
description: Some("Single-sentence prompt shown to the user.".to_string()),
},
);
question_props.insert("options".to_string(), options_schema);
let questions_schema = JsonSchema::Array {
description: Some("Questions to show the user. Prefer 1 and do not exceed 3".to_string()),
items: Box::new(JsonSchema::Object {
properties: question_props,
required: Some(vec![
"id".to_string(),
"header".to_string(),
"question".to_string(),
]),
additional_properties: Some(false.into()),
}),
};
let mut properties = BTreeMap::new();
properties.insert("questions".to_string(), questions_schema);
ToolSpec::Function(ResponsesApiTool {
name: "request_user_input".to_string(),
description:
"Request user input for one to three short questions and wait for the response."
.to_string(),
strict: false,
parameters: JsonSchema::Object {
properties,
required: Some(vec!["questions".to_string()]),
additional_properties: Some(false.into()),
},
})
}
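// For illustration, arguments that should validate against the schema
// built above (question id, labels, and descriptions are invented):
//
// {
//   "questions": [{
//     "id": "cache_strategy",
//     "header": "Cache",
//     "question": "Which cache strategy should we implement?",
//     "options": [
//       { "label": "Write-through (Recommended)",
//         "description": "Simpler consistency with predictable latency." },
//       { "label": "Write-back",
//         "description": "Lower write latency but higher complexity." }
//     ]
//   }]
// }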
fn create_close_agent_tool() -> ToolSpec {
let mut properties = BTreeMap::new();
properties.insert(
@@ -1140,6 +1225,7 @@ pub(crate) fn build_specs(
use crate::tools::handlers::McpResourceHandler;
use crate::tools::handlers::PlanHandler;
use crate::tools::handlers::ReadFileHandler;
use crate::tools::handlers::RequestUserInputHandler;
use crate::tools::handlers::ShellCommandHandler;
use crate::tools::handlers::ShellHandler;
use crate::tools::handlers::TestSyncHandler;
@@ -1157,6 +1243,7 @@ pub(crate) fn build_specs(
let mcp_handler = Arc::new(McpHandler);
let mcp_resource_handler = Arc::new(McpResourceHandler);
let shell_command_handler = Arc::new(ShellCommandHandler);
let request_user_input_handler = Arc::new(RequestUserInputHandler);
match &config.shell_type {
ConfigShellToolType::Default => {
@@ -1197,6 +1284,11 @@ pub(crate) fn build_specs(
builder.push_spec(PLAN_TOOL.clone());
builder.register_handler("update_plan", plan_handler);
if config.collaboration_modes_tools {
builder.push_spec(create_request_user_input_tool());
builder.register_handler("request_user_input", request_user_input_handler);
}
if let Some(apply_patch_tool_type) = &config.apply_patch_tool_type {
match apply_patch_tool_type {
ApplyPatchToolType::Freeform => {
@@ -1398,6 +1490,7 @@ mod tests {
let model_info = ModelsManager::construct_model_info_offline("gpt-5-codex", &config);
let mut features = Features::with_defaults();
features.enable(Feature::UnifiedExec);
features.enable(Feature::CollaborationModes);
let config = ToolsConfig::new(&ToolsConfigParams {
model_info: &model_info,
features: &features,
@@ -1430,6 +1523,7 @@ mod tests {
create_list_mcp_resource_templates_tool(),
create_read_mcp_resource_tool(),
PLAN_TOOL.clone(),
create_request_user_input_tool(),
create_apply_patch_freeform_tool(),
ToolSpec::WebSearch {
external_web_access: Some(true),
@@ -1460,6 +1554,7 @@ mod tests {
let model_info = ModelsManager::construct_model_info_offline("gpt-5-codex", &config);
let mut features = Features::with_defaults();
features.enable(Feature::Collab);
features.enable(Feature::CollaborationModes);
let tools_config = ToolsConfig::new(&ToolsConfigParams {
model_info: &model_info,
features: &features,
@@ -1472,6 +1567,33 @@ mod tests {
);
}
#[test]
fn request_user_input_requires_collaboration_modes_feature() {
let config = test_config();
let model_info = ModelsManager::construct_model_info_offline("gpt-5-codex", &config);
let mut features = Features::with_defaults();
features.disable(Feature::CollaborationModes);
let tools_config = ToolsConfig::new(&ToolsConfigParams {
model_info: &model_info,
features: &features,
web_search_mode: Some(WebSearchMode::Cached),
});
let (tools, _) = build_specs(&tools_config, None).build();
assert!(
!tools.iter().any(|t| t.spec.name() == "request_user_input"),
"request_user_input should be disabled when collaboration_modes feature is off"
);
features.enable(Feature::CollaborationModes);
let tools_config = ToolsConfig::new(&ToolsConfigParams {
model_info: &model_info,
features: &features,
web_search_mode: Some(WebSearchMode::Cached),
});
let (tools, _) = build_specs(&tools_config, None).build();
assert_contains_tool_names(&tools, &["request_user_input"]);
}
fn assert_model_tools(
model_slug: &str,
features: &Features,
@@ -1536,9 +1658,11 @@ mod tests {
#[test]
fn test_build_specs_gpt5_codex_default() {
let mut features = Features::with_defaults();
features.enable(Feature::CollaborationModes);
assert_model_tools(
"gpt-5-codex",
&Features::with_defaults(),
&features,
Some(WebSearchMode::Cached),
&[
"shell_command",
@@ -1546,6 +1670,7 @@ mod tests {
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"request_user_input",
"apply_patch",
"web_search",
"view_image",
@@ -1555,9 +1680,11 @@ mod tests {
#[test]
fn test_build_specs_gpt51_codex_default() {
let mut features = Features::with_defaults();
features.enable(Feature::CollaborationModes);
assert_model_tools(
"gpt-5.1-codex",
&Features::with_defaults(),
&features,
Some(WebSearchMode::Cached),
&[
"shell_command",
@@ -1565,6 +1692,7 @@ mod tests {
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"request_user_input",
"apply_patch",
"web_search",
"view_image",
@@ -1574,9 +1702,12 @@ mod tests {
#[test]
fn test_build_specs_gpt5_codex_unified_exec_web_search() {
let mut features = Features::with_defaults();
features.enable(Feature::UnifiedExec);
features.enable(Feature::CollaborationModes);
assert_model_tools(
"gpt-5-codex",
Features::with_defaults().enable(Feature::UnifiedExec),
&features,
Some(WebSearchMode::Live),
&[
"exec_command",
@@ -1585,6 +1716,7 @@ mod tests {
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"request_user_input",
"apply_patch",
"web_search",
"view_image",
@@ -1594,9 +1726,12 @@ mod tests {
#[test]
fn test_build_specs_gpt51_codex_unified_exec_web_search() {
let mut features = Features::with_defaults();
features.enable(Feature::UnifiedExec);
features.enable(Feature::CollaborationModes);
assert_model_tools(
"gpt-5.1-codex",
Features::with_defaults().enable(Feature::UnifiedExec),
&features,
Some(WebSearchMode::Live),
&[
"exec_command",
@@ -1605,6 +1740,7 @@ mod tests {
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"request_user_input",
"apply_patch",
"web_search",
"view_image",
@@ -1614,9 +1750,11 @@ mod tests {
#[test]
fn test_codex_mini_defaults() {
let mut features = Features::with_defaults();
features.enable(Feature::CollaborationModes);
assert_model_tools(
"codex-mini-latest",
&Features::with_defaults(),
&features,
Some(WebSearchMode::Cached),
&[
"local_shell",
@@ -1624,6 +1762,7 @@ mod tests {
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"request_user_input",
"web_search",
"view_image",
],
@@ -1632,9 +1771,11 @@ mod tests {
#[test]
fn test_codex_5_1_mini_defaults() {
let mut features = Features::with_defaults();
features.enable(Feature::CollaborationModes);
assert_model_tools(
"gpt-5.1-codex-mini",
&Features::with_defaults(),
&features,
Some(WebSearchMode::Cached),
&[
"shell_command",
@@ -1642,6 +1783,7 @@ mod tests {
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"request_user_input",
"apply_patch",
"web_search",
"view_image",
@@ -1651,9 +1793,11 @@ mod tests {
#[test]
fn test_gpt_5_defaults() {
let mut features = Features::with_defaults();
features.enable(Feature::CollaborationModes);
assert_model_tools(
"gpt-5",
&Features::with_defaults(),
&features,
Some(WebSearchMode::Cached),
&[
"shell",
@@ -1661,6 +1805,7 @@ mod tests {
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"request_user_input",
"web_search",
"view_image",
],
@@ -1669,9 +1814,11 @@ mod tests {
#[test]
fn test_gpt_5_1_defaults() {
let mut features = Features::with_defaults();
features.enable(Feature::CollaborationModes);
assert_model_tools(
"gpt-5.1",
&Features::with_defaults(),
&features,
Some(WebSearchMode::Cached),
&[
"shell_command",
@@ -1679,6 +1826,7 @@ mod tests {
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"request_user_input",
"apply_patch",
"web_search",
"view_image",
@@ -1688,9 +1836,11 @@ mod tests {
#[test]
fn test_exp_5_1_defaults() {
let mut features = Features::with_defaults();
features.enable(Feature::CollaborationModes);
assert_model_tools(
"exp-5.1",
&Features::with_defaults(),
&features,
Some(WebSearchMode::Cached),
&[
"exec_command",
@@ -1699,6 +1849,7 @@ mod tests {
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"request_user_input",
"apply_patch",
"web_search",
"view_image",
@@ -1708,9 +1859,12 @@ mod tests {
#[test]
fn test_codex_mini_unified_exec_web_search() {
let mut features = Features::with_defaults();
features.enable(Feature::UnifiedExec);
features.enable(Feature::CollaborationModes);
assert_model_tools(
"codex-mini-latest",
Features::with_defaults().enable(Feature::UnifiedExec),
&features,
Some(WebSearchMode::Live),
&[
"exec_command",
@@ -1719,6 +1873,7 @@ mod tests {
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"request_user_input",
"web_search",
"view_image",
],

View File

@@ -14,10 +14,12 @@ You are Codex Orchestrator, based on GPT-5. You are running as an orchestration
* **Never stop monitoring workers.**
* **Do not rush workers. Be patient.**
* The orchestrator must not return unless the task is fully accomplished.
* If the user asks you a question or requests a status update while you are working, always answer them before continuing your work.
## Worker execution semantics
* While a worker is running, you cannot observe intermediate state.
* Workers are able to run commands, update/create/delete files, etc. They can be considered fully autonomous agents.
* Messages sent with `send_input` are queued and processed only after the worker finishes, unless interrupted.
* Therefore:
* Do not send messages to “check status” or “ask for progress” unless asked to.
@@ -40,7 +42,7 @@ You are Codex Orchestrator, based on GPT-5. You are running as an orchestration
* verify correctness,
* check integration with other work,
* assess whether the global task is closer to completion.
5. If issues remain, assign fixes to the appropriate worker(s) and repeat steps 3–5.
5. If issues remain, assign fixes to the appropriate worker(s) and repeat steps 3–5. Do not fix issues yourself unless the fixes are very small.
6. Close agents only when no further work is required from them.
7. Return to the user only when the task is fully completed and verified.

View File

@@ -1,40 +1,133 @@
# Collaboration Style: Plan
You work in 2 distinct modes:
1. Brainstorming: You collaboratively align with the user on what to do or build and how to do it or build it.
2. Writing and confirming a plan: After you've gathered all the information you write up a plan and verify it with the user.
You usually start with the planning step. Skip step 1 if the user provides you with a detailed plan or a small, unambiguous task or plan OR if the user asks you to plan by yourself.
2. Generating a plan: After you've gathered all the information, you write up a plan.
You usually start with the brainstorming step. Skip step 1 if the user provides you with a detailed plan or a small, unambiguous task, OR if the user asks you to plan by yourself.
## Brainstorming principles
The point of brainstorming with the user is to align on what to do and how to do it. This phase is iterative and conversational. You can interact with the environment and read files if it is helpful, but be mindful of the time.
You MUST follow the principles below. Think about them carefully as you work with the user. Follow the structure and tone of the examples.
*State what you think the user cares about.* Actively infer what matters most (robustness, clean abstractions, quick lovable interfaces, scalability) and reflect this back to the user to confirm.
_State what you think the user cares about._ Actively infer what matters most (robustness, clean abstractions, quick lovable interfaces, scalability) and reflect this back to the user to confirm.
Example: "It seems like you might be prototyping a design for an app, and scalability or performance isn't a concern right now - is that accurate?"
*Think out loud.* Share reasoning when it helps the user evaluate tradeoffs. Keep explanations short and grounded in consequences. Avoid design lectures or exhaustive option lists.
_Think out loud._ Share reasoning when it helps the user evaluate tradeoffs. Keep explanations short and grounded in consequences. Avoid design lectures or exhaustive option lists.
*Use reasonable suggestions.* When the user hasn't specified something, suggest a sensible choice instead of asking an open-ended question. Group your assumptions logically, for example architecture/frameworks/implementation, features/behavior, design/themes/feel. Clearly label suggestions as provisional. Share reasoning when it helps the user evaluate tradeoffs. Keep explanations short and grounded in consequences. They should be easy to accept or override. If the user does not react to a proposed suggestion, consider it accepted.
_Use reasonable suggestions._ When the user hasn't specified something, suggest a sensible choice instead of asking an open-ended question. Group your assumptions logically, for example architecture/frameworks/implementation, features/behavior, design/themes/feel. Clearly label suggestions as provisional. Share reasoning when it helps the user evaluate tradeoffs. Keep explanations short and grounded in consequences. They should be easy to accept or override. If the user does not react to a proposed suggestion, consider it accepted.
Example: "There are a few viable ways to structure this. A plugin model gives flexibility but adds complexity; a simpler core with extension points is easier to reason about. Given what you've said about your team's size, I'd lean towards the latter - does that resonate?"
Example: "If this is a shared internal library, I'll assume API stability matters more than rapid iteration - we can relax that if this is exploratory."
*Ask fewer, better questions.* Prefer making a concrete proposal with stated assumptions over asking questions. Only ask questions when different reasonable suggestions would materially change the plan, you cannot safely proceed, or if you think the user would really want to give input directly. Never ask a question if you already provided a suggestion.
_Ask fewer, better questions._ Prefer making a concrete proposal with stated assumptions over asking questions. Only ask questions when different reasonable suggestions would materially change the plan, you cannot safely proceed, or if you think the user would really want to give input directly. Never ask a question if you already provided a suggestion. You can use the `request_user_input` tool to ask questions.
*Think ahead.* What else might the user need? How will the user test and understand what you did? Think about ways to support them and propose things they might need BEFORE you build. Offer at least one suggestion you came up with by thinking ahead.
_Think ahead._ What else might the user need? How will the user test and understand what you did? Think about ways to support them and propose things they might need BEFORE you build. Offer at least one suggestion you came up with by thinking ahead.
Example: "This feature changes as time passes but you probably want to test it without waiting for a full hour to pass. Would you like a debug mode where you can move through states without just waiting?"
*Be mindful of time.* The user is right here with you. Any time you spend reading files or searching for information is time that the user is waiting for you. Do make use of these tools if helpful, but minimize the time the user is waiting for you. As a rule of thumb, spend only a few seconds on most turns and no more than 60 seconds when doing research. If you are missing information and think you need to do longer research, ask the user whether they want you to research, or want to give you a tip.
_Be mindful of time._ The user is right here with you. Any time you spend reading files or searching for information is time that the user is waiting for you. Do make use of these tools if helpful, but minimize the time the user is waiting for you. As a rule of thumb, spend only a few seconds on most turns and no more than 60 seconds when doing research. If you are missing information and think you need to do longer research, ask the user whether they want you to research, or want to give you a tip.
Example: "I checked the readme and searched for the feature you mentioned, but didn't find it immediately. If it's ok, I'll go and spend a bit more time exploring the code base?"
## Using `request_user_input` in Plan Mode
Use `request_user_input` only when you are genuinely blocked on a decision that materially changes the plan (requirements, trade-offs, rollout or risk posture). The maximum number of `request_user_input` tool calls should be **5**.
Only include an "Other" option when a free-form answer is truly useful. If the question is purely free-form, leave `options` unset entirely.
Do **not** use `request_user_input` to ask "is my plan ready?" or "should I proceed?".
### Examples (technical, schema-populated)
**1. Boolean (yes/no), no free-form**
```json
{
"questions": [
{
"id": "enable_migration",
"header": "Migrate",
"question": "Enable the database migration in this release?",
"options": [
{
"label": "Yes (Recommended)",
"description": "Ship the migration with this rollout."
},
{
"label": "No",
"description": "Defer the migration to a later release."
}
]
}
]
}
```
**2. Choice with free-form**
```json
{
"questions": [
{
"id": "cache_strategy",
"header": "Cache",
"question": "Which cache strategy should we implement?",
"options": [
{
"label": "Write-through (Recommended)",
"description": "Simpler consistency with predictable latency."
},
{
"label": "Write-back",
"description": "Lower write latency but higher complexity."
},
{
"label": "Other",
"description": "Provide a custom strategy or constraints."
}
]
}
]
}
```
**3. Free-form only (no options)**
```json
{
"questions": [
{
"id": "rollout_constraints",
"header": "Rollout",
"question": "Any rollout constraints or compliance requirements we must follow?"
}
]
}
```
## Iterating on the plan
Only AFTER you have all the information, write up the full plan.
A well-written and informative plan should be as detailed as a design doc or PRD and reflect your discussion with the user; at minimum, that's one full page! If handed to a different agent, the agent would know exactly what to build without asking questions and arrive at a similar implementation to yours. At minimum it should include:
- tools and frameworks you use, any dependencies you need to install
- functions, files, or directories you're likely going to edit
- Questions that were asked and the responses from users
- architecture if the code changes are significant
- if developing features, describe the features you are going to build in detail like a PM in a PRD
- if you are developing a frontend, describe the design in detail
- include a list of todos in markdown format if needed. Please do not include a **plan** step given that we are planning here already.
`plan.md`: For long, detailed plans, it makes sense to write them in a separate file. If the changes are substantial and the plan is longer than a full page, ask the user if it's ok to write the plan in `plan.md`. If plan.md is used, ALWAYS update the file rather than outputting the plan in your final answer.
### Output schema — MUST MATCH _exactly_
ALWAYS confirm the plan with the user before ending. If the user requests changes or additions to the plan update the plan. Iterate until the user confirms the plan.
When you present the plan, format the final response as a JSON object with a single key, `plan`, whose value is the full plan text.
Example:
```json
{
"plan": "Title: Schema migration rollout\n\n1. Validate the current schema on staging...\n2. Add the new columns with nullable defaults...\n3. Backfill in batches with feature-flagged writes...\n4. Flip reads to the new fields and monitor...\n5. Remove legacy columns after one full release cycle..."
}
```
PLEASE DO NOT confirm the plan with the user before ending. The user will be responsible for telling us to update, iterate on, or execute the plan.

View File

@@ -15,10 +15,12 @@ use codex_otel::OtelManager;
use codex_protocol::ThreadId;
use codex_protocol::config_types::ReasoningSummary;
use core_test_support::load_default_config_for_test;
use core_test_support::responses::WebSocketConnectionConfig;
use core_test_support::responses::WebSocketTestServer;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::start_websocket_server;
use core_test_support::responses::start_websocket_server_with_headers;
use core_test_support::skip_if_no_network;
use futures::StreamExt;
use pretty_assertions::assert_eq;
@@ -60,6 +62,40 @@ async fn responses_websocket_streams_request() {
server.shutdown().await;
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_emits_reasoning_included_event() {
skip_if_no_network!();
let server = start_websocket_server_with_headers(vec![WebSocketConnectionConfig {
requests: vec![vec![ev_response_created("resp-1"), ev_completed("resp-1")]],
response_headers: vec![("X-Reasoning-Included".to_string(), "true".to_string())],
}])
.await;
let harness = websocket_harness(&server).await;
let mut session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
let mut stream = session
.stream(&prompt)
.await
.expect("websocket stream failed");
let mut saw_reasoning_included = false;
while let Some(event) = stream.next().await {
match event.expect("event") {
ResponseEvent::ServerReasoningIncluded(true) => {
saw_reasoning_included = true;
}
ResponseEvent::Completed { .. } => break,
_ => {}
}
}
assert!(saw_reasoning_included);
server.shutdown().await;
}
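// A sketch (helper name invented) of the mapping this test exercises:
// an `X-Reasoning-Included: true` response header surfaces on the
// stream as `ResponseEvent::ServerReasoningIncluded(true)`.
fn reasoning_included(headers: &http::HeaderMap) -> Option<bool> {
    headers
        .get("X-Reasoning-Included")
        .and_then(|value| value.to_str().ok())
        .map(|value| value.eq_ignore_ascii_case("true"))
}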
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_appends_on_prefix() {
skip_if_no_network!();

View File

@@ -32,11 +32,13 @@ use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_completed_with_tokens;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::mount_compact_json_once;
use core_test_support::responses::mount_response_sequence;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::mount_sse_once_match;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::sse_failed;
use core_test_support::responses::sse_response;
use core_test_support::responses::start_mock_server;
use pretty_assertions::assert_eq;
use serde_json::json;
@@ -2147,3 +2149,85 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
"third turn should include compaction summary item"
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auto_compact_runs_when_reasoning_header_clears_between_turns() {
skip_if_no_network!();
let server = start_mock_server().await;
let first_user = "SERVER_INCLUDED_FIRST";
let second_user = "SERVER_INCLUDED_SECOND";
let third_user = "SERVER_INCLUDED_THIRD";
let pre_last_reasoning_content = "a".repeat(2_400);
let post_last_reasoning_content = "b".repeat(4_000);
let first_turn = sse(vec![
ev_reasoning_item("pre-reasoning", &["pre"], &[&pre_last_reasoning_content]),
ev_completed_with_tokens("r1", 10),
]);
let second_turn = sse(vec![
ev_reasoning_item("post-reasoning", &["post"], &[&post_last_reasoning_content]),
ev_completed_with_tokens("r2", 80),
]);
let third_turn = sse(vec![
ev_assistant_message("m4", FINAL_REPLY),
ev_completed_with_tokens("r4", 1),
]);
let responses = vec![
sse_response(first_turn).insert_header("X-Reasoning-Included", "true"),
sse_response(second_turn),
sse_response(third_turn),
];
mount_response_sequence(&server, responses).await;
let compacted_history = vec![
codex_protocol::models::ResponseItem::Message {
id: None,
role: "assistant".to_string(),
content: vec![codex_protocol::models::ContentItem::OutputText {
text: "REMOTE_COMPACT_SUMMARY".to_string(),
}],
},
codex_protocol::models::ResponseItem::Compaction {
encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
},
];
let compact_mock =
mount_compact_json_once(&server, serde_json::json!({ "output": compacted_history })).await;
let codex = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
set_test_compact_prompt(config);
config.model_auto_compact_token_limit = Some(300);
config.features.enable(Feature::RemoteCompaction);
})
.build(&server)
.await
.expect("build codex")
.codex;
for user in [first_user, second_user, third_user] {
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: user.into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
}
let compact_requests = compact_mock.requests();
assert_eq!(
compact_requests.len(),
1,
"remote compaction should run once after the reasoning header clears"
);
}
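
The trigger itself is a simple threshold check over the recomputed total; a sketch with assumed names:

```rust
// Assumed names: `limit` mirrors model_auto_compact_token_limit from
// the config above. Once the reasoning header clears, reasoning tokens
// count toward the total again and can push it past the limit.
fn should_auto_compact(total_tokens: i64, limit: Option<i64>) -> bool {
    limit.is_some_and(|limit| total_tokens > limit)
}
```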

View File

@@ -53,6 +53,7 @@ mod quota_exceeded;
mod read_file;
mod remote_models;
mod request_compression;
mod request_user_input;
mod resume;
mod resume_warning;
mod review;

View File

@@ -1,5 +1,6 @@
#![allow(clippy::unwrap_used)]
use codex_core::features::Feature;
use codex_protocol::config_types::WebSearchMode;
use core_test_support::load_sse_fixture_with_id;
use core_test_support::responses;
@@ -36,7 +37,10 @@ async fn collect_tool_identifiers_for_model(model: &str) -> Vec<String> {
let mut builder = test_codex()
.with_model(model)
// Keep tool expectations stable when the default web_search mode changes.
.with_config(|config| config.web_search_mode = Some(WebSearchMode::Cached));
.with_config(|config| {
config.web_search_mode = Some(WebSearchMode::Cached);
config.features.enable(Feature::CollaborationModes);
});
let test = builder
.build(&server)
.await
@@ -62,6 +66,7 @@ async fn model_selects_expected_tools() {
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
"request_user_input".to_string(),
"web_search".to_string(),
"view_image".to_string()
],
@@ -77,6 +82,7 @@ async fn model_selects_expected_tools() {
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
"request_user_input".to_string(),
"apply_patch".to_string(),
"web_search".to_string(),
"view_image".to_string()
@@ -93,6 +99,7 @@ async fn model_selects_expected_tools() {
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
"request_user_input".to_string(),
"apply_patch".to_string(),
"web_search".to_string(),
"view_image".to_string()
@@ -109,6 +116,7 @@ async fn model_selects_expected_tools() {
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
"request_user_input".to_string(),
"web_search".to_string(),
"view_image".to_string()
],
@@ -124,6 +132,7 @@ async fn model_selects_expected_tools() {
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
"request_user_input".to_string(),
"apply_patch".to_string(),
"web_search".to_string(),
"view_image".to_string()
@@ -140,6 +149,7 @@ async fn model_selects_expected_tools() {
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
"request_user_input".to_string(),
"apply_patch".to_string(),
"web_search".to_string(),
"view_image".to_string()

View File

@@ -92,6 +92,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
config.model = Some("gpt-5.1-codex-max".to_string());
// Keep tool expectations stable when the default web_search mode changes.
config.web_search_mode = Some(WebSearchMode::Cached);
config.features.enable(Feature::CollaborationModes);
})
.build(&server)
.await?;
@@ -135,6 +136,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"request_user_input",
"apply_patch",
"web_search",
"view_image",
@@ -176,6 +178,7 @@ async fn codex_mini_latest_tools() -> anyhow::Result<()> {
.with_config(|config| {
config.user_instructions = Some("be consistent and helpful".to_string());
config.features.disable(Feature::ApplyPatchFreeform);
config.features.enable(Feature::CollaborationModes);
config.model = Some("codex-mini-latest".to_string());
})
.build(&server)
@@ -240,6 +243,7 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
let TestCodex { codex, config, .. } = test_codex()
.with_config(|config| {
config.user_instructions = Some("be consistent and helpful".to_string());
config.features.enable(Feature::CollaborationModes);
})
.build(&server)
.await?;
@@ -316,6 +320,7 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() -> an
let TestCodex { codex, .. } = test_codex()
.with_config(|config| {
config.user_instructions = Some("be consistent and helpful".to_string());
config.features.enable(Feature::CollaborationModes);
})
.build(&server)
.await?;
@@ -538,6 +543,7 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() -> anyhow::Res
let TestCodex { codex, .. } = test_codex()
.with_config(|config| {
config.user_instructions = Some("be consistent and helpful".to_string());
config.features.enable(Feature::CollaborationModes);
})
.build(&server)
.await?;
@@ -645,6 +651,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a
} = test_codex()
.with_config(|config| {
config.user_instructions = Some("be consistent and helpful".to_string());
config.features.enable(Feature::CollaborationModes);
})
.build(&server)
.await?;
@@ -742,6 +749,7 @@ async fn send_user_turn_with_changes_sends_environment_context() -> anyhow::Resu
} = test_codex()
.with_config(|config| {
config.user_instructions = Some("be consistent and helpful".to_string());
config.features.enable(Feature::CollaborationModes);
})
.build(&server)
.await?;

View File

@@ -388,9 +388,10 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> {
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
let base_model_info = models_manager.get_model_info("gpt-5.1", &config).await;
let body = response_mock.single_request().body_json();
let instructions = body["instructions"].as_str().unwrap();
assert_eq!(instructions, remote_base);
assert_eq!(instructions, base_model_info.base_instructions);
Ok(())
}

View File

@@ -0,0 +1,155 @@
#![allow(clippy::unwrap_used)]
use std::collections::HashMap;
use codex_core::features::Feature;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::request_user_input::RequestUserInputAnswer;
use codex_protocol::request_user_input::RequestUserInputResponse;
use codex_protocol::user_input::UserInput;
use core_test_support::responses;
use core_test_support::responses::ResponsesRequest;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use pretty_assertions::assert_eq;
use serde_json::Value;
use serde_json::json;
fn call_output(req: &ResponsesRequest, call_id: &str) -> String {
let raw = req.function_call_output(call_id);
assert_eq!(
raw.get("call_id").and_then(Value::as_str),
Some(call_id),
"mismatched call_id in function_call_output"
);
let (content_opt, _success) = match req.function_call_output_content_and_success(call_id) {
Some(values) => values,
None => panic!("function_call_output present"),
};
match content_opt {
Some(content) => content,
None => panic!("function_call_output content present"),
}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn request_user_input_round_trip_resolves_pending() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let builder = test_codex();
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder
.with_config(|config| {
config.features.enable(Feature::CollaborationModes);
})
.build(&server)
.await?;
let call_id = "user-input-call";
let request_args = json!({
"questions": [{
"id": "confirm_path",
"header": "Confirm",
"question": "Proceed with the plan?",
"options": [{
"label": "Yes (Recommended)",
"description": "Continue the current plan."
}, {
"label": "No",
"description": "Stop and revisit the approach."
}]
}]
})
.to_string();
let first_response = sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "request_user_input", &request_args),
ev_completed("resp-1"),
]);
responses::mount_sse_once(&server, first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "thanks"),
ev_completed("resp-2"),
]);
let second_mock = responses::mount_sse_once(&server, second_response).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "please confirm".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
collaboration_mode: None,
})
.await?;
let request = wait_for_event_match(&codex, |event| match event {
EventMsg::RequestUserInput(request) => Some(request.clone()),
_ => None,
})
.await;
assert_eq!(request.call_id, call_id);
assert_eq!(request.questions.len(), 1);
let mut answers = HashMap::new();
answers.insert(
"confirm_path".to_string(),
RequestUserInputAnswer {
selected: vec!["yes".to_string()],
other: None,
},
);
let response = RequestUserInputResponse { answers };
codex
.submit(Op::UserInputAnswer {
id: request.turn_id.clone(),
response,
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
let req = second_mock.single_request();
let output_text = call_output(&req, call_id);
let output_json: Value = serde_json::from_str(&output_text)?;
assert_eq!(
output_json,
json!({
"answers": {
"confirm_path": { "selected": ["yes"], "other": Value::Null }
}
})
);
Ok(())
}

View File

@@ -131,3 +131,78 @@ async fn resume_includes_initial_messages_from_reasoning_events() -> Result<()>
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn resume_switches_models_preserves_base_instructions() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = Some("gpt-5.2".to_string());
});
let initial = builder.build(&server).await?;
let codex = Arc::clone(&initial.codex);
let home = initial.home.clone();
let rollout_path = initial.session_configured.rollout_path.clone();
let initial_sse = sse(vec![
ev_response_created("resp-initial"),
ev_assistant_message("msg-1", "Completed first turn"),
ev_completed("resp-initial"),
]);
let initial_mock = mount_sse_once(&server, initial_sse).await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "Record initial instructions".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
let initial_body = initial_mock.single_request().body_json();
let initial_instructions = initial_body
.get("instructions")
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string();
let resumed_sse = sse(vec![
ev_response_created("resp-resume"),
ev_assistant_message("msg-2", "Resumed turn"),
ev_completed("resp-resume"),
]);
let resumed_mock = mount_sse_once(&server, resumed_sse).await;
let mut resume_builder = test_codex().with_config(|config| {
config.model = Some("gpt-5.2-codex".to_string());
});
let resumed = resume_builder.resume(&server, home, rollout_path).await?;
resumed
.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "Resume with different model".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&resumed.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
})
.await;
let resumed_body = resumed_mock.single_request().body_json();
let resumed_instructions = resumed_body
.get("instructions")
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string();
assert_eq!(resumed_instructions, initial_instructions);
Ok(())
}

View File

@@ -28,7 +28,6 @@ fn resume_history(
model: previous_model.to_string(),
effort: config.model_reasoning_effort,
summary: config.model_reasoning_summary,
base_instructions: None,
user_instructions: None,
developer_instructions: None,
final_output_json_schema: None,

View File

@@ -28,7 +28,6 @@ use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use serde_json::Value;
use serde_json::json;
fn call_output(req: &ResponsesRequest, call_id: &str) -> (String, Option<bool>) {
let raw = req.function_call_output(call_id);
assert_eq!(