use std::collections::HashMap; use std::collections::HashSet; use std::fmt::Debug; use std::path::Path; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::AtomicU64; use crate::AuthManager; use crate::CodexAuth; use crate::SandboxState; use crate::agent::AgentControl; use crate::agent::AgentStatus; use crate::agent::MAX_THREAD_SPAWN_DEPTH; use crate::agent::agent_status_from_event; use crate::analytics_client::AnalyticsEventsClient; use crate::analytics_client::AppInvocation; use crate::analytics_client::build_track_events_context; use crate::apps::render_apps_section; use crate::commit_attribution::commit_message_trailer_instruction; use crate::compact; use crate::compact::CompactCallsite; use crate::compact::run_inline_auto_compact_task; use crate::compact::should_use_remote_compact_task; use crate::compact_remote::run_inline_remote_auto_compact_task; use crate::connectors; use crate::exec_policy::ExecPolicyManager; use crate::features::FEATURES; use crate::features::Feature; use crate::features::Features; use crate::features::maybe_push_unstable_features_warning; use crate::models_manager::manager::ModelsManager; use crate::parse_command::parse_command; use crate::parse_turn_item; use crate::rollout::session_index; use crate::stream_events_utils::HandleOutputCtx; use crate::stream_events_utils::handle_non_tool_response_item; use crate::stream_events_utils::handle_output_item_done; use crate::stream_events_utils::last_assistant_message_from_item; use crate::terminal; use crate::truncate::TruncationPolicy; use crate::turn_metadata::TurnMetadataState; use crate::util::error_or_panic; use async_channel::Receiver; use async_channel::Sender; use codex_hooks::HookEvent; use codex_hooks::HookEventAfterAgent; use codex_hooks::HookPayload; use codex_hooks::HookResult; use codex_hooks::Hooks; use codex_hooks::HooksConfig; use codex_network_proxy::NetworkProxy; use codex_protocol::ThreadId; use codex_protocol::approvals::ExecPolicyAmendment; use codex_protocol::config_types::ModeKind; use codex_protocol::config_types::Settings; use codex_protocol::config_types::WebSearchMode; use codex_protocol::dynamic_tools::DynamicToolResponse; use codex_protocol::dynamic_tools::DynamicToolSpec; use codex_protocol::items::PlanItem; use codex_protocol::items::TurnItem; use codex_protocol::items::UserMessageItem; use codex_protocol::mcp::CallToolResult; use codex_protocol::models::BaseInstructions; use codex_protocol::models::format_allow_prefixes; use codex_protocol::openai_models::ModelInfo; use codex_protocol::protocol::FileChange; use codex_protocol::protocol::HasLegacyEvent; use codex_protocol::protocol::ItemCompletedEvent; use codex_protocol::protocol::ItemStartedEvent; use codex_protocol::protocol::RawResponseItemEvent; use codex_protocol::protocol::ReviewRequest; use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::SessionSource; use codex_protocol::protocol::SubAgentSource; use codex_protocol::protocol::TurnAbortReason; use codex_protocol::protocol::TurnContextItem; use codex_protocol::protocol::TurnContextNetworkItem; use codex_protocol::protocol::TurnStartedEvent; use codex_protocol::request_user_input::RequestUserInputArgs; use codex_protocol::request_user_input::RequestUserInputResponse; use codex_rmcp_client::ElicitationResponse; use codex_rmcp_client::OAuthCredentialsStoreMode; use futures::future::BoxFuture; use futures::prelude::*; use futures::stream::FuturesOrdered; use rmcp::model::ListResourceTemplatesResult; use rmcp::model::ListResourcesResult; use rmcp::model::PaginatedRequestParams; use rmcp::model::ReadResourceRequestParams; use rmcp::model::ReadResourceResult; use rmcp::model::RequestId; use serde_json; use serde_json::Value; use tokio::sync::Mutex; use tokio::sync::RwLock; use tokio::sync::oneshot; use tokio::sync::watch; use tokio_util::sync::CancellationToken; use tracing::Instrument; use tracing::debug; use tracing::error; use tracing::field; use tracing::info; use tracing::info_span; use tracing::instrument; use tracing::trace; use tracing::trace_span; use tracing::warn; use uuid::Uuid; use crate::ModelProviderInfo; use crate::client::ModelClient; use crate::client::ModelClientSession; use crate::client_common::Prompt; use crate::client_common::ResponseEvent; use crate::codex_thread::ThreadConfigSnapshot; use crate::compact::collect_user_messages; use crate::config::CONFIG_TOML_FILE; use crate::config::Config; use crate::config::Constrained; use crate::config::ConstraintResult; use crate::config::GhostSnapshotConfig; use crate::config::StartedNetworkProxy; use crate::config::resolve_web_search_mode_for_turn; use crate::config::types::McpServerConfig; use crate::config::types::ShellEnvironmentPolicy; use crate::context_manager::ContextManager; use crate::context_manager::TotalTokenUsageBreakdown; use crate::context_manager::estimate_item_token_count; use crate::environment_context::EnvironmentContext; use crate::error::CodexErr; use crate::error::Result as CodexResult; #[cfg(test)] use crate::exec::StreamOutput; #[derive(Debug, PartialEq)] pub enum SteerInputError { NoActiveTurn(Vec), ExpectedTurnMismatch { expected: String, actual: String }, EmptyInput, } use crate::exec_policy::ExecPolicyUpdateError; use crate::feedback_tags; use crate::file_watcher::FileWatcher; use crate::file_watcher::FileWatcherEvent; use crate::git_info::get_git_repo_root; use crate::instructions::UserInstructions; use crate::mcp::CODEX_APPS_MCP_SERVER_NAME; use crate::mcp::auth::compute_auth_statuses; use crate::mcp::effective_mcp_servers; use crate::mcp::maybe_prompt_and_install_mcp_dependencies; use crate::mcp::with_codex_apps_mcp; use crate::mcp_connection_manager::McpConnectionManager; use crate::mcp_connection_manager::filter_codex_apps_mcp_tools_only; use crate::mcp_connection_manager::filter_mcp_tools_by_name; use crate::mcp_connection_manager::filter_non_codex_apps_mcp_tools_only; use crate::memories; use crate::mentions::build_connector_slug_counts; use crate::mentions::build_skill_name_counts; use crate::mentions::collect_explicit_app_ids; use crate::mentions::collect_tool_mentions_from_messages; use crate::project_doc::get_user_instructions; use crate::proposed_plan_parser::ProposedPlanParser; use crate::proposed_plan_parser::ProposedPlanSegment; use crate::proposed_plan_parser::extract_proposed_plan_text; use crate::protocol::AgentMessageContentDeltaEvent; use crate::protocol::AgentReasoningSectionBreakEvent; use crate::protocol::ApplyPatchApprovalRequestEvent; use crate::protocol::AskForApproval; use crate::protocol::BackgroundEventEvent; use crate::protocol::DeprecationNoticeEvent; use crate::protocol::ErrorEvent; use crate::protocol::Event; use crate::protocol::EventMsg; use crate::protocol::ExecApprovalRequestEvent; use crate::protocol::McpServerRefreshConfig; use crate::protocol::ModelRerouteEvent; use crate::protocol::ModelRerouteReason; use crate::protocol::NetworkApprovalContext; use crate::protocol::Op; use crate::protocol::PlanDeltaEvent; use crate::protocol::RateLimitSnapshot; use crate::protocol::ReasoningContentDeltaEvent; use crate::protocol::ReasoningRawContentDeltaEvent; use crate::protocol::RequestUserInputEvent; use crate::protocol::ReviewDecision; use crate::protocol::SandboxPolicy; use crate::protocol::SessionConfiguredEvent; use crate::protocol::SessionNetworkProxyRuntime; use crate::protocol::SkillDependencies as ProtocolSkillDependencies; use crate::protocol::SkillErrorInfo; use crate::protocol::SkillInterface as ProtocolSkillInterface; use crate::protocol::SkillMetadata as ProtocolSkillMetadata; use crate::protocol::SkillToolDependency as ProtocolSkillToolDependency; use crate::protocol::StreamErrorEvent; use crate::protocol::Submission; use crate::protocol::TokenCountEvent; use crate::protocol::TokenUsage; use crate::protocol::TokenUsageInfo; use crate::protocol::TurnDiffEvent; use crate::protocol::WarningEvent; use crate::rollout::RolloutRecorder; use crate::rollout::RolloutRecorderParams; use crate::rollout::map_session_init_error; use crate::rollout::metadata; use crate::rollout::policy::EventPersistenceMode; use crate::shell; use crate::shell_snapshot::ShellSnapshot; use crate::skills::SkillError; use crate::skills::SkillInjections; use crate::skills::SkillLoadOutcome; use crate::skills::SkillMetadata; use crate::skills::SkillsManager; use crate::skills::build_skill_injections; use crate::skills::collect_env_var_dependencies; use crate::skills::collect_explicit_skill_mentions; use crate::skills::injection::ToolMentionKind; use crate::skills::injection::app_id_from_path; use crate::skills::injection::tool_kind_for_path; use crate::skills::resolve_skill_dependencies_for_turn; use crate::state::ActiveTurn; use crate::state::SessionServices; use crate::state::SessionState; use crate::state_db; use crate::tasks::GhostSnapshotTask; use crate::tasks::RegularTask; use crate::tasks::ReviewTask; use crate::tasks::SessionTask; use crate::tasks::SessionTaskContext; use crate::tools::ToolRouter; use crate::tools::context::SharedTurnDiffTracker; use crate::tools::handlers::SEARCH_TOOL_BM25_TOOL_NAME; use crate::tools::js_repl::JsReplHandle; use crate::tools::network_approval::NetworkApprovalService; use crate::tools::network_approval::build_blocked_request_observer; use crate::tools::network_approval::build_network_policy_decider; use crate::tools::parallel::ToolCallRuntime; use crate::tools::sandboxing::ApprovalStore; use crate::tools::spec::ToolsConfig; use crate::tools::spec::ToolsConfigParams; use crate::turn_diff_tracker::TurnDiffTracker; use crate::unified_exec::UnifiedExecProcessManager; use crate::util::backoff; use crate::windows_sandbox::WindowsSandboxLevelExt; use codex_async_utils::OrCancelExt; use codex_otel::OtelManager; use codex_otel::TelemetryAuthMode; use codex_protocol::config_types::CollaborationMode; use codex_protocol::config_types::Personality; use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig; use codex_protocol::config_types::WindowsSandboxLevel; use codex_protocol::models::ContentItem; use codex_protocol::models::DeveloperInstructions; use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig; use codex_protocol::protocol::CodexErrorInfo; use codex_protocol::protocol::InitialHistory; use codex_protocol::user_input::UserInput; use codex_utils_absolute_path::AbsolutePathBuf; use codex_utils_readiness::Readiness; use codex_utils_readiness::ReadinessFlag; /// The high-level interface to the Codex system. /// It operates as a queue pair where you send submissions and receive events. pub struct Codex { pub(crate) tx_sub: Sender, pub(crate) rx_event: Receiver, // Last known status of the agent. pub(crate) agent_status: watch::Receiver, pub(crate) session: Arc, } /// Wrapper returned by [`Codex::spawn`] containing the spawned [`Codex`], /// the submission id for the initial `ConfigureSession` request and the /// unique session id. pub struct CodexSpawnOk { pub codex: Codex, pub thread_id: ThreadId, #[deprecated(note = "use thread_id")] pub conversation_id: ThreadId, } pub(crate) const INITIAL_SUBMIT_ID: &str = ""; pub(crate) const SUBMISSION_CHANNEL_CAPACITY: usize = 64; const CYBER_VERIFY_URL: &str = "https://chatgpt.com/cyber"; const CYBER_SAFETY_URL: &str = "https://developers.openai.com/codex/concepts/cyber-safety"; impl Codex { /// Spawn a new [`Codex`] and initialize the session. #[allow(clippy::too_many_arguments)] pub(crate) async fn spawn( mut config: Config, auth_manager: Arc, models_manager: Arc, skills_manager: Arc, file_watcher: Arc, conversation_history: InitialHistory, session_source: SessionSource, agent_control: AgentControl, dynamic_tools: Vec, persist_extended_history: bool, ) -> CodexResult { let (tx_sub, rx_sub) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY); let (tx_event, rx_event) = async_channel::unbounded(); let loaded_skills = skills_manager.skills_for_config(&config); for err in &loaded_skills.errors { error!( "failed to load skill {}: {}", err.path.display(), err.message ); } if let SessionSource::SubAgent(SubAgentSource::ThreadSpawn { depth, .. }) = session_source && depth >= MAX_THREAD_SPAWN_DEPTH { config.features.disable(Feature::Collab); } let allowed_skills_for_implicit_invocation = loaded_skills.allowed_skills_for_implicit_invocation(); let user_instructions = get_user_instructions(&config, Some(&allowed_skills_for_implicit_invocation)).await; let exec_policy = ExecPolicyManager::load(&config.config_layer_stack) .await .map_err(|err| CodexErr::Fatal(format!("failed to load rules: {err}")))?; let config = Arc::new(config); let _ = models_manager .list_models(crate::models_manager::manager::RefreshStrategy::OnlineIfUncached) .await; let model = models_manager .get_default_model( &config.model, crate::models_manager::manager::RefreshStrategy::OnlineIfUncached, ) .await; // Resolve base instructions for the session. Priority order: // 1. config.base_instructions override // 2. conversation history => session_meta.base_instructions // 3. base_instructions for current model let model_info = models_manager.get_model_info(model.as_str(), &config).await; let base_instructions = config .base_instructions .clone() .or_else(|| conversation_history.get_base_instructions().map(|s| s.text)) .unwrap_or_else(|| model_info.get_model_instructions(config.personality)); // Respect thread-start tools. When missing (resumed/forked threads), read from the db // first, then fall back to rollout-file tools. let persisted_tools = if dynamic_tools.is_empty() && config.features.enabled(Feature::Sqlite) { let thread_id = match &conversation_history { InitialHistory::Resumed(resumed) => Some(resumed.conversation_id), InitialHistory::Forked(_) => conversation_history.forked_from_id(), InitialHistory::New => None, }; match thread_id { Some(thread_id) => { let state_db_ctx = state_db::get_state_db(&config, None).await; state_db::get_dynamic_tools(state_db_ctx.as_deref(), thread_id, "codex_spawn") .await } None => None, } } else { None }; let dynamic_tools = if dynamic_tools.is_empty() { persisted_tools .or_else(|| conversation_history.get_dynamic_tools()) .unwrap_or_default() } else { dynamic_tools }; // TODO (aibrahim): Consolidate config.model and config.model_reasoning_effort into config.collaboration_mode // to avoid extracting these fields separately and constructing CollaborationMode here. let collaboration_mode = CollaborationMode { mode: ModeKind::Default, settings: Settings { model: model.clone(), reasoning_effort: config.model_reasoning_effort, developer_instructions: None, }, }; let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), collaboration_mode, model_reasoning_summary: config.model_reasoning_summary, developer_instructions: config.developer_instructions.clone(), user_instructions, personality: config.personality, base_instructions, compact_prompt: config.compact_prompt.clone(), approval_policy: config.permissions.approval_policy.clone(), sandbox_policy: config.permissions.sandbox_policy.clone(), windows_sandbox_level: WindowsSandboxLevel::from_config(&config), cwd: config.cwd.clone(), codex_home: config.codex_home.clone(), thread_name: None, original_config_do_not_use: Arc::clone(&config), session_source, dynamic_tools, persist_extended_history, }; // Generate a unique ID for the lifetime of this Codex session. let session_source_clone = session_configuration.session_source.clone(); let (agent_status_tx, agent_status_rx) = watch::channel(AgentStatus::PendingInit); let session_init_span = info_span!("session_init"); let session = Session::new( session_configuration, config.clone(), auth_manager.clone(), models_manager.clone(), exec_policy, tx_event.clone(), agent_status_tx.clone(), conversation_history, session_source_clone, skills_manager, file_watcher, agent_control, ) .instrument(session_init_span) .await .map_err(|e| { error!("Failed to create session: {e:#}"); map_session_init_error(&e, &config.codex_home) })?; let thread_id = session.conversation_id; // This task will run until Op::Shutdown is received. let session_loop_span = info_span!("session_loop", thread_id = %thread_id); tokio::spawn( submission_loop(Arc::clone(&session), config, rx_sub).instrument(session_loop_span), ); let codex = Codex { tx_sub, rx_event, agent_status: agent_status_rx, session, }; #[allow(deprecated)] Ok(CodexSpawnOk { codex, thread_id, conversation_id: thread_id, }) } /// Submit the `op` wrapped in a `Submission` with a unique ID. pub async fn submit(&self, op: Op) -> CodexResult { let id = Uuid::now_v7().to_string(); let sub = Submission { id: id.clone(), op }; self.submit_with_id(sub).await?; Ok(id) } /// Use sparingly: prefer `submit()` so Codex is responsible for generating /// unique IDs for each submission. pub async fn submit_with_id(&self, sub: Submission) -> CodexResult<()> { self.tx_sub .send(sub) .await .map_err(|_| CodexErr::InternalAgentDied)?; Ok(()) } pub async fn next_event(&self) -> CodexResult { let event = self .rx_event .recv() .await .map_err(|_| CodexErr::InternalAgentDied)?; Ok(event) } pub async fn steer_input( &self, input: Vec, expected_turn_id: Option<&str>, ) -> Result { self.session.steer_input(input, expected_turn_id).await } pub(crate) async fn agent_status(&self) -> AgentStatus { self.agent_status.borrow().clone() } pub(crate) async fn thread_config_snapshot(&self) -> ThreadConfigSnapshot { let state = self.session.state.lock().await; state.session_configuration.thread_config_snapshot() } pub(crate) fn state_db(&self) -> Option { self.session.state_db() } pub(crate) fn enabled(&self, feature: Feature) -> bool { self.session.enabled(feature) } } /// Context for an initialized model agent /// /// A session has at most 1 running task at a time, and can be interrupted by user input. pub(crate) struct Session { pub(crate) conversation_id: ThreadId, tx_event: Sender, agent_status: watch::Sender, state: Mutex, /// The set of enabled features should be invariant for the lifetime of the /// session. features: Features, pending_mcp_server_refresh_config: Mutex>, pub(crate) active_turn: Mutex>, pub(crate) services: SessionServices, js_repl: Arc, next_internal_sub_id: AtomicU64, } /// The context needed for a single turn of the thread. #[derive(Debug)] pub(crate) struct TurnContext { pub(crate) sub_id: String, pub(crate) config: Arc, pub(crate) auth_manager: Option>, pub(crate) model_info: ModelInfo, pub(crate) otel_manager: OtelManager, pub(crate) provider: ModelProviderInfo, pub(crate) reasoning_effort: Option, pub(crate) reasoning_summary: ReasoningSummaryConfig, pub(crate) session_source: SessionSource, /// The session's current working directory. All relative paths provided by /// the model as well as sandbox policies are resolved against this path /// instead of `std::env::current_dir()`. pub(crate) cwd: PathBuf, pub(crate) developer_instructions: Option, pub(crate) compact_prompt: Option, pub(crate) user_instructions: Option, pub(crate) collaboration_mode: CollaborationMode, pub(crate) personality: Option, pub(crate) approval_policy: AskForApproval, pub(crate) sandbox_policy: SandboxPolicy, pub(crate) network: Option, pub(crate) windows_sandbox_level: WindowsSandboxLevel, pub(crate) shell_environment_policy: ShellEnvironmentPolicy, pub(crate) tools_config: ToolsConfig, pub(crate) features: Features, pub(crate) ghost_snapshot: GhostSnapshotConfig, pub(crate) final_output_json_schema: Option, pub(crate) codex_linux_sandbox_exe: Option, pub(crate) tool_call_gate: Arc, pub(crate) truncation_policy: TruncationPolicy, pub(crate) js_repl: Arc, pub(crate) dynamic_tools: Vec, pub(crate) turn_metadata_state: Arc, } impl TurnContext { pub(crate) fn model_context_window(&self) -> Option { let effective_context_window_percent = self.model_info.effective_context_window_percent; self.model_info.context_window.map(|context_window| { context_window.saturating_mul(effective_context_window_percent) / 100 }) } pub(crate) async fn with_model(&self, model: String, models_manager: &ModelsManager) -> Self { let mut config = (*self.config).clone(); config.model = Some(model.clone()); let model_info = models_manager.get_model_info(model.as_str(), &config).await; let truncation_policy = model_info.truncation_policy.into(); let supported_reasoning_levels = model_info .supported_reasoning_levels .iter() .map(|preset| preset.effort) .collect::>(); let reasoning_effort = if let Some(current_reasoning_effort) = self.reasoning_effort { if supported_reasoning_levels.contains(¤t_reasoning_effort) { Some(current_reasoning_effort) } else { supported_reasoning_levels .get(supported_reasoning_levels.len().saturating_sub(1) / 2) .copied() .or(model_info.default_reasoning_level) } } else { supported_reasoning_levels .get(supported_reasoning_levels.len().saturating_sub(1) / 2) .copied() .or(model_info.default_reasoning_level) }; config.model_reasoning_effort = reasoning_effort; let collaboration_mode = self.collaboration_mode .with_updates(Some(model.clone()), Some(reasoning_effort), None); let features = self.features.clone(); let tools_config = ToolsConfig::new(&ToolsConfigParams { model_info: &model_info, features: &features, web_search_mode: self.tools_config.web_search_mode, }) .with_agent_roles(config.agent_roles.clone()); Self { sub_id: self.sub_id.clone(), config: Arc::new(config), auth_manager: self.auth_manager.clone(), model_info: model_info.clone(), otel_manager: self .otel_manager .clone() .with_model(model.as_str(), model_info.slug.as_str()), provider: self.provider.clone(), reasoning_effort, reasoning_summary: self.reasoning_summary, session_source: self.session_source.clone(), cwd: self.cwd.clone(), developer_instructions: self.developer_instructions.clone(), compact_prompt: self.compact_prompt.clone(), user_instructions: self.user_instructions.clone(), collaboration_mode, personality: self.personality, approval_policy: self.approval_policy, sandbox_policy: self.sandbox_policy.clone(), network: self.network.clone(), windows_sandbox_level: self.windows_sandbox_level, shell_environment_policy: self.shell_environment_policy.clone(), tools_config, features, ghost_snapshot: self.ghost_snapshot.clone(), final_output_json_schema: self.final_output_json_schema.clone(), codex_linux_sandbox_exe: self.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), truncation_policy, js_repl: Arc::clone(&self.js_repl), dynamic_tools: self.dynamic_tools.clone(), turn_metadata_state: self.turn_metadata_state.clone(), } } pub(crate) fn resolve_path(&self, path: Option) -> PathBuf { path.as_ref() .map(PathBuf::from) .map_or_else(|| self.cwd.clone(), |p| self.cwd.join(p)) } pub(crate) fn compact_prompt(&self) -> &str { self.compact_prompt .as_deref() .unwrap_or(compact::SUMMARIZATION_PROMPT) } pub(crate) fn to_turn_context_item( &self, collaboration_mode: CollaborationMode, ) -> TurnContextItem { TurnContextItem { turn_id: Some(self.sub_id.clone()), cwd: self.cwd.clone(), approval_policy: self.approval_policy, sandbox_policy: self.sandbox_policy.clone(), network: self.turn_context_network_item(), model: self.model_info.slug.clone(), personality: self.personality, collaboration_mode: Some(collaboration_mode), effort: self.reasoning_effort, summary: self.reasoning_summary, user_instructions: self.user_instructions.clone(), developer_instructions: self.developer_instructions.clone(), final_output_json_schema: self.final_output_json_schema.clone(), truncation_policy: Some(self.truncation_policy.into()), } } fn turn_context_network_item(&self) -> Option { let network = self .config .config_layer_stack .requirements() .network .as_ref()?; Some(TurnContextNetworkItem { allowed_domains: network.allowed_domains.clone().unwrap_or_default(), denied_domains: network.denied_domains.clone().unwrap_or_default(), }) } } #[derive(Clone)] pub(crate) struct SessionConfiguration { /// Provider identifier ("openai", "openrouter", ...). provider: ModelProviderInfo, collaboration_mode: CollaborationMode, model_reasoning_summary: ReasoningSummaryConfig, /// Developer instructions that supplement the base instructions. developer_instructions: Option, /// Model instructions that are appended to the base instructions. user_instructions: Option, /// Personality preference for the model. personality: Option, /// Base instructions for the session. base_instructions: String, /// Compact prompt override. compact_prompt: Option, /// When to escalate for approval for execution approval_policy: Constrained, /// How to sandbox commands executed in the system sandbox_policy: Constrained, windows_sandbox_level: WindowsSandboxLevel, /// Working directory that should be treated as the *root* of the /// session. All relative paths supplied by the model as well as the /// execution sandbox are resolved against this directory **instead** /// of the process-wide current working directory. CLI front-ends are /// expected to expand this to an absolute path before sending the /// `ConfigureSession` operation so that the business-logic layer can /// operate deterministically. cwd: PathBuf, /// Directory containing all Codex state for this session. codex_home: PathBuf, /// Optional user-facing name for the thread, updated during the session. thread_name: Option, // TODO(pakrym): Remove config from here original_config_do_not_use: Arc, /// Source of the session (cli, vscode, exec, mcp, ...) session_source: SessionSource, dynamic_tools: Vec, persist_extended_history: bool, } impl SessionConfiguration { pub(crate) fn codex_home(&self) -> &PathBuf { &self.codex_home } fn thread_config_snapshot(&self) -> ThreadConfigSnapshot { ThreadConfigSnapshot { model: self.collaboration_mode.model().to_string(), model_provider_id: self.original_config_do_not_use.model_provider_id.clone(), approval_policy: self.approval_policy.value(), sandbox_policy: self.sandbox_policy.get().clone(), cwd: self.cwd.clone(), reasoning_effort: self.collaboration_mode.reasoning_effort(), personality: self.personality, session_source: self.session_source.clone(), } } pub(crate) fn apply(&self, updates: &SessionSettingsUpdate) -> ConstraintResult { let mut next_configuration = self.clone(); if let Some(collaboration_mode) = updates.collaboration_mode.clone() { next_configuration.collaboration_mode = collaboration_mode; } if let Some(summary) = updates.reasoning_summary { next_configuration.model_reasoning_summary = summary; } if let Some(personality) = updates.personality { next_configuration.personality = Some(personality); } if let Some(approval_policy) = updates.approval_policy { next_configuration.approval_policy.set(approval_policy)?; } if let Some(sandbox_policy) = updates.sandbox_policy.clone() { next_configuration.sandbox_policy.set(sandbox_policy)?; } if let Some(windows_sandbox_level) = updates.windows_sandbox_level { next_configuration.windows_sandbox_level = windows_sandbox_level; } if let Some(cwd) = updates.cwd.clone() { next_configuration.cwd = cwd; } Ok(next_configuration) } } #[derive(Default, Clone)] pub(crate) struct SessionSettingsUpdate { pub(crate) cwd: Option, pub(crate) approval_policy: Option, pub(crate) sandbox_policy: Option, pub(crate) windows_sandbox_level: Option, pub(crate) collaboration_mode: Option, pub(crate) reasoning_summary: Option, pub(crate) final_output_json_schema: Option>, pub(crate) personality: Option, } impl Session { /// Builds the `x-codex-beta-features` header value for this session. /// /// `ModelClient` is session-scoped and intentionally does not depend on the full `Config`, so /// we precompute the comma-separated list of enabled experimental feature keys at session /// creation time and thread it into the client. fn build_model_client_beta_features_header(config: &Config) -> Option { let beta_features_header = FEATURES .iter() .filter_map(|spec| { if spec.stage.experimental_menu_description().is_some() && config.features.enabled(spec.id) { Some(spec.key) } else { None } }) .collect::>() .join(","); if beta_features_header.is_empty() { None } else { Some(beta_features_header) } } async fn start_managed_network_proxy( spec: &crate::config::NetworkProxySpec, sandbox_policy: &SandboxPolicy, network_policy_decider: Option>, blocked_request_observer: Option>, managed_network_requirements_enabled: bool, ) -> anyhow::Result<(StartedNetworkProxy, SessionNetworkProxyRuntime)> { let network_proxy = spec .start_proxy( sandbox_policy, network_policy_decider, blocked_request_observer, managed_network_requirements_enabled, ) .await .map_err(|err| anyhow::anyhow!("failed to start managed network proxy: {err}"))?; let session_network_proxy = { let proxy = network_proxy.proxy(); SessionNetworkProxyRuntime { http_addr: proxy.http_addr().to_string(), socks_addr: proxy.socks_addr().to_string(), admin_addr: proxy.admin_addr().to_string(), } }; Ok((network_proxy, session_network_proxy)) } /// Don't expand the number of mutated arguments on config. We are in the process of getting rid of it. pub(crate) fn build_per_turn_config(session_configuration: &SessionConfiguration) -> Config { // todo(aibrahim): store this state somewhere else so we don't need to mut config let config = session_configuration.original_config_do_not_use.clone(); let mut per_turn_config = (*config).clone(); per_turn_config.model_reasoning_effort = session_configuration.collaboration_mode.reasoning_effort(); per_turn_config.model_reasoning_summary = session_configuration.model_reasoning_summary; per_turn_config.personality = session_configuration.personality; let resolved_web_search_mode = resolve_web_search_mode_for_turn( &per_turn_config.web_search_mode, session_configuration.sandbox_policy.get(), ); if let Err(err) = per_turn_config .web_search_mode .set(resolved_web_search_mode) { let fallback_value = per_turn_config.web_search_mode.value(); tracing::warn!( error = %err, ?resolved_web_search_mode, ?fallback_value, "resolved web_search_mode is disallowed by requirements; keeping constrained value" ); } per_turn_config.features = config.features.clone(); per_turn_config } pub(crate) async fn codex_home(&self) -> PathBuf { let state = self.state.lock().await; state.session_configuration.codex_home().clone() } fn start_file_watcher_listener(self: &Arc) { let mut rx = self.services.file_watcher.subscribe(); let weak_sess = Arc::downgrade(self); tokio::spawn(async move { loop { match rx.recv().await { Ok(FileWatcherEvent::SkillsChanged { .. }) => { let Some(sess) = weak_sess.upgrade() else { break; }; let event = Event { id: sess.next_internal_sub_id(), msg: EventMsg::SkillsUpdateAvailable, }; sess.send_event_raw(event).await; } Err(tokio::sync::broadcast::error::RecvError::Closed) => break, Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue, } } }); } #[allow(clippy::too_many_arguments)] fn make_turn_context( auth_manager: Option>, otel_manager: &OtelManager, provider: ModelProviderInfo, session_configuration: &SessionConfiguration, per_turn_config: Config, model_info: ModelInfo, network: Option, sub_id: String, js_repl: Arc, ) -> TurnContext { let reasoning_effort = session_configuration.collaboration_mode.reasoning_effort(); let reasoning_summary = session_configuration.model_reasoning_summary; let otel_manager = otel_manager.clone().with_model( session_configuration.collaboration_mode.model(), model_info.slug.as_str(), ); let session_source = session_configuration.session_source.clone(); let auth_manager_for_context = auth_manager; let provider_for_context = provider; let otel_manager_for_context = otel_manager; let per_turn_config = Arc::new(per_turn_config); let tools_config = ToolsConfig::new(&ToolsConfigParams { model_info: &model_info, features: &per_turn_config.features, web_search_mode: Some(per_turn_config.web_search_mode.value()), }) .with_agent_roles(per_turn_config.agent_roles.clone()); let cwd = session_configuration.cwd.clone(); let turn_metadata_state = Arc::new(TurnMetadataState::new( sub_id.clone(), cwd.clone(), session_configuration.sandbox_policy.get(), session_configuration.windows_sandbox_level, per_turn_config .features .enabled(Feature::UseLinuxSandboxBwrap), )); TurnContext { sub_id, config: per_turn_config.clone(), auth_manager: auth_manager_for_context, model_info: model_info.clone(), otel_manager: otel_manager_for_context, provider: provider_for_context, reasoning_effort, reasoning_summary, session_source, cwd, developer_instructions: session_configuration.developer_instructions.clone(), compact_prompt: session_configuration.compact_prompt.clone(), user_instructions: session_configuration.user_instructions.clone(), collaboration_mode: session_configuration.collaboration_mode.clone(), personality: session_configuration.personality, approval_policy: session_configuration.approval_policy.value(), sandbox_policy: session_configuration.sandbox_policy.get().clone(), network, windows_sandbox_level: session_configuration.windows_sandbox_level, shell_environment_policy: per_turn_config.permissions.shell_environment_policy.clone(), tools_config, features: per_turn_config.features.clone(), ghost_snapshot: per_turn_config.ghost_snapshot.clone(), final_output_json_schema: None, codex_linux_sandbox_exe: per_turn_config.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), truncation_policy: model_info.truncation_policy.into(), js_repl, dynamic_tools: session_configuration.dynamic_tools.clone(), turn_metadata_state, } } #[allow(clippy::too_many_arguments)] async fn new( mut session_configuration: SessionConfiguration, config: Arc, auth_manager: Arc, models_manager: Arc, exec_policy: ExecPolicyManager, tx_event: Sender, agent_status: watch::Sender, initial_history: InitialHistory, session_source: SessionSource, skills_manager: Arc, file_watcher: Arc, agent_control: AgentControl, ) -> anyhow::Result> { debug!( "Configuring session: model={}; provider={:?}", session_configuration.collaboration_mode.model(), session_configuration.provider ); if !session_configuration.cwd.is_absolute() { return Err(anyhow::anyhow!( "cwd is not absolute: {:?}", session_configuration.cwd )); } let forked_from_id = initial_history.forked_from_id(); let (conversation_id, rollout_params) = match &initial_history { InitialHistory::New | InitialHistory::Forked(_) => { let conversation_id = ThreadId::default(); ( conversation_id, RolloutRecorderParams::new( conversation_id, forked_from_id, session_source, BaseInstructions { text: session_configuration.base_instructions.clone(), }, session_configuration.dynamic_tools.clone(), if session_configuration.persist_extended_history { EventPersistenceMode::Extended } else { EventPersistenceMode::Limited }, ), ) } InitialHistory::Resumed(resumed_history) => ( resumed_history.conversation_id, RolloutRecorderParams::resume( resumed_history.rollout_path.clone(), if session_configuration.persist_extended_history { EventPersistenceMode::Extended } else { EventPersistenceMode::Limited }, ), ), }; let state_builder = match &initial_history { InitialHistory::Resumed(resumed) => metadata::builder_from_items( resumed.history.as_slice(), resumed.rollout_path.as_path(), ), InitialHistory::New | InitialHistory::Forked(_) => None, }; // Kick off independent async setup tasks in parallel to reduce startup latency. // // - initialize RolloutRecorder with new or resumed session info // - perform default shell discovery // - load history metadata let rollout_fut = async { if config.ephemeral { Ok::<_, anyhow::Error>((None, None)) } else { let state_db_ctx = state_db::init_if_enabled(&config, None).await; let rollout_recorder = RolloutRecorder::new( &config, rollout_params, state_db_ctx.clone(), state_builder.clone(), ) .await?; Ok((Some(rollout_recorder), state_db_ctx)) } }; let history_meta_fut = crate::message_history::history_metadata(&config); let auth_manager_clone = Arc::clone(&auth_manager); let config_for_mcp = Arc::clone(&config); let auth_and_mcp_fut = async move { let auth = auth_manager_clone.auth().await; let mcp_servers = effective_mcp_servers(&config_for_mcp, auth.as_ref()); let auth_statuses = compute_auth_statuses( mcp_servers.iter(), config_for_mcp.mcp_oauth_credentials_store_mode, ) .await; (auth, mcp_servers, auth_statuses) }; // Join all independent futures. let ( rollout_recorder_and_state_db, (history_log_id, history_entry_count), (auth, mcp_servers, auth_statuses), ) = tokio::join!(rollout_fut, history_meta_fut, auth_and_mcp_fut); let (rollout_recorder, state_db_ctx) = rollout_recorder_and_state_db.map_err(|e| { error!("failed to initialize rollout recorder: {e:#}"); e })?; let rollout_path = rollout_recorder .as_ref() .map(|rec| rec.rollout_path.clone()); let mut post_session_configured_events = Vec::::new(); for usage in config.features.legacy_feature_usages() { post_session_configured_events.push(Event { id: INITIAL_SUBMIT_ID.to_owned(), msg: EventMsg::DeprecationNotice(DeprecationNoticeEvent { summary: usage.summary.clone(), details: usage.details.clone(), }), }); } if crate::config::uses_deprecated_instructions_file(&config.config_layer_stack) { post_session_configured_events.push(Event { id: INITIAL_SUBMIT_ID.to_owned(), msg: EventMsg::DeprecationNotice(DeprecationNoticeEvent { summary: "`experimental_instructions_file` is deprecated and ignored. Use `model_instructions_file` instead." .to_string(), details: Some( "Move the setting to `model_instructions_file` in config.toml (or under a profile) to load instructions from a file." .to_string(), ), }), }); } for message in &config.startup_warnings { post_session_configured_events.push(Event { id: "".to_owned(), msg: EventMsg::Warning(WarningEvent { message: message.clone(), }), }); } maybe_push_unstable_features_warning(&config, &mut post_session_configured_events); if config.permissions.approval_policy.value() == AskForApproval::OnFailure { post_session_configured_events.push(Event { id: "".to_owned(), msg: EventMsg::Warning(WarningEvent { message: "`on-failure` approval policy is deprecated and will be removed in a future release. Use `on-request` for interactive approvals or `never` for non-interactive runs.".to_string(), }), }); } let auth = auth.as_ref(); let auth_mode = auth.map(CodexAuth::auth_mode).map(TelemetryAuthMode::from); let otel_manager = OtelManager::new( conversation_id, session_configuration.collaboration_mode.model(), session_configuration.collaboration_mode.model(), auth.and_then(CodexAuth::get_account_id), auth.and_then(CodexAuth::get_account_email), auth_mode, crate::default_client::originator().value, config.otel.log_user_prompt, terminal::user_agent(), session_configuration.session_source.clone(), ); config.features.emit_metrics(&otel_manager); otel_manager.counter( "codex.thread.started", 1, &[( "is_git", if get_git_repo_root(&session_configuration.cwd).is_some() { "true" } else { "false" }, )], ); otel_manager.conversation_starts( config.model_provider.name.as_str(), session_configuration.collaboration_mode.reasoning_effort(), config.model_reasoning_summary, config.model_context_window, config.model_auto_compact_token_limit, config.permissions.approval_policy.value(), config.permissions.sandbox_policy.get().clone(), mcp_servers.keys().map(String::as_str).collect(), config.active_profile.clone(), ); let mut default_shell = shell::default_user_shell(); // Create the mutable state for the Session. let shell_snapshot_tx = if config.features.enabled(Feature::ShellSnapshot) { ShellSnapshot::start_snapshotting( config.codex_home.clone(), conversation_id, session_configuration.cwd.clone(), &mut default_shell, otel_manager.clone(), ) } else { let (tx, rx) = watch::channel(None); default_shell.shell_snapshot = rx; tx }; let thread_name = match session_index::find_thread_name_by_id(&config.codex_home, &conversation_id).await { Ok(name) => name, Err(err) => { warn!("Failed to read session index for thread name: {err}"); None } }; session_configuration.thread_name = thread_name.clone(); let mut state = SessionState::new(session_configuration.clone()); let managed_network_requirements_enabled = config.managed_network_requirements_enabled(); let network_approval = Arc::new(NetworkApprovalService::default()); // The managed proxy can call back into core for allowlist-miss decisions. let network_policy_decider_session = if managed_network_requirements_enabled { config .permissions .network .as_ref() .map(|_| Arc::new(RwLock::new(std::sync::Weak::::new()))) } else { None }; let blocked_request_observer = if managed_network_requirements_enabled { config .permissions .network .as_ref() .map(|_| build_blocked_request_observer(Arc::clone(&network_approval))) } else { None }; let network_policy_decider = network_policy_decider_session .as_ref() .map(|network_policy_decider_session| { build_network_policy_decider( Arc::clone(&network_approval), Arc::clone(network_policy_decider_session), ) }); let (network_proxy, session_network_proxy) = if let Some(spec) = config.permissions.network.as_ref() { let (network_proxy, session_network_proxy) = Self::start_managed_network_proxy( spec, config.permissions.sandbox_policy.get(), network_policy_decider.as_ref().map(Arc::clone), blocked_request_observer.as_ref().map(Arc::clone), managed_network_requirements_enabled, ) .await?; (Some(network_proxy), Some(session_network_proxy)) } else { (None, None) }; let services = SessionServices { mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())), mcp_startup_cancellation_token: Mutex::new(CancellationToken::new()), unified_exec_manager: UnifiedExecProcessManager::default(), analytics_events_client: AnalyticsEventsClient::new( Arc::clone(&config), Arc::clone(&auth_manager), ), hooks: Hooks::new(HooksConfig { legacy_notify_argv: config.notify.clone(), }), rollout: Mutex::new(rollout_recorder), user_shell: Arc::new(default_shell), shell_snapshot_tx, show_raw_agent_reasoning: config.show_raw_agent_reasoning, exec_policy, auth_manager: Arc::clone(&auth_manager), otel_manager, models_manager: Arc::clone(&models_manager), tool_approvals: Mutex::new(ApprovalStore::default()), skills_manager, file_watcher, agent_control, network_proxy, network_approval: Arc::clone(&network_approval), state_db: state_db_ctx.clone(), model_client: ModelClient::new( Some(Arc::clone(&auth_manager)), conversation_id, session_configuration.provider.clone(), session_configuration.session_source.clone(), config.model_verbosity, config.features.enabled(Feature::ResponsesWebsockets) || config.features.enabled(Feature::ResponsesWebsocketsV2), config.features.enabled(Feature::ResponsesWebsocketsV2), config.features.enabled(Feature::EnableRequestCompression), config.features.enabled(Feature::RuntimeMetrics), Self::build_model_client_beta_features_header(config.as_ref()), ), }; let js_repl = Arc::new(JsReplHandle::with_node_path( config.js_repl_node_path.clone(), config.codex_home.clone(), )); let prewarm_model_info = models_manager .get_model_info(session_configuration.collaboration_mode.model(), &config) .await; let startup_regular_task = RegularTask::with_startup_prewarm( services.model_client.clone(), services.otel_manager.clone(), prewarm_model_info, ); state.set_startup_regular_task(startup_regular_task); let sess = Arc::new(Session { conversation_id, tx_event: tx_event.clone(), agent_status, state: Mutex::new(state), features: config.features.clone(), pending_mcp_server_refresh_config: Mutex::new(None), active_turn: Mutex::new(None), services, js_repl, next_internal_sub_id: AtomicU64::new(0), }); if let Some(network_policy_decider_session) = network_policy_decider_session { let mut guard = network_policy_decider_session.write().await; *guard = Arc::downgrade(&sess); } // Dispatch the SessionConfiguredEvent first and then report any errors. // If resuming, include converted initial messages in the payload so UIs can render them immediately. let initial_messages = initial_history.get_event_msgs(); let events = std::iter::once(Event { id: INITIAL_SUBMIT_ID.to_owned(), msg: EventMsg::SessionConfigured(SessionConfiguredEvent { session_id: conversation_id, forked_from_id, thread_name: session_configuration.thread_name.clone(), model: session_configuration.collaboration_mode.model().to_string(), model_provider_id: config.model_provider_id.clone(), approval_policy: session_configuration.approval_policy.value(), sandbox_policy: session_configuration.sandbox_policy.get().clone(), cwd: session_configuration.cwd.clone(), reasoning_effort: session_configuration.collaboration_mode.reasoning_effort(), history_log_id, history_entry_count, initial_messages, network_proxy: session_network_proxy, rollout_path, }), }) .chain(post_session_configured_events.into_iter()); for event in events { sess.send_event_raw(event).await; } // Start the watcher after SessionConfigured so it cannot emit earlier events. sess.start_file_watcher_listener(); // Construct sandbox_state before initialize() so it can be sent to each // MCP server immediately after it becomes ready (avoiding blocking). let sandbox_state = SandboxState { sandbox_policy: session_configuration.sandbox_policy.get().clone(), codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(), sandbox_cwd: session_configuration.cwd.clone(), use_linux_sandbox_bwrap: config.features.enabled(Feature::UseLinuxSandboxBwrap), }; let mut required_mcp_servers: Vec = mcp_servers .iter() .filter(|(_, server)| server.enabled && server.required) .map(|(name, _)| name.clone()) .collect(); required_mcp_servers.sort(); let cancel_token = sess.mcp_startup_cancellation_token().await; sess.services .mcp_connection_manager .write() .await .initialize( &mcp_servers, config.mcp_oauth_credentials_store_mode, auth_statuses.clone(), tx_event.clone(), cancel_token, sandbox_state, ) .await; if !required_mcp_servers.is_empty() { let failures = sess .services .mcp_connection_manager .read() .await .required_startup_failures(&required_mcp_servers) .await; if !failures.is_empty() { let details = failures .iter() .map(|failure| format!("{}: {}", failure.server, failure.error)) .collect::>() .join("; "); return Err(anyhow::anyhow!( "required MCP servers failed to initialize: {details}" )); } } // record_initial_history can emit events. We record only after the SessionConfiguredEvent is emitted. sess.record_initial_history(initial_history).await; memories::start_memories_startup_task( &sess, Arc::clone(&config), &session_configuration.session_source, ); Ok(sess) } pub(crate) fn get_tx_event(&self) -> Sender { self.tx_event.clone() } pub(crate) fn state_db(&self) -> Option { self.services.state_db.clone() } /// Ensure all rollout writes are durably flushed. pub(crate) async fn flush_rollout(&self) { let recorder = { let guard = self.services.rollout.lock().await; guard.clone() }; if let Some(rec) = recorder && let Err(e) = rec.flush().await { warn!("failed to flush rollout recorder: {e}"); } } pub(crate) async fn ensure_rollout_materialized(&self) { let recorder = { let guard = self.services.rollout.lock().await; guard.clone() }; if let Some(rec) = recorder && let Err(e) = rec.persist().await { warn!("failed to materialize rollout recorder: {e}"); } } fn next_internal_sub_id(&self) -> String { let id = self .next_internal_sub_id .fetch_add(1, std::sync::atomic::Ordering::SeqCst); format!("auto-compact-{id}") } pub(crate) async fn get_total_token_usage(&self) -> i64 { let state = self.state.lock().await; state.get_total_token_usage(state.server_reasoning_included()) } pub(crate) async fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown { let state = self.state.lock().await; state.history.get_total_token_usage_breakdown() } pub(crate) async fn get_estimated_token_count( &self, turn_context: &TurnContext, ) -> Option { let state = self.state.lock().await; state.history.estimate_token_count(turn_context) } pub(crate) async fn get_base_instructions(&self) -> BaseInstructions { let state = self.state.lock().await; BaseInstructions { text: state.session_configuration.base_instructions.clone(), } } pub(crate) async fn merge_mcp_tool_selection(&self, tool_names: Vec) -> Vec { let mut state = self.state.lock().await; state.merge_mcp_tool_selection(tool_names) } pub(crate) async fn set_mcp_tool_selection(&self, tool_names: Vec) { let mut state = self.state.lock().await; state.set_mcp_tool_selection(tool_names); } pub(crate) async fn get_mcp_tool_selection(&self) -> Option> { let state = self.state.lock().await; state.get_mcp_tool_selection() } pub(crate) async fn clear_mcp_tool_selection(&self) { let mut state = self.state.lock().await; state.clear_mcp_tool_selection(); } // Merges connector IDs into the session-level explicit connector selection. pub(crate) async fn merge_connector_selection( &self, connector_ids: HashSet, ) -> HashSet { let mut state = self.state.lock().await; state.merge_connector_selection(connector_ids) } // Returns the connector IDs currently selected for this session. pub(crate) async fn get_connector_selection(&self) -> HashSet { let state = self.state.lock().await; state.get_connector_selection() } // Clears connector IDs that were accumulated for explicit selection. pub(crate) async fn clear_connector_selection(&self) { let mut state = self.state.lock().await; state.clear_connector_selection(); } async fn record_initial_history(&self, conversation_history: InitialHistory) { let turn_context = self.new_default_turn().await; self.clear_mcp_tool_selection().await; match conversation_history { InitialHistory::New => { // Build and record initial items (user instructions + environment context) let items = self.build_initial_context(&turn_context).await; self.record_conversation_items(&turn_context, &items).await; { let mut state = self.state.lock().await; state.initial_context_seeded = true; } self.set_previous_model(None).await; // Ensure initial items are visible to immediate readers (e.g., tests, forks). self.flush_rollout().await; } InitialHistory::Resumed(resumed_history) => { let rollout_items = resumed_history.history; let restored_tool_selection = Self::extract_mcp_tool_selection_from_rollout(&rollout_items); let previous_model = Self::last_rollout_model_name(&rollout_items) .map(std::string::ToString::to_string); { let mut state = self.state.lock().await; state.initial_context_seeded = false; } self.set_previous_model(previous_model).await; // If resuming, warn when the last recorded model differs from the current one. let curr = turn_context.model_info.slug.as_str(); if let Some(prev) = Self::last_rollout_model_name(&rollout_items).filter(|p| *p != curr) { warn!("resuming session with different model: previous={prev}, current={curr}"); self.send_event( &turn_context, EventMsg::Warning(WarningEvent { message: format!( "This session was recorded with model `{prev}` but is resuming with `{curr}`. \ Consider switching back to `{prev}` as it may affect Codex performance." ), }), ) .await; } // Always add response items to conversation history let reconstructed_history = self .reconstruct_history_from_rollout(&turn_context, &rollout_items) .await; if !reconstructed_history.is_empty() { self.record_into_history(&reconstructed_history, &turn_context) .await; } // Seed usage info from the recorded rollout so UIs can show token counts // immediately on resume/fork. if let Some(info) = Self::last_token_info_from_rollout(&rollout_items) { let mut state = self.state.lock().await; state.set_token_info(Some(info)); } if let Some(selected_tools) = restored_tool_selection { self.set_mcp_tool_selection(selected_tools).await; } // Defer seeding the session's initial context until the first turn starts so // turn/start overrides can be merged before we write to the rollout. self.flush_rollout().await; } InitialHistory::Forked(rollout_items) => { let restored_tool_selection = Self::extract_mcp_tool_selection_from_rollout(&rollout_items); let previous_model = Self::last_rollout_model_name(&rollout_items) .map(std::string::ToString::to_string); self.set_previous_model(previous_model).await; // Always add response items to conversation history let reconstructed_history = self .reconstruct_history_from_rollout(&turn_context, &rollout_items) .await; if !reconstructed_history.is_empty() { self.record_into_history(&reconstructed_history, &turn_context) .await; } // Seed usage info from the recorded rollout so UIs can show token counts // immediately on resume/fork. if let Some(info) = Self::last_token_info_from_rollout(&rollout_items) { let mut state = self.state.lock().await; state.set_token_info(Some(info)); } if let Some(selected_tools) = restored_tool_selection { self.set_mcp_tool_selection(selected_tools).await; } // If persisting, persist all rollout items as-is (recorder filters) if !rollout_items.is_empty() { self.persist_rollout_items(&rollout_items).await; } // Append the current session's initial context after the reconstructed history. let initial_context = self.build_initial_context(&turn_context).await; self.record_conversation_items(&turn_context, &initial_context) .await; { let mut state = self.state.lock().await; state.initial_context_seeded = true; } // Forked threads should remain file-backed immediately after startup. self.ensure_rollout_materialized().await; // Flush after seeding history and any persisted rollout copy. self.flush_rollout().await; } } } fn last_rollout_model_name(rollout_items: &[RolloutItem]) -> Option<&str> { rollout_items.iter().rev().find_map(|it| { if let RolloutItem::TurnContext(ctx) = it { Some(ctx.model.as_str()) } else { None } }) } fn last_token_info_from_rollout(rollout_items: &[RolloutItem]) -> Option { rollout_items.iter().rev().find_map(|item| match item { RolloutItem::EventMsg(EventMsg::TokenCount(ev)) => ev.info.clone(), _ => None, }) } fn extract_mcp_tool_selection_from_rollout( rollout_items: &[RolloutItem], ) -> Option> { let mut search_call_ids = HashSet::new(); let mut active_selected_tools: Option> = None; for item in rollout_items { let RolloutItem::ResponseItem(response_item) = item else { continue; }; match response_item { ResponseItem::FunctionCall { name, call_id, .. } => { if name == SEARCH_TOOL_BM25_TOOL_NAME { search_call_ids.insert(call_id.clone()); } } ResponseItem::FunctionCallOutput { call_id, output } => { if !search_call_ids.contains(call_id) { continue; } let Some(content) = output.body.to_text() else { continue; }; let Ok(payload) = serde_json::from_str::(&content) else { continue; }; let Some(selected_tools) = payload .get("active_selected_tools") .and_then(Value::as_array) else { continue; }; let Some(selected_tools) = selected_tools .iter() .map(|value| value.as_str().map(str::to_string)) .collect::>>() else { continue; }; active_selected_tools = Some(selected_tools); } _ => {} } } active_selected_tools } async fn previous_model(&self) -> Option { let state = self.state.lock().await; state.previous_model() } pub(crate) async fn set_previous_model(&self, previous_model: Option) { let mut state = self.state.lock().await; state.set_previous_model(previous_model); } fn maybe_refresh_shell_snapshot_for_cwd( &self, previous_cwd: &Path, next_cwd: &Path, codex_home: &Path, ) { if previous_cwd == next_cwd { return; } if !self.features.enabled(Feature::ShellSnapshot) { return; } ShellSnapshot::refresh_snapshot( codex_home.to_path_buf(), self.conversation_id, next_cwd.to_path_buf(), self.services.user_shell.as_ref().clone(), self.services.shell_snapshot_tx.clone(), self.services.otel_manager.clone(), ); } pub(crate) async fn update_settings( &self, updates: SessionSettingsUpdate, ) -> ConstraintResult<()> { let mut state = self.state.lock().await; match state.session_configuration.apply(&updates) { Ok(updated) => { let previous_cwd = state.session_configuration.cwd.clone(); let next_cwd = updated.cwd.clone(); let codex_home = updated.codex_home.clone(); state.session_configuration = updated; drop(state); self.maybe_refresh_shell_snapshot_for_cwd(&previous_cwd, &next_cwd, &codex_home); Ok(()) } Err(err) => { warn!("rejected session settings update: {err}"); Err(err) } } } pub(crate) async fn new_turn_with_sub_id( &self, sub_id: String, updates: SessionSettingsUpdate, ) -> ConstraintResult> { let (session_configuration, sandbox_policy_changed, previous_cwd, codex_home) = { let mut state = self.state.lock().await; match state.session_configuration.clone().apply(&updates) { Ok(next) => { let previous_cwd = state.session_configuration.cwd.clone(); let sandbox_policy_changed = state.session_configuration.sandbox_policy != next.sandbox_policy; let codex_home = next.codex_home.clone(); state.session_configuration = next.clone(); (next, sandbox_policy_changed, previous_cwd, codex_home) } Err(err) => { drop(state); self.send_event_raw(Event { id: sub_id.clone(), msg: EventMsg::Error(ErrorEvent { message: err.to_string(), codex_error_info: Some(CodexErrorInfo::BadRequest), }), }) .await; return Err(err); } } }; self.maybe_refresh_shell_snapshot_for_cwd( &previous_cwd, &session_configuration.cwd, &codex_home, ); Ok(self .new_turn_from_configuration( sub_id, session_configuration, updates.final_output_json_schema, sandbox_policy_changed, ) .await) } async fn new_turn_from_configuration( &self, sub_id: String, session_configuration: SessionConfiguration, final_output_json_schema: Option>, sandbox_policy_changed: bool, ) -> Arc { let per_turn_config = Self::build_per_turn_config(&session_configuration); if sandbox_policy_changed { let sandbox_state = SandboxState { sandbox_policy: per_turn_config.permissions.sandbox_policy.get().clone(), codex_linux_sandbox_exe: per_turn_config.codex_linux_sandbox_exe.clone(), sandbox_cwd: per_turn_config.cwd.clone(), use_linux_sandbox_bwrap: per_turn_config .features .enabled(Feature::UseLinuxSandboxBwrap), }; if let Err(e) = self .services .mcp_connection_manager .read() .await .notify_sandbox_state_change(&sandbox_state) .await { warn!("Failed to notify sandbox state change to MCP servers: {e:#}"); } } let model_info = self .services .models_manager .get_model_info( session_configuration.collaboration_mode.model(), &per_turn_config, ) .await; let mut turn_context: TurnContext = Self::make_turn_context( Some(Arc::clone(&self.services.auth_manager)), &self.services.otel_manager, session_configuration.provider.clone(), &session_configuration, per_turn_config, model_info, self.services .network_proxy .as_ref() .map(StartedNetworkProxy::proxy), sub_id, Arc::clone(&self.js_repl), ); if let Some(final_schema) = final_output_json_schema { turn_context.final_output_json_schema = final_schema; } let turn_context = Arc::new(turn_context); turn_context.turn_metadata_state.spawn_git_enrichment_task(); turn_context } pub(crate) async fn maybe_emit_unknown_model_warning_for_turn(&self, tc: &TurnContext) { if tc.model_info.used_fallback_model_metadata { self.send_event( tc, EventMsg::Warning(WarningEvent { message: format!( "Model metadata for `{}` not found. Defaulting to fallback metadata; this can degrade performance and cause issues.", tc.model_info.slug ), }), ) .await; } } pub(crate) async fn new_default_turn(&self) -> Arc { self.new_default_turn_with_sub_id(self.next_internal_sub_id()) .await } pub(crate) async fn take_startup_regular_task(&self) -> Option { let mut state = self.state.lock().await; state.take_startup_regular_task() } async fn get_config(&self) -> std::sync::Arc { let state = self.state.lock().await; state .session_configuration .original_config_do_not_use .clone() } pub(crate) async fn reload_user_config_layer(&self) { let config_toml_path = { let state = self.state.lock().await; state .session_configuration .codex_home .join(CONFIG_TOML_FILE) }; let user_config = match std::fs::read_to_string(&config_toml_path) { Ok(contents) => match toml::from_str::(&contents) { Ok(config) => config, Err(err) => { warn!("failed to parse user config while reloading layer: {err}"); return; } }, Err(err) if err.kind() == std::io::ErrorKind::NotFound => { toml::Value::Table(Default::default()) } Err(err) => { warn!("failed to read user config while reloading layer: {err}"); return; } }; let config_toml_path = match AbsolutePathBuf::try_from(config_toml_path) { Ok(path) => path, Err(err) => { warn!("failed to resolve user config path while reloading layer: {err}"); return; } }; let mut state = self.state.lock().await; let mut config = (*state.session_configuration.original_config_do_not_use).clone(); config.config_layer_stack = config .config_layer_stack .with_user_config(&config_toml_path, user_config); state.session_configuration.original_config_do_not_use = Arc::new(config); } pub(crate) async fn new_default_turn_with_sub_id(&self, sub_id: String) -> Arc { let session_configuration = { let state = self.state.lock().await; state.session_configuration.clone() }; self.new_turn_from_configuration(sub_id, session_configuration, None, false) .await } pub(crate) async fn current_collaboration_mode(&self) -> CollaborationMode { let state = self.state.lock().await; state.session_configuration.collaboration_mode.clone() } pub(crate) fn is_model_switch_developer_message(item: &ResponseItem) -> bool { let ResponseItem::Message { role, content, .. } = item else { return false; }; role == "developer" && content.iter().any(|content_item| { matches!( content_item, ContentItem::InputText { text } if text.starts_with("") ) }) } fn build_settings_update_items( &self, previous_context: Option<&Arc>, resumed_model: Option<&str>, current_context: &TurnContext, ) -> Vec { // TODO: Make context updates a pure diff of persisted previous/current TurnContextItem // state so replay/backtracking is deterministic. Runtime inputs that affect model-visible // context (shell, exec policy, feature gates, resumed model bridge) should be persisted // state or explicit non-state replay events. let shell = self.user_shell(); let exec_policy = self.services.exec_policy.current(); crate::context_manager::updates::build_settings_update_items( previous_context.map(Arc::as_ref), resumed_model, current_context, shell.as_ref(), exec_policy.as_ref(), self.features.enabled(Feature::Personality), ) } /// Persist the event to rollout and send it to clients. pub(crate) async fn send_event(&self, turn_context: &TurnContext, msg: EventMsg) { let legacy_source = msg.clone(); let event = Event { id: turn_context.sub_id.clone(), msg, }; self.send_event_raw(event).await; let show_raw_agent_reasoning = self.show_raw_agent_reasoning(); for legacy in legacy_source.as_legacy_events(show_raw_agent_reasoning) { let legacy_event = Event { id: turn_context.sub_id.clone(), msg: legacy, }; self.send_event_raw(legacy_event).await; } } pub(crate) async fn send_event_raw(&self, event: Event) { // Record the last known agent status. if let Some(status) = agent_status_from_event(&event.msg) { self.agent_status.send_replace(status); } // Persist the event into rollout (recorder filters as needed) let rollout_items = vec![RolloutItem::EventMsg(event.msg.clone())]; self.persist_rollout_items(&rollout_items).await; if let Err(e) = self.tx_event.send(event).await { debug!("dropping event because channel is closed: {e}"); } } /// Persist the event to the rollout file, flush it, and only then deliver it to clients. /// /// Most events can be delivered immediately after queueing the rollout write, but some /// clients (e.g. app-server thread/rollback) re-read the rollout file synchronously on /// receipt of the event and depend on the marker already being visible on disk. pub(crate) async fn send_event_raw_flushed(&self, event: Event) { // Record the last known agent status. if let Some(status) = agent_status_from_event(&event.msg) { self.agent_status.send_replace(status); } self.persist_rollout_items(&[RolloutItem::EventMsg(event.msg.clone())]) .await; self.flush_rollout().await; if let Err(e) = self.tx_event.send(event).await { debug!("dropping event because channel is closed: {e}"); } } pub(crate) async fn emit_turn_item_started(&self, turn_context: &TurnContext, item: &TurnItem) { self.send_event( turn_context, EventMsg::ItemStarted(ItemStartedEvent { thread_id: self.conversation_id, turn_id: turn_context.sub_id.clone(), item: item.clone(), }), ) .await; } pub(crate) async fn emit_turn_item_completed( &self, turn_context: &TurnContext, item: TurnItem, ) { self.send_event( turn_context, EventMsg::ItemCompleted(ItemCompletedEvent { thread_id: self.conversation_id, turn_id: turn_context.sub_id.clone(), item, }), ) .await; } /// Adds an execpolicy amendment to both the in-memory and on-disk policies so future /// commands can use the newly approved prefix. pub(crate) async fn persist_execpolicy_amendment( &self, amendment: &ExecPolicyAmendment, ) -> Result<(), ExecPolicyUpdateError> { let codex_home = self .state .lock() .await .session_configuration .codex_home() .clone(); self.services .exec_policy .append_amendment_and_update(&codex_home, amendment) .await?; Ok(()) } pub(crate) async fn turn_context_for_sub_id(&self, sub_id: &str) -> Option> { let active = self.active_turn.lock().await; active .as_ref() .and_then(|turn| turn.tasks.get(sub_id)) .map(|task| Arc::clone(&task.turn_context)) } async fn active_turn_context_and_cancellation_token( &self, ) -> Option<(Arc, CancellationToken)> { let active = self.active_turn.lock().await; let (_, task) = active.as_ref()?.tasks.first()?; Some(( Arc::clone(&task.turn_context), task.cancellation_token.child_token(), )) } pub(crate) async fn record_execpolicy_amendment_message( &self, sub_id: &str, amendment: &ExecPolicyAmendment, ) { let Some(prefixes) = format_allow_prefixes(vec![amendment.command.clone()]) else { warn!("execpolicy amendment for {sub_id} had no command prefix"); return; }; let text = format!("Approved command prefix saved:\n{prefixes}"); let message: ResponseItem = DeveloperInstructions::new(text.clone()).into(); if let Some(turn_context) = self.turn_context_for_sub_id(sub_id).await { self.record_conversation_items(&turn_context, std::slice::from_ref(&message)) .await; return; } if self .inject_response_items(vec![ResponseInputItem::Message { role: "developer".to_string(), content: vec![ContentItem::InputText { text }], }]) .await .is_err() { warn!("no active turn found to record execpolicy amendment message for {sub_id}"); } } /// Emit an exec approval request event and await the user's decision. /// /// The request is keyed by `call_id` + `approval_id` so matching responses are delivered /// to the correct in-flight turn. If the task is aborted, this returns the /// default `ReviewDecision` (`Denied`). #[allow(clippy::too_many_arguments)] pub async fn request_command_approval( &self, turn_context: &TurnContext, call_id: String, approval_id: Option, command: Vec, cwd: PathBuf, reason: Option, network_approval_context: Option, proposed_execpolicy_amendment: Option, ) -> ReviewDecision { // command-level approvals use `call_id`. // `approval_id` is only present for subcommand callbacks (execve intercept) let effective_approval_id = approval_id.clone().unwrap_or_else(|| call_id.clone()); // Add the tx_approve callback to the map before sending the request. let (tx_approve, rx_approve) = oneshot::channel(); let prev_entry = { let mut active = self.active_turn.lock().await; match active.as_mut() { Some(at) => { let mut ts = at.turn_state.lock().await; ts.insert_pending_approval(effective_approval_id.clone(), tx_approve) } None => None, } }; if prev_entry.is_some() { warn!("Overwriting existing pending approval for call_id: {effective_approval_id}"); } let parsed_cmd = parse_command(&command); let event = EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent { call_id, approval_id, turn_id: turn_context.sub_id.clone(), command, cwd, reason, network_approval_context, proposed_execpolicy_amendment, parsed_cmd, }); self.send_event(turn_context, event).await; rx_approve.await.unwrap_or_default() } pub async fn request_patch_approval( &self, turn_context: &TurnContext, call_id: String, changes: HashMap, reason: Option, grant_root: Option, ) -> oneshot::Receiver { // Add the tx_approve callback to the map before sending the request. let (tx_approve, rx_approve) = oneshot::channel(); let approval_id = call_id.clone(); let prev_entry = { let mut active = self.active_turn.lock().await; match active.as_mut() { Some(at) => { let mut ts = at.turn_state.lock().await; ts.insert_pending_approval(approval_id.clone(), tx_approve) } None => None, } }; if prev_entry.is_some() { warn!("Overwriting existing pending approval for call_id: {approval_id}"); } let event = EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent { call_id, turn_id: turn_context.sub_id.clone(), changes, reason, grant_root, }); self.send_event(turn_context, event).await; rx_approve } pub async fn request_user_input( &self, turn_context: &TurnContext, call_id: String, args: RequestUserInputArgs, ) -> Option { let sub_id = turn_context.sub_id.clone(); let (tx_response, rx_response) = oneshot::channel(); let event_id = sub_id.clone(); let prev_entry = { let mut active = self.active_turn.lock().await; match active.as_mut() { Some(at) => { let mut ts = at.turn_state.lock().await; ts.insert_pending_user_input(sub_id, tx_response) } None => None, } }; if prev_entry.is_some() { warn!("Overwriting existing pending user input for sub_id: {event_id}"); } let event = EventMsg::RequestUserInput(RequestUserInputEvent { call_id, turn_id: turn_context.sub_id.clone(), questions: args.questions, }); self.send_event(turn_context, event).await; rx_response.await.ok() } pub async fn notify_user_input_response( &self, sub_id: &str, response: RequestUserInputResponse, ) { let entry = { let mut active = self.active_turn.lock().await; match active.as_mut() { Some(at) => { let mut ts = at.turn_state.lock().await; ts.remove_pending_user_input(sub_id) } None => None, } }; match entry { Some(tx_response) => { tx_response.send(response).ok(); } None => { warn!("No pending user input found for sub_id: {sub_id}"); } } } pub async fn notify_dynamic_tool_response(&self, call_id: &str, response: DynamicToolResponse) { let entry = { let mut active = self.active_turn.lock().await; match active.as_mut() { Some(at) => { let mut ts = at.turn_state.lock().await; ts.remove_pending_dynamic_tool(call_id) } None => None, } }; match entry { Some(tx_response) => { tx_response.send(response).ok(); } None => { warn!("No pending dynamic tool call found for call_id: {call_id}"); } } } pub async fn notify_approval(&self, approval_id: &str, decision: ReviewDecision) { let entry = { let mut active = self.active_turn.lock().await; match active.as_mut() { Some(at) => { let mut ts = at.turn_state.lock().await; ts.remove_pending_approval(approval_id) } None => None, } }; match entry { Some(tx_approve) => { tx_approve.send(decision).ok(); } None => { warn!("No pending approval found for call_id: {approval_id}"); } } } pub async fn resolve_elicitation( &self, server_name: String, id: RequestId, response: ElicitationResponse, ) -> anyhow::Result<()> { self.services .mcp_connection_manager .read() .await .resolve_elicitation(server_name, id, response) .await } /// Records input items: always append to conversation history and /// persist these response items to rollout. pub(crate) async fn record_conversation_items( &self, turn_context: &TurnContext, items: &[ResponseItem], ) { self.record_into_history(items, turn_context).await; self.persist_rollout_response_items(items).await; self.send_raw_response_items(turn_context, items).await; } async fn reconstruct_history_from_rollout( &self, turn_context: &TurnContext, rollout_items: &[RolloutItem], ) -> Vec { let mut history = ContextManager::new(); for item in rollout_items { match item { RolloutItem::ResponseItem(response_item) => { history.record_items( std::iter::once(response_item), turn_context.truncation_policy, ); } RolloutItem::Compacted(compacted) => { if let Some(replacement) = &compacted.replacement_history { history.replace(replacement.clone()); } else { let user_messages = collect_user_messages(history.raw_items()); let mut rebuilt = self.build_initial_context(turn_context).await; rebuilt.extend(compact::build_compacted_history( &user_messages, &compacted.message, )); history.replace(rebuilt); } } RolloutItem::EventMsg(EventMsg::ThreadRolledBack(rollback)) => { history.drop_last_n_user_turns(rollback.num_turns); } _ => {} } } history.raw_items().to_vec() } /// Append ResponseItems to the in-memory conversation history only. pub(crate) async fn record_into_history( &self, items: &[ResponseItem], turn_context: &TurnContext, ) { let mut state = self.state.lock().await; state.record_items(items.iter(), turn_context.truncation_policy); } pub(crate) async fn record_model_warning(&self, message: impl Into, ctx: &TurnContext) { self.services .otel_manager .counter("codex.model_warning", 1, &[]); let item = ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: format!("Warning: {}", message.into()), }], end_turn: None, phase: None, }; self.record_conversation_items(ctx, &[item]).await; } async fn maybe_warn_on_server_model_mismatch( self: &Arc, turn_context: &Arc, server_model: String, ) -> bool { let requested_model = turn_context.model_info.slug.clone(); let server_model_normalized = server_model.to_ascii_lowercase(); let requested_model_normalized = requested_model.to_ascii_lowercase(); if server_model_normalized == requested_model_normalized { info!("server reported model {server_model} (matches requested model)"); return false; } warn!("server reported model {server_model} while requested model was {requested_model}"); let warning_message = format!( "Your account was flagged for potentially high-risk cyber activity and this request was routed to gpt-5.2 as a fallback. To regain access to gpt-5.3-codex, apply for trusted access: {CYBER_VERIFY_URL} or learn more: {CYBER_SAFETY_URL}" ); self.send_event( turn_context, EventMsg::ModelReroute(ModelRerouteEvent { from_model: requested_model.clone(), to_model: server_model.clone(), reason: ModelRerouteReason::HighRiskCyberActivity, }), ) .await; self.send_event( turn_context, EventMsg::Warning(WarningEvent { message: warning_message.clone(), }), ) .await; self.record_model_warning(warning_message, turn_context) .await; true } pub(crate) async fn replace_history(&self, items: Vec) { let mut state = self.state.lock().await; state.replace_history(items); } pub(crate) async fn seed_initial_context_if_needed(&self, turn_context: &TurnContext) { { let mut state = self.state.lock().await; if state.initial_context_seeded { return; } state.initial_context_seeded = true; } let initial_context = self.build_initial_context(turn_context).await; self.record_conversation_items(turn_context, &initial_context) .await; self.flush_rollout().await; } pub(crate) async fn mark_initial_context_unseeded_for_next_turn(&self) { let mut state = self.state.lock().await; state.initial_context_seeded = false; } async fn persist_rollout_response_items(&self, items: &[ResponseItem]) { let rollout_items: Vec = items .iter() .cloned() .map(RolloutItem::ResponseItem) .collect(); self.persist_rollout_items(&rollout_items).await; } pub fn enabled(&self, feature: Feature) -> bool { self.features.enabled(feature) } pub(crate) fn features(&self) -> Features { self.features.clone() } pub(crate) async fn collaboration_mode(&self) -> CollaborationMode { let state = self.state.lock().await; state.session_configuration.collaboration_mode.clone() } async fn send_raw_response_items(&self, turn_context: &TurnContext, items: &[ResponseItem]) { for item in items { self.send_event( turn_context, EventMsg::RawResponseItem(RawResponseItemEvent { item: item.clone() }), ) .await; } } pub(crate) async fn build_initial_context( &self, turn_context: &TurnContext, ) -> Vec { let mut items = Vec::::with_capacity(4); let shell = self.user_shell(); items.push( DeveloperInstructions::from_policy( &turn_context.sandbox_policy, turn_context.approval_policy, self.services.exec_policy.current().as_ref(), &turn_context.cwd, ) .into(), ); if let Some(developer_instructions) = turn_context.developer_instructions.as_deref() { items.push(DeveloperInstructions::new(developer_instructions.to_string()).into()); } // Add developer instructions for memories. if let Some(memory_prompt) = build_memory_tool_developer_instructions(&turn_context.config.codex_home).await && turn_context.features.enabled(Feature::MemoryTool) { items.push(DeveloperInstructions::new(memory_prompt).into()); } // Add developer instructions from collaboration_mode if they exist and are non-empty let (collaboration_mode, base_instructions) = { let state = self.state.lock().await; ( state.session_configuration.collaboration_mode.clone(), state.session_configuration.base_instructions.clone(), ) }; if let Some(collab_instructions) = DeveloperInstructions::from_collaboration_mode(&collaboration_mode) { items.push(collab_instructions.into()); } if self.features.enabled(Feature::Personality) && let Some(personality) = turn_context.personality { let model_info = turn_context.model_info.clone(); let has_baked_personality = model_info.supports_personality() && base_instructions == model_info.get_model_instructions(Some(personality)); if !has_baked_personality && let Some(personality_message) = crate::context_manager::updates::personality_message_for( &model_info, personality, ) { items.push( DeveloperInstructions::personality_spec_message(personality_message).into(), ); } } if turn_context.features.enabled(Feature::Apps) { items.push(DeveloperInstructions::new(render_apps_section()).into()); } if turn_context.features.enabled(Feature::CodexGitCommit) && let Some(commit_message_instruction) = commit_message_trailer_instruction( turn_context.config.commit_attribution.as_deref(), ) { items.push(DeveloperInstructions::new(commit_message_instruction).into()); } if let Some(user_instructions) = turn_context.user_instructions.as_deref() { items.push( UserInstructions { text: user_instructions.to_string(), directory: turn_context.cwd.to_string_lossy().into_owned(), } .into(), ); } items.push(ResponseItem::from(EnvironmentContext::from_turn_context( turn_context, shell.as_ref(), ))); items } pub(crate) async fn persist_rollout_items(&self, items: &[RolloutItem]) { let recorder = { let guard = self.services.rollout.lock().await; guard.clone() }; if let Some(rec) = recorder && let Err(e) = rec.record_items(items).await { error!("failed to record rollout items: {e:#}"); } } pub(crate) async fn clone_history(&self) -> ContextManager { let state = self.state.lock().await; state.clone_history() } pub(crate) async fn update_token_usage_info( &self, turn_context: &TurnContext, token_usage: Option<&TokenUsage>, ) { { let mut state = self.state.lock().await; if let Some(token_usage) = token_usage { state .update_token_info_from_usage(token_usage, turn_context.model_context_window()); } } self.send_token_count_event(turn_context).await; } pub(crate) async fn recompute_token_usage(&self, turn_context: &TurnContext) { let history = self.clone_history().await; let base_instructions = self.get_base_instructions().await; let Some(estimated_total_tokens) = history.estimate_token_count_with_base_instructions(&base_instructions) else { return; }; { let mut state = self.state.lock().await; let mut info = state.token_info().unwrap_or(TokenUsageInfo { total_token_usage: TokenUsage::default(), last_token_usage: TokenUsage::default(), model_context_window: None, }); info.last_token_usage = TokenUsage { input_tokens: 0, cached_input_tokens: 0, output_tokens: 0, reasoning_output_tokens: 0, total_tokens: estimated_total_tokens.max(0), }; if let Some(model_context_window) = turn_context.model_context_window() { info.model_context_window = Some(model_context_window); } state.set_token_info(Some(info)); } self.send_token_count_event(turn_context).await; } pub(crate) async fn update_rate_limits( &self, turn_context: &TurnContext, new_rate_limits: RateLimitSnapshot, ) { { let mut state = self.state.lock().await; state.set_rate_limits(new_rate_limits); } self.send_token_count_event(turn_context).await; } pub(crate) async fn mcp_dependency_prompted(&self) -> HashSet { let state = self.state.lock().await; state.mcp_dependency_prompted() } pub(crate) async fn record_mcp_dependency_prompted(&self, names: I) where I: IntoIterator, { let mut state = self.state.lock().await; state.record_mcp_dependency_prompted(names); } pub async fn dependency_env(&self) -> HashMap { let state = self.state.lock().await; state.dependency_env() } pub async fn set_dependency_env(&self, values: HashMap) { let mut state = self.state.lock().await; state.set_dependency_env(values); } pub(crate) async fn set_server_reasoning_included(&self, included: bool) { let mut state = self.state.lock().await; state.set_server_reasoning_included(included); } async fn send_token_count_event(&self, turn_context: &TurnContext) { let (info, rate_limits) = { let state = self.state.lock().await; state.token_info_and_rate_limits() }; let event = EventMsg::TokenCount(TokenCountEvent { info, rate_limits }); self.send_event(turn_context, event).await; } pub(crate) async fn set_total_tokens_full(&self, turn_context: &TurnContext) { if let Some(context_window) = turn_context.model_context_window() { let mut state = self.state.lock().await; state.set_token_usage_full(context_window); } self.send_token_count_event(turn_context).await; } pub(crate) async fn record_response_item_and_emit_turn_item( &self, turn_context: &TurnContext, response_item: ResponseItem, ) { // Add to conversation history and persist response item to rollout. self.record_conversation_items(turn_context, std::slice::from_ref(&response_item)) .await; // Derive a turn item and emit lifecycle events if applicable. if let Some(item) = parse_turn_item(&response_item) { self.emit_turn_item_started(turn_context, &item).await; self.emit_turn_item_completed(turn_context, item).await; } } pub(crate) async fn record_user_prompt_and_emit_turn_item( &self, turn_context: &TurnContext, input: &[UserInput], response_item: ResponseItem, ) { // Persist the user message to history, but emit the turn item from `UserInput` so // UI-only `text_elements` are preserved. `ResponseItem::Message` does not carry // those spans, and `record_response_item_and_emit_turn_item` would drop them. self.record_conversation_items(turn_context, std::slice::from_ref(&response_item)) .await; let turn_item = TurnItem::UserMessage(UserMessageItem::new(input)); self.emit_turn_item_started(turn_context, &turn_item).await; self.emit_turn_item_completed(turn_context, turn_item).await; self.ensure_rollout_materialized().await; } pub(crate) async fn notify_background_event( &self, turn_context: &TurnContext, message: impl Into, ) { let event = EventMsg::BackgroundEvent(BackgroundEventEvent { message: message.into(), }); self.send_event(turn_context, event).await; } pub(crate) async fn notify_stream_error( &self, turn_context: &TurnContext, message: impl Into, codex_error: CodexErr, ) { let additional_details = codex_error.to_string(); let codex_error_info = CodexErrorInfo::ResponseStreamDisconnected { http_status_code: codex_error.http_status_code_value(), }; let event = EventMsg::StreamError(StreamErrorEvent { message: message.into(), codex_error_info: Some(codex_error_info), additional_details: Some(additional_details), }); self.send_event(turn_context, event).await; } async fn maybe_start_ghost_snapshot( self: &Arc, turn_context: Arc, cancellation_token: CancellationToken, ) { if !self.enabled(Feature::GhostCommit) { return; } let token = match turn_context.tool_call_gate.subscribe().await { Ok(token) => token, Err(err) => { warn!("failed to subscribe to ghost snapshot readiness: {err}"); return; } }; info!("spawning ghost snapshot task"); let task = GhostSnapshotTask::new(token); Arc::new(task) .run( Arc::new(SessionTaskContext::new(self.clone())), turn_context.clone(), Vec::new(), cancellation_token, ) .await; } /// Inject additional user input into the currently active turn. /// /// Returns the active turn id when accepted. pub async fn steer_input( &self, input: Vec, expected_turn_id: Option<&str>, ) -> Result { if input.is_empty() { return Err(SteerInputError::EmptyInput); } let mut active = self.active_turn.lock().await; let Some(active_turn) = active.as_mut() else { return Err(SteerInputError::NoActiveTurn(input)); }; let Some((active_turn_id, _)) = active_turn.tasks.first() else { return Err(SteerInputError::NoActiveTurn(input)); }; if let Some(expected_turn_id) = expected_turn_id && expected_turn_id != active_turn_id { return Err(SteerInputError::ExpectedTurnMismatch { expected: expected_turn_id.to_string(), actual: active_turn_id.clone(), }); } let mut turn_state = active_turn.turn_state.lock().await; turn_state.push_pending_input(input.into()); Ok(active_turn_id.clone()) } /// Returns the input if there was no task running to inject into pub async fn inject_response_items( &self, input: Vec, ) -> Result<(), Vec> { let mut active = self.active_turn.lock().await; match active.as_mut() { Some(at) => { let mut ts = at.turn_state.lock().await; for item in input { ts.push_pending_input(item); } Ok(()) } None => Err(input), } } pub async fn get_pending_input(&self) -> Vec { let mut active = self.active_turn.lock().await; match active.as_mut() { Some(at) => { let mut ts = at.turn_state.lock().await; ts.take_pending_input() } None => Vec::with_capacity(0), } } pub async fn has_pending_input(&self) -> bool { let active = self.active_turn.lock().await; match active.as_ref() { Some(at) => { let ts = at.turn_state.lock().await; ts.has_pending_input() } None => false, } } pub async fn list_resources( &self, server: &str, params: Option, ) -> anyhow::Result { self.services .mcp_connection_manager .read() .await .list_resources(server, params) .await } pub async fn list_resource_templates( &self, server: &str, params: Option, ) -> anyhow::Result { self.services .mcp_connection_manager .read() .await .list_resource_templates(server, params) .await } pub async fn read_resource( &self, server: &str, params: ReadResourceRequestParams, ) -> anyhow::Result { self.services .mcp_connection_manager .read() .await .read_resource(server, params) .await } pub async fn call_tool( &self, server: &str, tool: &str, arguments: Option, ) -> anyhow::Result { self.services .mcp_connection_manager .read() .await .call_tool(server, tool, arguments) .await } pub(crate) async fn parse_mcp_tool_name(&self, tool_name: &str) -> Option<(String, String)> { self.services .mcp_connection_manager .read() .await .parse_tool_name(tool_name) .await } pub async fn interrupt_task(self: &Arc) { info!("interrupt received: abort current task, if any"); let has_active_turn = { self.active_turn.lock().await.is_some() }; if has_active_turn { self.abort_all_tasks(TurnAbortReason::Interrupted).await; } else { self.cancel_mcp_startup().await; } } pub(crate) fn hooks(&self) -> &Hooks { &self.services.hooks } pub(crate) fn user_shell(&self) -> Arc { Arc::clone(&self.services.user_shell) } async fn refresh_mcp_servers_inner( &self, turn_context: &TurnContext, mcp_servers: HashMap, store_mode: OAuthCredentialsStoreMode, ) { let auth = self.services.auth_manager.auth().await; let config = self.get_config().await; let mcp_servers = with_codex_apps_mcp( mcp_servers, self.features.enabled(Feature::Apps), auth.as_ref(), config.as_ref(), ); let auth_statuses = compute_auth_statuses(mcp_servers.iter(), store_mode).await; let sandbox_state = SandboxState { sandbox_policy: turn_context.sandbox_policy.clone(), codex_linux_sandbox_exe: turn_context.codex_linux_sandbox_exe.clone(), sandbox_cwd: turn_context.cwd.clone(), use_linux_sandbox_bwrap: turn_context.features.enabled(Feature::UseLinuxSandboxBwrap), }; let cancel_token = self.reset_mcp_startup_cancellation_token().await; let mut refreshed_manager = McpConnectionManager::default(); refreshed_manager .initialize( &mcp_servers, store_mode, auth_statuses, self.get_tx_event(), cancel_token, sandbox_state, ) .await; let mut manager = self.services.mcp_connection_manager.write().await; *manager = refreshed_manager; } async fn refresh_mcp_servers_if_requested(&self, turn_context: &TurnContext) { let refresh_config = { self.pending_mcp_server_refresh_config.lock().await.take() }; let Some(refresh_config) = refresh_config else { return; }; let McpServerRefreshConfig { mcp_servers, mcp_oauth_credentials_store_mode, } = refresh_config; let mcp_servers = match serde_json::from_value::>(mcp_servers) { Ok(servers) => servers, Err(err) => { warn!("failed to parse MCP server refresh config: {err}"); return; } }; let store_mode = match serde_json::from_value::( mcp_oauth_credentials_store_mode, ) { Ok(mode) => mode, Err(err) => { warn!("failed to parse MCP OAuth refresh config: {err}"); return; } }; self.refresh_mcp_servers_inner(turn_context, mcp_servers, store_mode) .await; } pub(crate) async fn refresh_mcp_servers_now( &self, turn_context: &TurnContext, mcp_servers: HashMap, store_mode: OAuthCredentialsStoreMode, ) { self.refresh_mcp_servers_inner(turn_context, mcp_servers, store_mode) .await; } async fn mcp_startup_cancellation_token(&self) -> CancellationToken { self.services .mcp_startup_cancellation_token .lock() .await .clone() } async fn reset_mcp_startup_cancellation_token(&self) -> CancellationToken { let mut guard = self.services.mcp_startup_cancellation_token.lock().await; guard.cancel(); let cancel_token = CancellationToken::new(); *guard = cancel_token.clone(); cancel_token } fn show_raw_agent_reasoning(&self) -> bool { self.services.show_raw_agent_reasoning } async fn cancel_mcp_startup(&self) { self.services .mcp_startup_cancellation_token .lock() .await .cancel(); } } async fn submission_loop(sess: Arc, config: Arc, rx_sub: Receiver) { // Seed with context in case there is an OverrideTurnContext first. let mut previous_context: Option> = Some(sess.new_default_turn().await); // To break out of this loop, send Op::Shutdown. while let Ok(sub) = rx_sub.recv().await { debug!(?sub, "Submission"); match sub.op.clone() { Op::Interrupt => { handlers::interrupt(&sess).await; } Op::CleanBackgroundTerminals => { handlers::clean_background_terminals(&sess).await; } Op::OverrideTurnContext { cwd, approval_policy, sandbox_policy, windows_sandbox_level, model, effort, summary, collaboration_mode, personality, } => { let collaboration_mode = if let Some(collab_mode) = collaboration_mode { collab_mode } else { let state = sess.state.lock().await; state.session_configuration.collaboration_mode.with_updates( model.clone(), effort, None, ) }; handlers::override_turn_context( &sess, sub.id.clone(), SessionSettingsUpdate { cwd, approval_policy, sandbox_policy, windows_sandbox_level, collaboration_mode: Some(collaboration_mode), reasoning_summary: summary, personality, ..Default::default() }, ) .await; } Op::UserInput { .. } | Op::UserTurn { .. } => { handlers::user_input_or_turn(&sess, sub.id.clone(), sub.op, &mut previous_context) .await; } Op::ExecApproval { id: approval_id, turn_id, decision, } => { handlers::exec_approval(&sess, approval_id, turn_id, decision).await; } Op::PatchApproval { id, decision } => { handlers::patch_approval(&sess, id, decision).await; } Op::UserInputAnswer { id, response } => { handlers::request_user_input_response(&sess, id, response).await; } Op::DynamicToolResponse { id, response } => { handlers::dynamic_tool_response(&sess, id, response).await; } Op::AddToHistory { text } => { handlers::add_to_history(&sess, &config, text).await; } Op::GetHistoryEntryRequest { offset, log_id } => { handlers::get_history_entry_request(&sess, &config, sub.id.clone(), offset, log_id) .await; } Op::ListMcpTools => { handlers::list_mcp_tools(&sess, &config, sub.id.clone()).await; } Op::RefreshMcpServers { config } => { handlers::refresh_mcp_servers(&sess, config).await; } Op::ReloadUserConfig => { handlers::reload_user_config(&sess).await; } Op::ListCustomPrompts => { handlers::list_custom_prompts(&sess, sub.id.clone()).await; } Op::ListSkills { cwds, force_reload } => { handlers::list_skills(&sess, sub.id.clone(), cwds, force_reload).await; } Op::ListRemoteSkills { hazelnut_scope, product_surface, enabled, } => { handlers::list_remote_skills( &sess, &config, sub.id.clone(), hazelnut_scope, product_surface, enabled, ) .await; } Op::DownloadRemoteSkill { hazelnut_id } => { handlers::export_remote_skill(&sess, &config, sub.id.clone(), hazelnut_id).await; } Op::Undo => { handlers::undo(&sess, sub.id.clone()).await; } Op::Compact => { handlers::compact(&sess, sub.id.clone()).await; } Op::DropMemories => { handlers::drop_memories(&sess, &config, sub.id.clone()).await; } Op::UpdateMemories => { handlers::update_memories(&sess, &config, sub.id.clone()).await; } Op::ThreadRollback { num_turns } => { handlers::thread_rollback(&sess, sub.id.clone(), num_turns).await; } Op::SetThreadName { name } => { handlers::set_thread_name(&sess, sub.id.clone(), name).await; } Op::RunUserShellCommand { command } => { handlers::run_user_shell_command( &sess, sub.id.clone(), command, &mut previous_context, ) .await; } Op::ResolveElicitation { server_name, request_id, decision, } => { handlers::resolve_elicitation(&sess, server_name, request_id, decision).await; } Op::Shutdown => { if handlers::shutdown(&sess, sub.id.clone()).await { break; } } Op::Review { review_request } => { handlers::review(&sess, &config, sub.id.clone(), review_request).await; } _ => {} // Ignore unknown ops; enum is non_exhaustive to allow extensions. } } debug!("Agent loop exited"); } /// Operation handlers mod handlers { use crate::codex::Session; use crate::codex::SessionSettingsUpdate; use crate::codex::SteerInputError; use crate::codex::TurnContext; use crate::codex::spawn_review_thread; use crate::config::Config; use crate::mcp::auth::compute_auth_statuses; use crate::mcp::collect_mcp_snapshot_from_manager; use crate::mcp::effective_mcp_servers; use crate::review_prompts::resolve_review_request; use crate::rollout::session_index; use crate::tasks::CompactTask; use crate::tasks::UndoTask; use crate::tasks::UserShellCommandMode; use crate::tasks::UserShellCommandTask; use crate::tasks::execute_user_shell_command; use codex_protocol::custom_prompts::CustomPrompt; use codex_protocol::protocol::CodexErrorInfo; use codex_protocol::protocol::ErrorEvent; use codex_protocol::protocol::Event; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::ListCustomPromptsResponseEvent; use codex_protocol::protocol::ListRemoteSkillsResponseEvent; use codex_protocol::protocol::ListSkillsResponseEvent; use codex_protocol::protocol::McpServerRefreshConfig; use codex_protocol::protocol::Op; use codex_protocol::protocol::RemoteSkillDownloadedEvent; use codex_protocol::protocol::RemoteSkillHazelnutScope; use codex_protocol::protocol::RemoteSkillProductSurface; use codex_protocol::protocol::RemoteSkillSummary; use codex_protocol::protocol::ReviewDecision; use codex_protocol::protocol::ReviewRequest; use codex_protocol::protocol::SkillsListEntry; use codex_protocol::protocol::ThreadNameUpdatedEvent; use codex_protocol::protocol::ThreadRolledBackEvent; use codex_protocol::protocol::TurnAbortReason; use codex_protocol::protocol::WarningEvent; use codex_protocol::request_user_input::RequestUserInputResponse; use crate::context_manager::is_user_turn_boundary; use codex_protocol::config_types::CollaborationMode; use codex_protocol::config_types::ModeKind; use codex_protocol::config_types::Settings; use codex_protocol::dynamic_tools::DynamicToolResponse; use codex_protocol::mcp::RequestId as ProtocolRequestId; use codex_protocol::user_input::UserInput; use codex_rmcp_client::ElicitationAction; use codex_rmcp_client::ElicitationResponse; use std::path::PathBuf; use std::sync::Arc; use tracing::info; use tracing::warn; pub async fn interrupt(sess: &Arc) { sess.interrupt_task().await; } pub async fn clean_background_terminals(sess: &Arc) { sess.close_unified_exec_processes().await; } pub async fn override_turn_context( sess: &Session, sub_id: String, updates: SessionSettingsUpdate, ) { if let Err(err) = sess.update_settings(updates).await { sess.send_event_raw(Event { id: sub_id, msg: EventMsg::Error(ErrorEvent { message: err.to_string(), codex_error_info: Some(CodexErrorInfo::BadRequest), }), }) .await; } } pub async fn user_input_or_turn( sess: &Arc, sub_id: String, op: Op, previous_context: &mut Option>, ) { let (items, updates) = match op { Op::UserTurn { cwd, approval_policy, sandbox_policy, model, effort, summary, final_output_json_schema, items, collaboration_mode, personality, } => { let collaboration_mode = collaboration_mode.or_else(|| { Some(CollaborationMode { mode: ModeKind::Default, settings: Settings { model: model.clone(), reasoning_effort: effort, developer_instructions: None, }, }) }); ( items, SessionSettingsUpdate { cwd: Some(cwd), approval_policy: Some(approval_policy), sandbox_policy: Some(sandbox_policy), windows_sandbox_level: None, collaboration_mode, reasoning_summary: Some(summary), final_output_json_schema: Some(final_output_json_schema), personality, }, ) } Op::UserInput { items, final_output_json_schema, } => ( items, SessionSettingsUpdate { final_output_json_schema: Some(final_output_json_schema), ..Default::default() }, ), _ => unreachable!(), }; let Ok(current_context) = sess.new_turn_with_sub_id(sub_id, updates).await else { // new_turn_with_sub_id already emits the error event. return; }; sess.maybe_emit_unknown_model_warning_for_turn(current_context.as_ref()) .await; current_context.otel_manager.user_prompt(&items); // Attempt to inject input into current task. if let Err(SteerInputError::NoActiveTurn(items)) = sess.steer_input(items, None).await { sess.seed_initial_context_if_needed(¤t_context).await; let previous_model = sess.previous_model().await; let pre_turn_context_items = sess.build_settings_update_items( previous_context.as_ref(), previous_model.as_deref(), ¤t_context, ); let has_user_input = !items.is_empty(); if !has_user_input && !pre_turn_context_items.is_empty() { // Empty-input UserTurn still needs these model-visible updates persisted now. // Otherwise `previous_context` advances and the next non-empty turn computes no diff. sess.record_conversation_items(¤t_context, &pre_turn_context_items) .await; } sess.refresh_mcp_servers_if_requested(¤t_context) .await; let regular_task = if has_user_input { sess.take_startup_regular_task() .await .unwrap_or_default() .with_pre_turn_context_items(pre_turn_context_items) } else { sess.take_startup_regular_task().await.unwrap_or_default() }; sess.spawn_task(Arc::clone(¤t_context), items, regular_task) .await; *previous_context = Some(current_context); } } pub async fn run_user_shell_command( sess: &Arc, sub_id: String, command: String, previous_context: &mut Option>, ) { if let Some((turn_context, cancellation_token)) = sess.active_turn_context_and_cancellation_token().await { let session = Arc::clone(sess); tokio::spawn(async move { execute_user_shell_command( session, turn_context, command, cancellation_token, UserShellCommandMode::ActiveTurnAuxiliary, ) .await; }); return; } let turn_context = sess.new_default_turn_with_sub_id(sub_id).await; sess.spawn_task( Arc::clone(&turn_context), Vec::new(), UserShellCommandTask::new(command), ) .await; *previous_context = Some(turn_context); } pub async fn resolve_elicitation( sess: &Arc, server_name: String, request_id: ProtocolRequestId, decision: codex_protocol::approvals::ElicitationAction, ) { let action = match decision { codex_protocol::approvals::ElicitationAction::Accept => ElicitationAction::Accept, codex_protocol::approvals::ElicitationAction::Decline => ElicitationAction::Decline, codex_protocol::approvals::ElicitationAction::Cancel => ElicitationAction::Cancel, }; // When accepting, send an empty object as content to satisfy MCP servers // that expect non-null content on Accept. For Decline/Cancel, content is None. let content = match action { ElicitationAction::Accept => Some(serde_json::json!({})), ElicitationAction::Decline | ElicitationAction::Cancel => None, }; let response = ElicitationResponse { action, content }; let request_id = match request_id { ProtocolRequestId::String(value) => { rmcp::model::NumberOrString::String(std::sync::Arc::from(value)) } ProtocolRequestId::Integer(value) => rmcp::model::NumberOrString::Number(value), }; if let Err(err) = sess .resolve_elicitation(server_name, request_id, response) .await { warn!( error = %err, "failed to resolve elicitation request in session" ); } } /// Propagate a user's exec approval decision to the session. /// Also optionally applies an execpolicy amendment. pub async fn exec_approval( sess: &Arc, approval_id: String, turn_id: Option, decision: ReviewDecision, ) { let event_turn_id = turn_id.unwrap_or_else(|| approval_id.clone()); if let ReviewDecision::ApprovedExecpolicyAmendment { proposed_execpolicy_amendment, } = &decision { match sess .persist_execpolicy_amendment(proposed_execpolicy_amendment) .await { Ok(()) => { sess.record_execpolicy_amendment_message( &event_turn_id, proposed_execpolicy_amendment, ) .await; } Err(err) => { let message = format!("Failed to apply execpolicy amendment: {err}"); tracing::warn!("{message}"); let warning = EventMsg::Warning(WarningEvent { message }); sess.send_event_raw(Event { id: event_turn_id.clone(), msg: warning, }) .await; } } } match decision { ReviewDecision::Abort => { sess.interrupt_task().await; } other => sess.notify_approval(&approval_id, other).await, } } pub async fn patch_approval(sess: &Arc, id: String, decision: ReviewDecision) { match decision { ReviewDecision::Abort => { sess.interrupt_task().await; } other => sess.notify_approval(&id, other).await, } } pub async fn request_user_input_response( sess: &Arc, id: String, response: RequestUserInputResponse, ) { sess.notify_user_input_response(&id, response).await; } pub async fn dynamic_tool_response( sess: &Arc, id: String, response: DynamicToolResponse, ) { sess.notify_dynamic_tool_response(&id, response).await; } pub async fn add_to_history(sess: &Arc, config: &Arc, text: String) { let id = sess.conversation_id; let config = Arc::clone(config); tokio::spawn(async move { if let Err(e) = crate::message_history::append_entry(&text, &id, &config).await { warn!("failed to append to message history: {e}"); } }); } pub async fn get_history_entry_request( sess: &Arc, config: &Arc, sub_id: String, offset: usize, log_id: u64, ) { let config = Arc::clone(config); let sess_clone = Arc::clone(sess); tokio::spawn(async move { // Run lookup in blocking thread because it does file IO + locking. let entry_opt = tokio::task::spawn_blocking(move || { crate::message_history::lookup(log_id, offset, &config) }) .await .unwrap_or(None); let event = Event { id: sub_id, msg: EventMsg::GetHistoryEntryResponse( crate::protocol::GetHistoryEntryResponseEvent { offset, log_id, entry: entry_opt.map(|e| codex_protocol::message_history::HistoryEntry { conversation_id: e.session_id, ts: e.ts, text: e.text, }), }, ), }; sess_clone.send_event_raw(event).await; }); } pub async fn refresh_mcp_servers(sess: &Arc, refresh_config: McpServerRefreshConfig) { let mut guard = sess.pending_mcp_server_refresh_config.lock().await; *guard = Some(refresh_config); } pub async fn reload_user_config(sess: &Arc) { sess.reload_user_config_layer().await; } pub async fn list_mcp_tools(sess: &Session, config: &Arc, sub_id: String) { let mcp_connection_manager = sess.services.mcp_connection_manager.read().await; let auth = sess.services.auth_manager.auth().await; let mcp_servers = effective_mcp_servers(config, auth.as_ref()); let snapshot = collect_mcp_snapshot_from_manager( &mcp_connection_manager, compute_auth_statuses(mcp_servers.iter(), config.mcp_oauth_credentials_store_mode) .await, ) .await; let event = Event { id: sub_id, msg: EventMsg::McpListToolsResponse(snapshot), }; sess.send_event_raw(event).await; } pub async fn list_custom_prompts(sess: &Session, sub_id: String) { let custom_prompts: Vec = if let Some(dir) = crate::custom_prompts::default_prompts_dir() { crate::custom_prompts::discover_prompts_in(&dir).await } else { Vec::new() }; let event = Event { id: sub_id, msg: EventMsg::ListCustomPromptsResponse(ListCustomPromptsResponseEvent { custom_prompts, }), }; sess.send_event_raw(event).await; } pub async fn list_skills( sess: &Session, sub_id: String, cwds: Vec, force_reload: bool, ) { let cwds = if cwds.is_empty() { let state = sess.state.lock().await; vec![state.session_configuration.cwd.clone()] } else { cwds }; let skills_manager = &sess.services.skills_manager; let mut skills = Vec::new(); for cwd in cwds { let outcome = skills_manager.skills_for_cwd(&cwd, force_reload).await; let errors = super::errors_to_info(&outcome.errors); let skills_metadata = super::skills_to_info(&outcome.skills, &outcome.disabled_paths); skills.push(SkillsListEntry { cwd, skills: skills_metadata, errors, }); } let event = Event { id: sub_id, msg: EventMsg::ListSkillsResponse(ListSkillsResponseEvent { skills }), }; sess.send_event_raw(event).await; } pub async fn list_remote_skills( sess: &Session, config: &Arc, sub_id: String, hazelnut_scope: RemoteSkillHazelnutScope, product_surface: RemoteSkillProductSurface, enabled: Option, ) { let auth = sess.services.auth_manager.auth().await; let response = crate::skills::remote::list_remote_skills( config, auth.as_ref(), hazelnut_scope, product_surface, enabled, ) .await .map(|skills| { skills .into_iter() .map(|skill| RemoteSkillSummary { id: skill.id, name: skill.name, description: skill.description, }) .collect::>() }); match response { Ok(skills) => { let event = Event { id: sub_id, msg: EventMsg::ListRemoteSkillsResponse(ListRemoteSkillsResponseEvent { skills, }), }; sess.send_event_raw(event).await; } Err(err) => { let event = Event { id: sub_id, msg: EventMsg::Error(ErrorEvent { message: format!("failed to list remote skills: {err}"), codex_error_info: Some(CodexErrorInfo::Other), }), }; sess.send_event_raw(event).await; } } } pub async fn export_remote_skill( sess: &Session, config: &Arc, sub_id: String, hazelnut_id: String, ) { let auth = sess.services.auth_manager.auth().await; match crate::skills::remote::export_remote_skill( config, auth.as_ref(), hazelnut_id.as_str(), ) .await { Ok(result) => { let id = result.id; let event = Event { id: sub_id, msg: EventMsg::RemoteSkillDownloaded(RemoteSkillDownloadedEvent { id: id.clone(), name: id, path: result.path, }), }; sess.send_event_raw(event).await; } Err(err) => { let event = Event { id: sub_id, msg: EventMsg::Error(ErrorEvent { message: format!("failed to export remote skill {hazelnut_id}: {err}"), codex_error_info: Some(CodexErrorInfo::Other), }), }; sess.send_event_raw(event).await; } } } pub async fn undo(sess: &Arc, sub_id: String) { let turn_context = sess.new_default_turn_with_sub_id(sub_id).await; sess.spawn_task(turn_context, Vec::new(), UndoTask::new()) .await; } pub async fn compact(sess: &Arc, sub_id: String) { let turn_context = sess.new_default_turn_with_sub_id(sub_id).await; sess.spawn_task( Arc::clone(&turn_context), vec![UserInput::Text { text: turn_context.compact_prompt().to_string(), // Compaction prompt is synthesized; no UI element ranges to preserve. text_elements: Vec::new(), }], CompactTask, ) .await; } pub async fn drop_memories(sess: &Arc, config: &Arc, sub_id: String) { let mut errors = Vec::new(); if let Some(state_db) = sess.services.state_db.as_deref() { if let Err(err) = state_db.clear_memory_data().await { errors.push(format!("failed clearing memory rows from state db: {err}")); } } else { errors.push("state db unavailable; memory rows were not cleared".to_string()); } let memory_root = crate::memories::memory_root(&config.codex_home); if let Err(err) = tokio::fs::remove_dir_all(&memory_root).await && err.kind() != std::io::ErrorKind::NotFound { errors.push(format!( "failed removing memory directory {}: {err}", memory_root.display() )); } if errors.is_empty() { sess.send_event_raw(Event { id: sub_id, msg: EventMsg::Warning(WarningEvent { message: format!( "Dropped memories at {} and cleared memory rows from state db.", memory_root.display() ), }), }) .await; return; } sess.send_event_raw(Event { id: sub_id, msg: EventMsg::Error(ErrorEvent { message: format!("Memory drop completed with errors: {}", errors.join("; ")), codex_error_info: Some(CodexErrorInfo::Other), }), }) .await; } pub async fn update_memories(sess: &Arc, config: &Arc, sub_id: String) { let session_source = { let state = sess.state.lock().await; state.session_configuration.session_source.clone() }; crate::memories::start_memories_startup_task(sess, Arc::clone(config), &session_source); sess.send_event_raw(Event { id: sub_id.clone(), msg: EventMsg::Warning(WarningEvent { message: "Memory update triggered.".to_string(), }), }) .await; } pub async fn thread_rollback(sess: &Arc, sub_id: String, num_turns: u32) { if num_turns == 0 { sess.send_event_raw(Event { id: sub_id, msg: EventMsg::Error(ErrorEvent { message: "num_turns must be >= 1".to_string(), codex_error_info: Some(CodexErrorInfo::ThreadRollbackFailed), }), }) .await; return; } let has_active_turn = { sess.active_turn.lock().await.is_some() }; if has_active_turn { sess.send_event_raw(Event { id: sub_id, msg: EventMsg::Error(ErrorEvent { message: "Cannot rollback while a turn is in progress.".to_string(), codex_error_info: Some(CodexErrorInfo::ThreadRollbackFailed), }), }) .await; return; } let turn_context = sess.new_default_turn_with_sub_id(sub_id).await; sess.set_previous_model(Some(turn_context.model_info.slug.clone())) .await; let mut history = sess.clone_history().await; history.drop_last_n_user_turns(num_turns); // Replace with the raw items. We don't want to replace with a normalized // version of the history. sess.replace_history(history.raw_items().to_vec()).await; sess.recompute_token_usage(turn_context.as_ref()).await; sess.send_event_raw_flushed(Event { id: turn_context.sub_id.clone(), msg: EventMsg::ThreadRolledBack(ThreadRolledBackEvent { num_turns }), }) .await; } /// Persists the thread name in the session index, updates in-memory state, and emits /// a `ThreadNameUpdated` event on success. /// /// This appends the name to `CODEX_HOME/sessions_index.jsonl` via `session_index::append_thread_name` for the /// current `thread_id`, then updates `SessionConfiguration::thread_name`. /// /// Returns an error event if the name is empty or session persistence is disabled. pub async fn set_thread_name(sess: &Arc, sub_id: String, name: String) { let Some(name) = crate::util::normalize_thread_name(&name) else { let event = Event { id: sub_id, msg: EventMsg::Error(ErrorEvent { message: "Thread name cannot be empty.".to_string(), codex_error_info: Some(CodexErrorInfo::BadRequest), }), }; sess.send_event_raw(event).await; return; }; let persistence_enabled = { let rollout = sess.services.rollout.lock().await; rollout.is_some() }; if !persistence_enabled { let event = Event { id: sub_id, msg: EventMsg::Error(ErrorEvent { message: "Session persistence is disabled; cannot rename thread.".to_string(), codex_error_info: Some(CodexErrorInfo::Other), }), }; sess.send_event_raw(event).await; return; }; let codex_home = sess.codex_home().await; if let Err(e) = session_index::append_thread_name(&codex_home, sess.conversation_id, &name).await { let event = Event { id: sub_id, msg: EventMsg::Error(ErrorEvent { message: format!("Failed to set thread name: {e}"), codex_error_info: Some(CodexErrorInfo::Other), }), }; sess.send_event_raw(event).await; return; } { let mut state = sess.state.lock().await; state.session_configuration.thread_name = Some(name.clone()); } sess.send_event_raw(Event { id: sub_id, msg: EventMsg::ThreadNameUpdated(ThreadNameUpdatedEvent { thread_id: sess.conversation_id, thread_name: Some(name), }), }) .await; } pub async fn shutdown(sess: &Arc, sub_id: String) -> bool { sess.abort_all_tasks(TurnAbortReason::Interrupted).await; sess.services .unified_exec_manager .terminate_all_processes() .await; info!("Shutting down Codex instance"); let history = sess.clone_history().await; let turn_count = history .raw_items() .iter() .filter(|item| is_user_turn_boundary(item)) .count(); sess.services.otel_manager.counter( "codex.conversation.turn.count", i64::try_from(turn_count).unwrap_or(0), &[], ); // Gracefully flush and shutdown rollout recorder on session end so tests // that inspect the rollout file do not race with the background writer. let recorder_opt = { let mut guard = sess.services.rollout.lock().await; guard.take() }; if let Some(rec) = recorder_opt && let Err(e) = rec.shutdown().await { warn!("failed to shutdown rollout recorder: {e}"); let event = Event { id: sub_id.clone(), msg: EventMsg::Error(ErrorEvent { message: "Failed to shutdown rollout recorder".to_string(), codex_error_info: Some(CodexErrorInfo::Other), }), }; sess.send_event_raw(event).await; } let event = Event { id: sub_id, msg: EventMsg::ShutdownComplete, }; sess.send_event_raw(event).await; true } pub async fn review( sess: &Arc, config: &Arc, sub_id: String, review_request: ReviewRequest, ) { let turn_context = sess.new_default_turn_with_sub_id(sub_id.clone()).await; sess.maybe_emit_unknown_model_warning_for_turn(turn_context.as_ref()) .await; sess.refresh_mcp_servers_if_requested(&turn_context).await; match resolve_review_request(review_request, turn_context.cwd.as_path()) { Ok(resolved) => { spawn_review_thread( Arc::clone(sess), Arc::clone(config), turn_context.clone(), sub_id, resolved, ) .await; } Err(err) => { let event = Event { id: sub_id, msg: EventMsg::Error(ErrorEvent { message: err.to_string(), codex_error_info: Some(CodexErrorInfo::Other), }), }; sess.send_event(&turn_context, event.msg).await; } } } } /// Spawn a review thread using the given prompt. async fn spawn_review_thread( sess: Arc, config: Arc, parent_turn_context: Arc, sub_id: String, resolved: crate::review_prompts::ResolvedReviewRequest, ) { let model = config .review_model .clone() .unwrap_or_else(|| parent_turn_context.model_info.slug.clone()); let review_model_info = sess .services .models_manager .get_model_info(&model, &config) .await; // For reviews, disable web_search and view_image regardless of global settings. let mut review_features = sess.features.clone(); review_features .disable(crate::features::Feature::WebSearchRequest) .disable(crate::features::Feature::WebSearchCached); let review_web_search_mode = WebSearchMode::Disabled; let tools_config = ToolsConfig::new(&ToolsConfigParams { model_info: &review_model_info, features: &review_features, web_search_mode: Some(review_web_search_mode), }) .with_agent_roles(config.agent_roles.clone()); let review_prompt = resolved.prompt.clone(); let provider = parent_turn_context.provider.clone(); let auth_manager = parent_turn_context.auth_manager.clone(); let model_info = review_model_info.clone(); // Build per‑turn client with the requested model/family. let mut per_turn_config = (*config).clone(); per_turn_config.model = Some(model.clone()); per_turn_config.features = review_features.clone(); if let Err(err) = per_turn_config.web_search_mode.set(review_web_search_mode) { let fallback_value = per_turn_config.web_search_mode.value(); tracing::warn!( error = %err, ?review_web_search_mode, ?fallback_value, "review web_search_mode is disallowed by requirements; keeping constrained value" ); } let otel_manager = parent_turn_context .otel_manager .clone() .with_model(model.as_str(), review_model_info.slug.as_str()); let auth_manager_for_context = auth_manager.clone(); let provider_for_context = provider.clone(); let otel_manager_for_context = otel_manager.clone(); let reasoning_effort = per_turn_config.model_reasoning_effort; let reasoning_summary = per_turn_config.model_reasoning_summary; let session_source = parent_turn_context.session_source.clone(); let per_turn_config = Arc::new(per_turn_config); let review_turn_id = sub_id.to_string(); let turn_metadata_state = Arc::new(TurnMetadataState::new( review_turn_id.clone(), parent_turn_context.cwd.clone(), &parent_turn_context.sandbox_policy, parent_turn_context.windows_sandbox_level, parent_turn_context .features .enabled(Feature::UseLinuxSandboxBwrap), )); let review_turn_context = TurnContext { sub_id: review_turn_id, config: per_turn_config, auth_manager: auth_manager_for_context, model_info: model_info.clone(), otel_manager: otel_manager_for_context, provider: provider_for_context, reasoning_effort, reasoning_summary, session_source, tools_config, features: parent_turn_context.features.clone(), ghost_snapshot: parent_turn_context.ghost_snapshot.clone(), developer_instructions: None, user_instructions: None, compact_prompt: parent_turn_context.compact_prompt.clone(), collaboration_mode: parent_turn_context.collaboration_mode.clone(), personality: parent_turn_context.personality, approval_policy: parent_turn_context.approval_policy, sandbox_policy: parent_turn_context.sandbox_policy.clone(), network: parent_turn_context.network.clone(), windows_sandbox_level: parent_turn_context.windows_sandbox_level, shell_environment_policy: parent_turn_context.shell_environment_policy.clone(), cwd: parent_turn_context.cwd.clone(), final_output_json_schema: None, codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), js_repl: Arc::clone(&sess.js_repl), dynamic_tools: parent_turn_context.dynamic_tools.clone(), truncation_policy: model_info.truncation_policy.into(), turn_metadata_state, }; // Seed the child task with the review prompt as the initial user message. let input: Vec = vec![UserInput::Text { text: review_prompt, // Review prompt is synthesized; no UI element ranges to preserve. text_elements: Vec::new(), }]; let tc = Arc::new(review_turn_context); tc.turn_metadata_state.spawn_git_enrichment_task(); sess.spawn_task(tc.clone(), input, ReviewTask::new()).await; // Announce entering review mode so UIs can switch modes. let review_request = ReviewRequest { target: resolved.target, user_facing_hint: Some(resolved.user_facing_hint), }; sess.send_event(&tc, EventMsg::EnteredReviewMode(review_request)) .await; } fn skills_to_info( skills: &[SkillMetadata], disabled_paths: &HashSet, ) -> Vec { skills .iter() .map(|skill| ProtocolSkillMetadata { name: skill.name.clone(), description: skill.description.clone(), short_description: skill.short_description.clone(), interface: skill .interface .clone() .map(|interface| ProtocolSkillInterface { display_name: interface.display_name, short_description: interface.short_description, icon_small: interface.icon_small, icon_large: interface.icon_large, brand_color: interface.brand_color, default_prompt: interface.default_prompt, }), dependencies: skill.dependencies.clone().map(|dependencies| { ProtocolSkillDependencies { tools: dependencies .tools .into_iter() .map(|tool| ProtocolSkillToolDependency { r#type: tool.r#type, value: tool.value, description: tool.description, transport: tool.transport, command: tool.command, url: tool.url, }) .collect(), } }), path: skill.path.clone(), scope: skill.scope, enabled: !disabled_paths.contains(&skill.path), }) .collect() } fn errors_to_info(errors: &[SkillError]) -> Vec { errors .iter() .map(|err| SkillErrorInfo { path: err.path.clone(), message: err.message.clone(), }) .collect() } /// Takes a user message as input and runs a loop where, at each sampling request, the model /// replies with either: /// /// - requested function calls /// - an assistant message /// /// While it is possible for the model to return multiple of these items in a /// single sampling request, in practice, we generally one item per sampling request: /// /// - If the model requests a function call, we execute it and send the output /// back to the model in the next sampling request. /// - If the model sends only an assistant message, we record it in the /// conversation history and consider the turn complete. /// pub(crate) async fn run_turn( sess: Arc, turn_context: Arc, input: Vec, pre_turn_context_items: Vec, prewarmed_client_session: Option, cancellation_token: CancellationToken, ) -> Option { if input.is_empty() { return None; } let model_info = turn_context.model_info.clone(); let auto_compact_limit = model_info.auto_compact_token_limit().unwrap_or(i64::MAX); let response_item: ResponseItem = ResponseInputItem::from(input.clone()).into(); let mut incoming_turn_items = pre_turn_context_items.clone(); incoming_turn_items.push(response_item.clone()); let event = EventMsg::TurnStarted(TurnStartedEvent { turn_id: turn_context.sub_id.clone(), model_context_window: turn_context.model_context_window(), collaboration_mode_kind: turn_context.collaboration_mode.mode, }); sess.send_event(&turn_context, event).await; let total_usage_tokens_before_compaction = sess.get_total_token_usage().await; if maybe_run_previous_model_inline_compact( &sess, &turn_context, total_usage_tokens_before_compaction, &pre_turn_context_items, ) .await .is_err() { // Error messaging is emitted inside maybe_run_previous_model_inline_compact. return None; } let Ok(pre_turn_compaction_outcome) = run_pre_turn_auto_compaction_if_needed( &sess, &turn_context, auto_compact_limit, &incoming_turn_items, &pre_turn_context_items, ) .await else { // Error messaging is emitted inside run_pre_turn_auto_compaction_if_needed. return None; }; persist_pre_turn_items_for_compaction_outcome( &sess, &turn_context, pre_turn_compaction_outcome, &pre_turn_context_items, &input, response_item, ) .await; let skills_outcome = Some( sess.services .skills_manager .skills_for_cwd(&turn_context.cwd, false) .await, ); let available_connectors = if turn_context.config.features.enabled(Feature::Apps) { let mcp_tools = match sess .services .mcp_connection_manager .read() .await .list_all_tools() .or_cancel(&cancellation_token) .await { Ok(mcp_tools) => mcp_tools, Err(_) => return None, }; connectors::with_app_enabled_state( connectors::accessible_connectors_from_mcp_tools(&mcp_tools), &turn_context.config, ) } else { Vec::new() }; let connector_slug_counts = build_connector_slug_counts(&available_connectors); let skill_name_counts_lower = skills_outcome .as_ref() .map_or_else(HashMap::new, |outcome| { build_skill_name_counts(&outcome.skills, &outcome.disabled_paths).1 }); let mentioned_skills = skills_outcome.as_ref().map_or_else(Vec::new, |outcome| { collect_explicit_skill_mentions( &input, &outcome.skills, &outcome.disabled_paths, &connector_slug_counts, ) }); let config = turn_context.config.clone(); if config .features .enabled(Feature::SkillEnvVarDependencyPrompt) { let env_var_dependencies = collect_env_var_dependencies(&mentioned_skills); resolve_skill_dependencies_for_turn(&sess, &turn_context, &env_var_dependencies).await; } maybe_prompt_and_install_mcp_dependencies( sess.as_ref(), turn_context.as_ref(), &cancellation_token, &mentioned_skills, ) .await; let otel_manager = turn_context.otel_manager.clone(); let thread_id = sess.conversation_id.to_string(); let tracking = build_track_events_context( turn_context.model_info.slug.clone(), thread_id, turn_context.sub_id.clone(), ); let SkillInjections { items: skill_items, warnings: skill_warnings, } = build_skill_injections( &mentioned_skills, Some(&otel_manager), &sess.services.analytics_events_client, tracking.clone(), ) .await; for message in skill_warnings { sess.send_event(&turn_context, EventMsg::Warning(WarningEvent { message })) .await; } let mut explicitly_enabled_connectors = collect_explicit_app_ids(&input); explicitly_enabled_connectors.extend(collect_explicit_app_ids_from_skill_items( &skill_items, &available_connectors, &skill_name_counts_lower, )); let connector_names_by_id = available_connectors .iter() .map(|connector| (connector.id.as_str(), connector.name.as_str())) .collect::>(); let mentioned_app_invocations = explicitly_enabled_connectors .iter() .map(|connector_id| AppInvocation { connector_id: Some(connector_id.clone()), app_name: connector_names_by_id .get(connector_id.as_str()) .map(|name| (*name).to_string()), invoke_type: Some("explicit".to_string()), }) .collect::>(); sess.services .analytics_events_client .track_app_mentioned(tracking.clone(), mentioned_app_invocations); sess.merge_connector_selection(explicitly_enabled_connectors.clone()) .await; if !skill_items.is_empty() { sess.record_conversation_items(&turn_context, &skill_items) .await; } sess.maybe_start_ghost_snapshot(Arc::clone(&turn_context), cancellation_token.child_token()) .await; let mut last_agent_message: Option = None; // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains // many turns, from the perspective of the user, it is a single turn. let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())); let mut server_model_warning_emitted_for_turn = false; // `ModelClientSession` is turn-scoped and caches WebSocket + sticky routing state, so we reuse // one instance across retries within this turn. let mut client_session = prewarmed_client_session.unwrap_or_else(|| sess.services.model_client.new_session()); loop { // Note that pending_input would be something like a message the user // submitted through the UI while the model was running. Though the UI // may support this, the model might not. let pending_response_items = sess .get_pending_input() .await .into_iter() .map(ResponseItem::from) .collect::>(); if !pending_response_items.is_empty() { for response_item in pending_response_items { if let Some(TurnItem::UserMessage(user_message)) = parse_turn_item(&response_item) { // todo(aibrahim): move pending input to be UserInput only to keep TextElements. context: https://github.com/openai/codex/pull/10656#discussion_r2765522480 sess.record_user_prompt_and_emit_turn_item( turn_context.as_ref(), &user_message.content, response_item, ) .await; } else { sess.record_conversation_items( &turn_context, std::slice::from_ref(&response_item), ) .await; } } } // Construct the input that we will send to the model. let sampling_request_input: Vec = { sess.clone_history() .await .for_prompt(&turn_context.model_info.input_modalities) }; let sampling_request_input_messages = sampling_request_input .iter() .filter_map(|item| match parse_turn_item(item) { Some(TurnItem::UserMessage(user_message)) => Some(user_message), _ => None, }) .map(|user_message| user_message.message()) .collect::>(); let turn_metadata_header = turn_context.turn_metadata_state.current_header_value(); match run_sampling_request( Arc::clone(&sess), Arc::clone(&turn_context), Arc::clone(&turn_diff_tracker), &mut client_session, turn_metadata_header.as_deref(), sampling_request_input, &explicitly_enabled_connectors, skills_outcome.as_ref(), &mut server_model_warning_emitted_for_turn, cancellation_token.child_token(), ) .await { Ok(sampling_request_output) => { let SamplingRequestResult { needs_follow_up, last_agent_message: sampling_request_last_agent_message, } = sampling_request_output; let total_usage_tokens = sess.get_total_token_usage().await; let token_limit_reached = total_usage_tokens >= auto_compact_limit; let estimated_token_count = sess.get_estimated_token_count(turn_context.as_ref()).await; trace!( turn_id = %turn_context.sub_id, total_usage_tokens, estimated_token_count = ?estimated_token_count, auto_compact_limit, token_limit_reached, needs_follow_up, "post sampling token usage" ); // as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop. if token_limit_reached && needs_follow_up { if let Err(err) = run_auto_compact( &sess, &turn_context, CompactCallsite::MidTurnContinuation, None, ) .await { if matches!(err, CodexErr::Interrupted) { return None; } let event = EventMsg::Error( err.to_error_event(Some("Error running auto compact task".to_string())), ); sess.send_event(&turn_context, event).await; return None; } continue; } if !needs_follow_up { last_agent_message = sampling_request_last_agent_message; let hook_outcomes = sess .hooks() .dispatch(HookPayload { session_id: sess.conversation_id, cwd: turn_context.cwd.clone(), triggered_at: chrono::Utc::now(), hook_event: HookEvent::AfterAgent { event: HookEventAfterAgent { thread_id: sess.conversation_id, turn_id: turn_context.sub_id.clone(), input_messages: sampling_request_input_messages, last_assistant_message: last_agent_message.clone(), }, }, }) .await; let mut abort_message = None; for hook_outcome in hook_outcomes { let hook_name = hook_outcome.hook_name; match hook_outcome.result { HookResult::Success => {} HookResult::FailedContinue(error) => { warn!( turn_id = %turn_context.sub_id, hook_name = %hook_name, error = %error, "after_agent hook failed; continuing" ); } HookResult::FailedAbort(error) => { let message = format!( "after_agent hook '{hook_name}' failed and aborted turn completion: {error}" ); warn!( turn_id = %turn_context.sub_id, hook_name = %hook_name, error = %error, "after_agent hook failed; aborting operation" ); if abort_message.is_none() { abort_message = Some(message); } } } } if let Some(message) = abort_message { sess.send_event( &turn_context, EventMsg::Error(ErrorEvent { message, codex_error_info: None, }), ) .await; return None; } break; } continue; } Err(CodexErr::TurnAborted) => { // Aborted turn is reported via a different event. break; } Err(CodexErr::InvalidImageRequest()) => { let mut state = sess.state.lock().await; error_or_panic( "Invalid image detected; sanitizing tool output to prevent poisoning", ); if state.history.replace_last_turn_images("Invalid image") { continue; } let event = EventMsg::Error(ErrorEvent { message: "Invalid image in your last message. Please remove it and try again." .to_string(), codex_error_info: Some(CodexErrorInfo::BadRequest), }); sess.send_event(&turn_context, event).await; break; } Err(e) => { info!("Turn error: {e:#}"); let event = EventMsg::Error(e.to_error_event(None)); sess.send_event(&turn_context, event).await; // let the user continue the conversation break; } } } last_agent_message } /// Runs the pre-sampling model-switch compaction pass when needed. /// /// On failure this function emits any user-visible error event itself and returns `Err(())` as a /// sentinel so callers can stop the turn without duplicating error messaging logic. async fn maybe_run_previous_model_inline_compact( sess: &Arc, turn_context: &Arc, total_usage_tokens: i64, pre_turn_context_items: &[ResponseItem], ) -> Result<(), ()> { let Some(previous_model) = sess.previous_model().await else { return Ok(()); }; let previous_turn_context = Arc::new( turn_context .with_model(previous_model, &sess.services.models_manager) .await, ); let Some(old_context_window) = previous_turn_context.model_context_window() else { return Ok(()); }; let Some(new_context_window) = turn_context.model_context_window() else { return Ok(()); }; let new_auto_compact_limit = turn_context .model_info .auto_compact_token_limit() .unwrap_or(i64::MAX); let should_run = total_usage_tokens > new_auto_compact_limit && previous_turn_context.model_info.slug != turn_context.model_info.slug && old_context_window > new_context_window; if !should_run { return Ok(()); } match run_auto_compact( sess, // We use previous turn context here because we compact with the previous model &previous_turn_context, CompactCallsite::PreSamplingModelSwitch, None, ) .await { Ok(()) => Ok(()), Err(err) => { if !pre_turn_context_items.is_empty() { // Preserve model-visible settings updates even when pre-turn compaction fails // before we can persist turn input. sess.record_conversation_items(turn_context, pre_turn_context_items) .await; } if matches!(err, CodexErr::Interrupted) { return Err(()); } let compact_error_prefix = if should_use_remote_compact_task(&turn_context.provider) { "Error running remote compact task" } else { "Error running local compact task" }; let event = EventMsg::Error(err.to_error_event(Some(compact_error_prefix.to_string()))); sess.send_event(turn_context, event).await; Err(()) } } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum PreTurnCompactionOutcome { /// Pre-turn input fits without compaction. NotNeeded, /// Pre-turn compaction succeeded with incoming turn context + user message included. CompactedWithIncomingItems, /// Pre-turn compaction succeeded without incoming turn items /// (incoming user message should be appended after the compaction summary). /// This compaction strategy is currently out of distribution for our compaction model, /// but is planned to be trained on in the future. #[cfg(test)] CompactedWithoutIncomingItems, } async fn persist_pre_turn_items_for_compaction_outcome( sess: &Arc, turn_context: &Arc, outcome: PreTurnCompactionOutcome, pre_turn_context_items: &[ResponseItem], input: &[UserInput], response_item: ResponseItem, ) { match outcome { PreTurnCompactionOutcome::NotNeeded => { if !pre_turn_context_items.is_empty() { sess.record_conversation_items(turn_context, pre_turn_context_items) .await; } sess.record_user_prompt_and_emit_turn_item(turn_context.as_ref(), input, response_item) .await; } PreTurnCompactionOutcome::CompactedWithIncomingItems => { // Pre-turn compaction includes incoming items only for the compaction request itself. // Persist canonical turn context directly above the incoming user item so context // applies to the latest user message in post-compaction history. let initial_context = sess.build_initial_context(turn_context.as_ref()).await; if !initial_context.is_empty() { sess.record_conversation_items(turn_context, &initial_context) .await; } sess.record_user_prompt_and_emit_turn_item(turn_context.as_ref(), input, response_item) .await; } // TODO(ccunningham): Followup PR will use compacting excluding incoming items as a fallback // (even though it is out of distribution for current models). // Also future models may prefer compacting pre-turn history without incoming turn items. #[cfg(test)] PreTurnCompactionOutcome::CompactedWithoutIncomingItems => { // Reseed canonical initial context above the incoming user message. let initial_context = sess.build_initial_context(turn_context.as_ref()).await; if !initial_context.is_empty() { sess.record_conversation_items(turn_context, &initial_context) .await; } sess.record_user_prompt_and_emit_turn_item(turn_context.as_ref(), input, response_item) .await; } } } /// Runs pre-turn auto-compaction with incoming turn context + user message included. /// /// On failure this function emits any user-visible error event itself and returns `Err(())` as a /// sentinel so callers can stop the turn without duplicating error messaging logic. async fn run_pre_turn_auto_compaction_if_needed( sess: &Arc, turn_context: &Arc, auto_compact_limit: i64, incoming_turn_items: &[ResponseItem], pre_turn_context_items: &[ResponseItem], ) -> Result { let total_usage_tokens = sess.get_total_token_usage().await; let incoming_items_tokens_estimate = incoming_turn_items .iter() .map(estimate_item_token_count) .fold(0_i64, i64::saturating_add); if !is_projected_submission_over_auto_compact_limit( total_usage_tokens, incoming_items_tokens_estimate, auto_compact_limit, ) { return Ok(PreTurnCompactionOutcome::NotNeeded); } let compact_result = run_auto_compact( sess, turn_context, CompactCallsite::PreTurnIncludingIncomingUserMessage, Some(incoming_turn_items.to_vec()), ) .await; if let Err(err) = compact_result { if !pre_turn_context_items.is_empty() { // Preserve model-visible settings updates even when pre-turn compaction fails // before we can persist turn input. sess.record_conversation_items(turn_context, pre_turn_context_items) .await; } if matches!(err, CodexErr::Interrupted) { return Err(()); } let event = match err { CodexErr::ContextWindowExceeded => { error!( turn_id = %turn_context.sub_id, auto_compact_callsite = ?CompactCallsite::PreTurnIncludingIncomingUserMessage, incoming_items_tokens_estimate, auto_compact_limit, reason = "pre-turn compaction exceeded context window", "incoming user/context is too large for pre-turn auto-compaction flow" ); let message = format!( "Incoming user message and/or turn context is too large to fit in context window. Please reduce the size of your message and try again. (incoming_items_tokens_estimate={incoming_items_tokens_estimate})" ); EventMsg::Error(CodexErr::ContextWindowExceeded.to_error_event(Some(message))) } other => { let compact_error_prefix = if should_use_remote_compact_task(&turn_context.provider) { "Error running remote compact task" } else { "Error running local compact task" }; EventMsg::Error(other.to_error_event(Some(compact_error_prefix.to_string()))) } }; sess.send_event(turn_context, event).await; return Err(()); } // If compaction no-oped because there was no user-turn boundary even after including incoming // items, do not treat this as "compacted with incoming". The caller must persist incoming // items explicitly in this case. let has_user_turn_boundary_after_compaction = sess .clone_history() .await .raw_items() .iter() .any(crate::context_manager::is_user_turn_boundary); if !has_user_turn_boundary_after_compaction { return Ok(PreTurnCompactionOutcome::NotNeeded); } Ok(PreTurnCompactionOutcome::CompactedWithIncomingItems) } fn is_projected_submission_over_auto_compact_limit( total_usage_tokens: i64, incoming_user_tokens_estimate: i64, auto_compact_limit: i64, ) -> bool { if auto_compact_limit == i64::MAX { return false; } total_usage_tokens.saturating_add(incoming_user_tokens_estimate) >= auto_compact_limit } async fn run_auto_compact( sess: &Arc, turn_context: &Arc, auto_compact_callsite: CompactCallsite, incoming_items: Option>, ) -> CodexResult<()> { let result = if should_use_remote_compact_task(&turn_context.provider) { run_inline_remote_auto_compact_task( Arc::clone(sess), Arc::clone(turn_context), auto_compact_callsite, incoming_items, ) .await } else { run_inline_auto_compact_task( Arc::clone(sess), Arc::clone(turn_context), auto_compact_callsite, incoming_items, ) .await }; if let Err(err) = &result { error!( turn_id = %turn_context.sub_id, auto_compact_callsite = ?auto_compact_callsite, compact_error = %err, "auto compaction failed" ); } result } fn collect_explicit_app_ids_from_skill_items( skill_items: &[ResponseItem], connectors: &[connectors::AppInfo], skill_name_counts_lower: &HashMap, ) -> HashSet { if skill_items.is_empty() || connectors.is_empty() { return HashSet::new(); } let skill_messages = skill_items .iter() .filter_map(|item| match item { ResponseItem::Message { content, .. } => { content.iter().find_map(|content_item| match content_item { ContentItem::InputText { text } => Some(text.clone()), _ => None, }) } _ => None, }) .collect::>(); if skill_messages.is_empty() { return HashSet::new(); } let mentions = collect_tool_mentions_from_messages(&skill_messages); let mention_names_lower = mentions .plain_names .iter() .map(|name| name.to_ascii_lowercase()) .collect::>(); let mut connector_ids = mentions .paths .iter() .filter(|path| tool_kind_for_path(path) == ToolMentionKind::App) .filter_map(|path| app_id_from_path(path).map(str::to_string)) .collect::>(); let connector_slug_counts = build_connector_slug_counts(connectors); for connector in connectors { let slug = connectors::connector_mention_slug(connector); let connector_count = connector_slug_counts.get(&slug).copied().unwrap_or(0); let skill_count = skill_name_counts_lower.get(&slug).copied().unwrap_or(0); if connector_count == 1 && skill_count == 0 && mention_names_lower.contains(&slug) { connector_ids.insert(connector.id.clone()); } } connector_ids } fn filter_connectors_for_input( connectors: &[connectors::AppInfo], input: &[ResponseItem], explicitly_enabled_connectors: &HashSet, skill_name_counts_lower: &HashMap, ) -> Vec { let connectors: Vec = connectors .iter() .filter(|connector| connector.is_enabled) .cloned() .collect::>(); if connectors.is_empty() { return Vec::new(); } let user_messages = collect_user_messages(input); if user_messages.is_empty() && explicitly_enabled_connectors.is_empty() { return Vec::new(); } let mentions = collect_tool_mentions_from_messages(&user_messages); let mention_names_lower = mentions .plain_names .iter() .map(|name| name.to_ascii_lowercase()) .collect::>(); let connector_slug_counts = build_connector_slug_counts(&connectors); let mut allowed_connector_ids = explicitly_enabled_connectors.clone(); for path in mentions .paths .iter() .filter(|path| tool_kind_for_path(path) == ToolMentionKind::App) { if let Some(connector_id) = app_id_from_path(path) { allowed_connector_ids.insert(connector_id.to_string()); } } connectors .into_iter() .filter(|connector| { connector_inserted_in_messages( connector, &mention_names_lower, &allowed_connector_ids, &connector_slug_counts, skill_name_counts_lower, ) }) .collect() } fn connector_inserted_in_messages( connector: &connectors::AppInfo, mention_names_lower: &HashSet, allowed_connector_ids: &HashSet, connector_slug_counts: &HashMap, skill_name_counts_lower: &HashMap, ) -> bool { if allowed_connector_ids.contains(&connector.id) { return true; } let mention_slug = connectors::connector_mention_slug(connector); let connector_count = connector_slug_counts .get(&mention_slug) .copied() .unwrap_or(0); let skill_count = skill_name_counts_lower .get(&mention_slug) .copied() .unwrap_or(0); connector_count == 1 && skill_count == 0 && mention_names_lower.contains(&mention_slug) } fn filter_codex_apps_mcp_tools( mcp_tools: &HashMap, connectors: &[connectors::AppInfo], ) -> HashMap { let allowed: HashSet<&str> = connectors .iter() .map(|connector| connector.id.as_str()) .collect(); mcp_tools .iter() .filter(|(_, tool)| { if tool.server_name != CODEX_APPS_MCP_SERVER_NAME { return true; } let Some(connector_id) = codex_apps_connector_id(tool) else { return false; }; allowed.contains(connector_id) }) .map(|(name, tool)| (name.clone(), tool.clone())) .collect() } fn codex_apps_connector_id(tool: &crate::mcp_connection_manager::ToolInfo) -> Option<&str> { tool.connector_id.as_deref() } #[allow(clippy::too_many_arguments)] #[instrument(level = "trace", skip_all, fields( turn_id = %turn_context.sub_id, model = %turn_context.model_info.slug, cwd = %turn_context.cwd.display() ) )] async fn run_sampling_request( sess: Arc, turn_context: Arc, turn_diff_tracker: SharedTurnDiffTracker, client_session: &mut ModelClientSession, turn_metadata_header: Option<&str>, input: Vec, explicitly_enabled_connectors: &HashSet, skills_outcome: Option<&SkillLoadOutcome>, server_model_warning_emitted_for_turn: &mut bool, cancellation_token: CancellationToken, ) -> CodexResult { let router = built_tools( sess.as_ref(), turn_context.as_ref(), &input, explicitly_enabled_connectors, skills_outcome, &cancellation_token, ) .await?; let model_supports_parallel = turn_context.model_info.supports_parallel_tool_calls; let tools = crate::tools::spec::filter_tools_for_model(router.specs(), &turn_context.tools_config); let base_instructions = sess.get_base_instructions().await; let prompt = Prompt { input, tools, parallel_tool_calls: model_supports_parallel, base_instructions, personality: turn_context.personality, output_schema: turn_context.final_output_json_schema.clone(), }; let mut retries = 0; loop { let err = match try_run_sampling_request( Arc::clone(&router), Arc::clone(&sess), Arc::clone(&turn_context), client_session, turn_metadata_header, Arc::clone(&turn_diff_tracker), server_model_warning_emitted_for_turn, &prompt, cancellation_token.child_token(), ) .await { Ok(output) => { return Ok(output); } Err(CodexErr::ContextWindowExceeded) => { sess.set_total_tokens_full(&turn_context).await; return Err(CodexErr::ContextWindowExceeded); } Err(CodexErr::UsageLimitReached(e)) => { let rate_limits = e.rate_limits.clone(); if let Some(rate_limits) = rate_limits { sess.update_rate_limits(&turn_context, *rate_limits).await; } return Err(CodexErr::UsageLimitReached(e)); } Err(err) => err, }; if !err.is_retryable() { return Err(err); } // Use the configured provider-specific stream retry budget. let max_retries = turn_context.provider.stream_max_retries(); if retries >= max_retries && client_session .try_switch_fallback_transport(&turn_context.otel_manager, &turn_context.model_info) { sess.send_event( &turn_context, EventMsg::Warning(WarningEvent { message: format!("Falling back from WebSockets to HTTPS transport. {err:#}"), }), ) .await; retries = 0; continue; } if retries < max_retries { retries += 1; let delay = match &err { CodexErr::Stream(_, requested_delay) => { requested_delay.unwrap_or_else(|| backoff(retries)) } _ => backoff(retries), }; warn!( "stream disconnected - retrying sampling request ({retries}/{max_retries} in {delay:?})...", ); // In release builds, hide the first websocket retry notification to reduce noisy // transient reconnect messages. In debug builds, keep full visibility for diagnosis. let report_error = retries > 1 || cfg!(debug_assertions) || !sess .services .model_client .responses_websocket_enabled(&turn_context.model_info); if report_error { // Surface retry information to any UI/front‑end so the // user understands what is happening instead of staring // at a seemingly frozen screen. sess.notify_stream_error( &turn_context, format!("Reconnecting... {retries}/{max_retries}"), err, ) .await; } tokio::time::sleep(delay).await; } else { return Err(err); } } } async fn built_tools( sess: &Session, turn_context: &TurnContext, input: &[ResponseItem], explicitly_enabled_connectors: &HashSet, skills_outcome: Option<&SkillLoadOutcome>, cancellation_token: &CancellationToken, ) -> CodexResult> { let mcp_connection_manager = sess.services.mcp_connection_manager.read().await; let has_mcp_servers = mcp_connection_manager.has_servers(); let mut mcp_tools = mcp_connection_manager .list_all_tools() .or_cancel(cancellation_token) .await?; drop(mcp_connection_manager); let mut effective_explicitly_enabled_connectors = explicitly_enabled_connectors.clone(); effective_explicitly_enabled_connectors.extend(sess.get_connector_selection().await); let connectors = if turn_context.features.enabled(Feature::Apps) { Some(connectors::with_app_enabled_state( connectors::accessible_connectors_from_mcp_tools(&mcp_tools), &turn_context.config, )) } else { None }; let app_tools = connectors .as_ref() .map(|connectors| filter_codex_apps_mcp_tools(&mcp_tools, connectors)); if let Some(connectors) = connectors.as_ref() { let skill_name_counts_lower = skills_outcome.map_or_else(HashMap::new, |outcome| { build_skill_name_counts(&outcome.skills, &outcome.disabled_paths).1 }); let explicitly_enabled = filter_connectors_for_input( connectors, input, &effective_explicitly_enabled_connectors, &skill_name_counts_lower, ); let mut selected_mcp_tools = filter_non_codex_apps_mcp_tools_only(&mcp_tools); if let Some(selected_tools) = sess.get_mcp_tool_selection().await { selected_mcp_tools.extend(filter_mcp_tools_by_name(&mcp_tools, &selected_tools)); } selected_mcp_tools.extend(filter_codex_apps_mcp_tools_only( &mcp_tools, explicitly_enabled.as_ref(), )); mcp_tools = selected_mcp_tools; } Ok(Arc::new(ToolRouter::from_config( &turn_context.tools_config, has_mcp_servers.then(|| { mcp_tools .into_iter() .map(|(name, tool)| (name, tool.tool)) .collect() }), app_tools, turn_context.dynamic_tools.as_slice(), ))) } #[derive(Debug)] struct SamplingRequestResult { needs_follow_up: bool, last_agent_message: Option, } /// Ephemeral per-response state for streaming a single proposed plan. /// This is intentionally not persisted or stored in session/state since it /// only exists while a response is actively streaming. The final plan text /// is extracted from the completed assistant message. /// Tracks a single proposed plan item across a streaming response. struct ProposedPlanItemState { item_id: String, started: bool, completed: bool, } /// Per-item plan parsers so we can buffer text while detecting `` /// tags without ever mixing buffered lines across item ids. struct PlanParsers { assistant: HashMap, } impl PlanParsers { fn new() -> Self { Self { assistant: HashMap::new(), } } fn assistant_parser_mut(&mut self, item_id: &str) -> &mut ProposedPlanParser { self.assistant .entry(item_id.to_string()) .or_insert_with(ProposedPlanParser::new) } fn take_assistant_parser(&mut self, item_id: &str) -> Option { self.assistant.remove(item_id) } fn drain_assistant_parsers(&mut self) -> Vec<(String, ProposedPlanParser)> { self.assistant.drain().collect() } } /// Aggregated state used only while streaming a plan-mode response. /// Includes per-item parsers, deferred agent message bookkeeping, and the plan item lifecycle. struct PlanModeStreamState { /// Per-item parsers for assistant streams in plan mode. plan_parsers: PlanParsers, /// Agent message items started by the model but deferred until we see non-plan text. pending_agent_message_items: HashMap, /// Agent message items whose start notification has been emitted. started_agent_message_items: HashSet, /// Leading whitespace buffered until we see non-whitespace text for an item. leading_whitespace_by_item: HashMap, /// Tracks plan item lifecycle while streaming plan output. plan_item_state: ProposedPlanItemState, } impl PlanModeStreamState { fn new(turn_id: &str) -> Self { Self { plan_parsers: PlanParsers::new(), pending_agent_message_items: HashMap::new(), started_agent_message_items: HashSet::new(), leading_whitespace_by_item: HashMap::new(), plan_item_state: ProposedPlanItemState::new(turn_id), } } } impl ProposedPlanItemState { fn new(turn_id: &str) -> Self { Self { item_id: format!("{turn_id}-plan"), started: false, completed: false, } } async fn start(&mut self, sess: &Session, turn_context: &TurnContext) { if self.started || self.completed { return; } self.started = true; let item = TurnItem::Plan(PlanItem { id: self.item_id.clone(), text: String::new(), }); sess.emit_turn_item_started(turn_context, &item).await; } async fn push_delta(&mut self, sess: &Session, turn_context: &TurnContext, delta: &str) { if self.completed { return; } if delta.is_empty() { return; } let event = PlanDeltaEvent { thread_id: sess.conversation_id.to_string(), turn_id: turn_context.sub_id.clone(), item_id: self.item_id.clone(), delta: delta.to_string(), }; sess.send_event(turn_context, EventMsg::PlanDelta(event)) .await; } async fn complete_with_text( &mut self, sess: &Session, turn_context: &TurnContext, text: String, ) { if self.completed || !self.started { return; } self.completed = true; let item = TurnItem::Plan(PlanItem { id: self.item_id.clone(), text, }); sess.emit_turn_item_completed(turn_context, item).await; } } /// In plan mode we defer agent message starts until the parser emits non-plan /// text. The parser buffers each line until it can rule out a tag prefix, so /// plan-only outputs never show up as empty assistant messages. async fn maybe_emit_pending_agent_message_start( sess: &Session, turn_context: &TurnContext, state: &mut PlanModeStreamState, item_id: &str, ) { if state.started_agent_message_items.contains(item_id) { return; } if let Some(item) = state.pending_agent_message_items.remove(item_id) { sess.emit_turn_item_started(turn_context, &item).await; state .started_agent_message_items .insert(item_id.to_string()); } } /// Agent messages are text-only today; concatenate all text entries. fn agent_message_text(item: &codex_protocol::items::AgentMessageItem) -> String { item.content .iter() .map(|entry| match entry { codex_protocol::items::AgentMessageContent::Text { text } => text.as_str(), }) .collect() } /// Split the stream into normal assistant text vs. proposed plan content. /// Normal text becomes AgentMessage deltas; plan content becomes PlanDelta + /// TurnItem::Plan. async fn handle_plan_segments( sess: &Session, turn_context: &TurnContext, state: &mut PlanModeStreamState, item_id: &str, segments: Vec, ) { for segment in segments { match segment { ProposedPlanSegment::Normal(delta) => { if delta.is_empty() { continue; } let has_non_whitespace = delta.chars().any(|ch| !ch.is_whitespace()); if !has_non_whitespace && !state.started_agent_message_items.contains(item_id) { let entry = state .leading_whitespace_by_item .entry(item_id.to_string()) .or_default(); entry.push_str(&delta); continue; } let delta = if !state.started_agent_message_items.contains(item_id) { if let Some(prefix) = state.leading_whitespace_by_item.remove(item_id) { format!("{prefix}{delta}") } else { delta } } else { delta }; maybe_emit_pending_agent_message_start(sess, turn_context, state, item_id).await; let event = AgentMessageContentDeltaEvent { thread_id: sess.conversation_id.to_string(), turn_id: turn_context.sub_id.clone(), item_id: item_id.to_string(), delta, }; sess.send_event(turn_context, EventMsg::AgentMessageContentDelta(event)) .await; } ProposedPlanSegment::ProposedPlanStart => { if !state.plan_item_state.completed { state.plan_item_state.start(sess, turn_context).await; } } ProposedPlanSegment::ProposedPlanDelta(delta) => { if !state.plan_item_state.completed { if !state.plan_item_state.started { state.plan_item_state.start(sess, turn_context).await; } state .plan_item_state .push_delta(sess, turn_context, &delta) .await; } } ProposedPlanSegment::ProposedPlanEnd => {} } } } /// Flush any buffered proposed-plan segments when a specific assistant message ends. async fn flush_proposed_plan_segments_for_item( sess: &Session, turn_context: &TurnContext, state: &mut PlanModeStreamState, item_id: &str, ) { let Some(mut parser) = state.plan_parsers.take_assistant_parser(item_id) else { return; }; let segments = parser.finish(); if segments.is_empty() { return; } handle_plan_segments(sess, turn_context, state, item_id, segments).await; } /// Flush any remaining assistant plan parsers when the response completes. async fn flush_proposed_plan_segments_all( sess: &Session, turn_context: &TurnContext, state: &mut PlanModeStreamState, ) { for (item_id, mut parser) in state.plan_parsers.drain_assistant_parsers() { let segments = parser.finish(); if segments.is_empty() { continue; } handle_plan_segments(sess, turn_context, state, &item_id, segments).await; } } /// Emit completion for plan items by parsing the finalized assistant message. async fn maybe_complete_plan_item_from_message( sess: &Session, turn_context: &TurnContext, state: &mut PlanModeStreamState, item: &ResponseItem, ) { if let ResponseItem::Message { role, content, .. } = item && role == "assistant" { let mut text = String::new(); for entry in content { if let ContentItem::OutputText { text: chunk } = entry { text.push_str(chunk); } } if let Some(plan_text) = extract_proposed_plan_text(&text) { if !state.plan_item_state.started { state.plan_item_state.start(sess, turn_context).await; } state .plan_item_state .complete_with_text(sess, turn_context, plan_text) .await; } } } /// Emit a completed agent message in plan mode, respecting deferred starts. async fn emit_agent_message_in_plan_mode( sess: &Session, turn_context: &TurnContext, agent_message: codex_protocol::items::AgentMessageItem, state: &mut PlanModeStreamState, ) { let agent_message_id = agent_message.id.clone(); let text = agent_message_text(&agent_message); if text.trim().is_empty() { state.pending_agent_message_items.remove(&agent_message_id); state.started_agent_message_items.remove(&agent_message_id); return; } maybe_emit_pending_agent_message_start(sess, turn_context, state, &agent_message_id).await; if !state .started_agent_message_items .contains(&agent_message_id) { let start_item = state .pending_agent_message_items .remove(&agent_message_id) .unwrap_or_else(|| { TurnItem::AgentMessage(codex_protocol::items::AgentMessageItem { id: agent_message_id.clone(), content: Vec::new(), phase: None, }) }); sess.emit_turn_item_started(turn_context, &start_item).await; state .started_agent_message_items .insert(agent_message_id.clone()); } sess.emit_turn_item_completed(turn_context, TurnItem::AgentMessage(agent_message)) .await; state.started_agent_message_items.remove(&agent_message_id); } /// Emit completion for a plan-mode turn item, handling agent messages specially. async fn emit_turn_item_in_plan_mode( sess: &Session, turn_context: &TurnContext, turn_item: TurnItem, previously_active_item: Option<&TurnItem>, state: &mut PlanModeStreamState, ) { match turn_item { TurnItem::AgentMessage(agent_message) => { emit_agent_message_in_plan_mode(sess, turn_context, agent_message, state).await; } _ => { if previously_active_item.is_none() { sess.emit_turn_item_started(turn_context, &turn_item).await; } sess.emit_turn_item_completed(turn_context, turn_item).await; } } } /// Handle a completed assistant response item in plan mode, returning true if handled. async fn handle_assistant_item_done_in_plan_mode( sess: &Session, turn_context: &TurnContext, item: &ResponseItem, state: &mut PlanModeStreamState, previously_active_item: Option<&TurnItem>, last_agent_message: &mut Option, ) -> bool { if let ResponseItem::Message { role, .. } = item && role == "assistant" { maybe_complete_plan_item_from_message(sess, turn_context, state, item).await; if let Some(turn_item) = handle_non_tool_response_item(item, true).await { emit_turn_item_in_plan_mode( sess, turn_context, turn_item, previously_active_item, state, ) .await; } sess.record_conversation_items(turn_context, std::slice::from_ref(item)) .await; if let Some(agent_message) = last_assistant_message_from_item(item, true) { *last_agent_message = Some(agent_message); } return true; } false } async fn drain_in_flight( in_flight: &mut FuturesOrdered>>, sess: Arc, turn_context: Arc, ) -> CodexResult<()> { while let Some(res) = in_flight.next().await { match res { Ok(response_input) => { sess.record_conversation_items(&turn_context, &[response_input.into()]) .await; } Err(err) => { error_or_panic(format!("in-flight tool future failed during drain: {err}")); } } } Ok(()) } #[allow(clippy::too_many_arguments)] #[instrument(level = "trace", skip_all, fields( turn_id = %turn_context.sub_id, model = %turn_context.model_info.slug ) )] async fn try_run_sampling_request( router: Arc, sess: Arc, turn_context: Arc, client_session: &mut ModelClientSession, turn_metadata_header: Option<&str>, turn_diff_tracker: SharedTurnDiffTracker, server_model_warning_emitted_for_turn: &mut bool, prompt: &Prompt, cancellation_token: CancellationToken, ) -> CodexResult { let collaboration_mode = sess.current_collaboration_mode().await; let rollout_item = RolloutItem::TurnContext(turn_context.to_turn_context_item(collaboration_mode)); feedback_tags!( model = turn_context.model_info.slug.clone(), approval_policy = turn_context.approval_policy, sandbox_policy = turn_context.sandbox_policy, effort = turn_context.reasoning_effort, auth_mode = sess.services.auth_manager.auth_mode(), features = sess.features.enabled_features(), ); sess.persist_rollout_items(&[rollout_item]).await; let mut stream = client_session .stream( prompt, &turn_context.model_info, &turn_context.otel_manager, turn_context.reasoning_effort, turn_context.reasoning_summary, turn_metadata_header, ) .instrument(trace_span!("stream_request")) .or_cancel(&cancellation_token) .await??; let tool_runtime = ToolCallRuntime::new( Arc::clone(&router), Arc::clone(&sess), Arc::clone(&turn_context), Arc::clone(&turn_diff_tracker), ); let mut in_flight: FuturesOrdered>> = FuturesOrdered::new(); let mut needs_follow_up = false; let mut last_agent_message: Option = None; let mut active_item: Option = None; let mut should_emit_turn_diff = false; let plan_mode = turn_context.collaboration_mode.mode == ModeKind::Plan; let mut plan_mode_state = plan_mode.then(|| PlanModeStreamState::new(&turn_context.sub_id)); let receiving_span = trace_span!("receiving_stream"); let outcome: CodexResult = loop { let handle_responses = trace_span!( parent: &receiving_span, "handle_responses", otel.name = field::Empty, tool_name = field::Empty, from = field::Empty, ); let event = match stream .next() .instrument(trace_span!(parent: &handle_responses, "receiving")) .or_cancel(&cancellation_token) .await { Ok(event) => event, Err(codex_async_utils::CancelErr::Cancelled) => break Err(CodexErr::TurnAborted), }; let event = match event { Some(res) => res?, None => { break Err(CodexErr::Stream( "stream closed before response.completed".into(), None, )); } }; sess.services .otel_manager .record_responses(&handle_responses, &event); match event { ResponseEvent::Created => {} ResponseEvent::OutputItemDone(item) => { let previously_active_item = active_item.take(); if let Some(state) = plan_mode_state.as_mut() { if let Some(previous) = previously_active_item.as_ref() { let item_id = previous.id(); if matches!(previous, TurnItem::AgentMessage(_)) { flush_proposed_plan_segments_for_item( &sess, &turn_context, state, &item_id, ) .await; } } if handle_assistant_item_done_in_plan_mode( &sess, &turn_context, &item, state, previously_active_item.as_ref(), &mut last_agent_message, ) .await { continue; } } let mut ctx = HandleOutputCtx { sess: sess.clone(), turn_context: turn_context.clone(), tool_runtime: tool_runtime.clone(), cancellation_token: cancellation_token.child_token(), }; let output_result = handle_output_item_done(&mut ctx, item, previously_active_item) .instrument(handle_responses) .await?; if let Some(tool_future) = output_result.tool_future { in_flight.push_back(tool_future); } if let Some(agent_message) = output_result.last_agent_message { last_agent_message = Some(agent_message); } needs_follow_up |= output_result.needs_follow_up; } ResponseEvent::OutputItemAdded(item) => { if let Some(turn_item) = handle_non_tool_response_item(&item, plan_mode).await { if let Some(state) = plan_mode_state.as_mut() && matches!(turn_item, TurnItem::AgentMessage(_)) { let item_id = turn_item.id(); state .pending_agent_message_items .insert(item_id, turn_item.clone()); } else { sess.emit_turn_item_started(&turn_context, &turn_item).await; } active_item = Some(turn_item); } } ResponseEvent::ServerModel(server_model) => { if !*server_model_warning_emitted_for_turn && sess .maybe_warn_on_server_model_mismatch(&turn_context, server_model) .await { *server_model_warning_emitted_for_turn = true; } } ResponseEvent::ServerReasoningIncluded(included) => { sess.set_server_reasoning_included(included).await; } ResponseEvent::RateLimits(snapshot) => { // Update internal state with latest rate limits, but defer sending until // token usage is available to avoid duplicate TokenCount events. sess.update_rate_limits(&turn_context, snapshot).await; } ResponseEvent::ModelsEtag(etag) => { // Update internal state with latest models etag sess.services.models_manager.refresh_if_new_etag(etag).await; } ResponseEvent::Completed { response_id: _response_id, token_usage, can_append: _can_append, } => { if let Some(state) = plan_mode_state.as_mut() { flush_proposed_plan_segments_all(&sess, &turn_context, state).await; } sess.update_token_usage_info(&turn_context, token_usage.as_ref()) .await; should_emit_turn_diff = true; needs_follow_up |= sess.has_pending_input().await; break Ok(SamplingRequestResult { needs_follow_up, last_agent_message, }); } ResponseEvent::OutputTextDelta(delta) => { // In review child threads, suppress assistant text deltas; the // UI will show a selection popup from the final ReviewOutput. if let Some(active) = active_item.as_ref() { let item_id = active.id(); if let Some(state) = plan_mode_state.as_mut() && matches!(active, TurnItem::AgentMessage(_)) { let segments = state .plan_parsers .assistant_parser_mut(&item_id) .parse(&delta); handle_plan_segments(&sess, &turn_context, state, &item_id, segments).await; } else { let event = AgentMessageContentDeltaEvent { thread_id: sess.conversation_id.to_string(), turn_id: turn_context.sub_id.clone(), item_id, delta, }; sess.send_event(&turn_context, EventMsg::AgentMessageContentDelta(event)) .await; } } else { error_or_panic("OutputTextDelta without active item".to_string()); } } ResponseEvent::ReasoningSummaryDelta { delta, summary_index, } => { if let Some(active) = active_item.as_ref() { let event = ReasoningContentDeltaEvent { thread_id: sess.conversation_id.to_string(), turn_id: turn_context.sub_id.clone(), item_id: active.id(), delta, summary_index, }; sess.send_event(&turn_context, EventMsg::ReasoningContentDelta(event)) .await; } else { error_or_panic("ReasoningSummaryDelta without active item".to_string()); } } ResponseEvent::ReasoningSummaryPartAdded { summary_index } => { if let Some(active) = active_item.as_ref() { let event = EventMsg::AgentReasoningSectionBreak(AgentReasoningSectionBreakEvent { item_id: active.id(), summary_index, }); sess.send_event(&turn_context, event).await; } else { error_or_panic("ReasoningSummaryPartAdded without active item".to_string()); } } ResponseEvent::ReasoningContentDelta { delta, content_index, } => { if let Some(active) = active_item.as_ref() { let event = ReasoningRawContentDeltaEvent { thread_id: sess.conversation_id.to_string(), turn_id: turn_context.sub_id.clone(), item_id: active.id(), delta, content_index, }; sess.send_event(&turn_context, EventMsg::ReasoningRawContentDelta(event)) .await; } else { error_or_panic("ReasoningRawContentDelta without active item".to_string()); } } } }; drain_in_flight(&mut in_flight, sess.clone(), turn_context.clone()).await?; if should_emit_turn_diff { let unified_diff = { let mut tracker = turn_diff_tracker.lock().await; tracker.get_unified_diff() }; if let Ok(Some(unified_diff)) = unified_diff { let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff }); sess.clone().send_event(&turn_context, msg).await; } } outcome } pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -> Option { responses.iter().rev().find_map(|item| { if let ResponseItem::Message { role, content, .. } = item { if role == "assistant" { content.iter().rev().find_map(|ci| { if let ContentItem::OutputText { text } = ci { Some(text.clone()) } else { None } }) } else { None } } else { None } }) } use crate::memories::prompts::build_memory_tool_developer_instructions; #[cfg(test)] pub(crate) use tests::make_session_and_context; #[cfg(test)] pub(crate) use tests::make_session_and_context_with_rx; #[cfg(test)] pub(crate) use tests::make_session_configuration_for_tests; #[cfg(test)] mod tests { use super::*; use crate::CodexAuth; use crate::config::ConfigBuilder; use crate::config::test_config; use crate::config_loader::ConfigLayerStack; use crate::config_loader::ConfigLayerStackOrdering; use crate::config_loader::NetworkConstraints; use crate::config_loader::RequirementSource; use crate::config_loader::Sourced; use crate::exec::ExecToolCallOutput; use crate::function_tool::FunctionCallError; use crate::mcp_connection_manager::ToolInfo; use crate::models_manager::model_info; use crate::shell::default_user_shell; use crate::tools::format_exec_output_str; use codex_protocol::ThreadId; use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputPayload; use crate::protocol::CompactedItem; use crate::protocol::CreditsSnapshot; use crate::protocol::InitialHistory; use crate::protocol::RateLimitSnapshot; use crate::protocol::RateLimitWindow; use crate::protocol::ResumedHistory; use crate::protocol::TokenCountEvent; use crate::protocol::TokenUsage; use crate::protocol::TokenUsageInfo; use crate::state::TaskKind; use crate::tasks::SessionTask; use crate::tasks::SessionTaskContext; use crate::tools::ToolRouter; use crate::tools::context::ToolInvocation; use crate::tools::context::ToolOutput; use crate::tools::context::ToolPayload; use crate::tools::handlers::ShellHandler; use crate::tools::handlers::UnifiedExecHandler; use crate::tools::registry::ToolHandler; use crate::tools::router::ToolCallSource; use crate::turn_diff_tracker::TurnDiffTracker; use codex_app_server_protocol::AppInfo; use codex_otel::TelemetryAuthMode; use codex_protocol::models::BaseInstructions; use codex_protocol::models::ContentItem; use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; use codex_protocol::openai_models::ModelsResponse; use std::path::Path; use std::time::Duration; use tokio::time::sleep; use codex_protocol::mcp::CallToolResult as McpCallToolResult; use pretty_assertions::assert_eq; use rmcp::model::JsonObject; use rmcp::model::Tool; use serde::Deserialize; use serde_json::json; use std::path::PathBuf; use std::sync::Arc; use std::time::Duration as StdDuration; struct InstructionsTestCase { slug: &'static str, expects_apply_patch_instructions: bool, } fn user_message(text: &str) -> ResponseItem { ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: text.to_string(), }], end_turn: None, phase: None, } } fn skill_message(text: &str) -> ResponseItem { ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: text.to_string(), }], end_turn: None, phase: None, } } #[test] fn pre_turn_projection_uses_incoming_user_tokens_for_compaction() { assert!(is_projected_submission_over_auto_compact_limit(90, 15, 100)); assert!(!is_projected_submission_over_auto_compact_limit(90, 9, 100)); } #[test] fn pre_turn_projection_does_not_compact_with_unbounded_limit() { assert!(!is_projected_submission_over_auto_compact_limit( i64::MAX - 1, 100, i64::MAX, )); } #[test] fn post_compaction_projection_triggers_error_when_still_over_limit() { assert!(is_projected_submission_over_auto_compact_limit(95, 10, 100)); assert!(is_projected_submission_over_auto_compact_limit( 100, 10, 100 )); assert!(!is_projected_submission_over_auto_compact_limit( 80, 10, 100 )); } #[tokio::test] async fn reserved_compacted_without_incoming_items_records_initial_context_and_prompt() { let (session, turn_context) = make_session_and_context().await; let session = Arc::new(session); let turn_context = Arc::new(turn_context); let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; let response_item: ResponseItem = ResponseInputItem::from(input.clone()).into(); let stale_pre_turn_context_items = vec![ResponseItem::Message { id: None, role: "developer".to_string(), content: vec![ContentItem::InputText { text: "stale context diff".to_string(), }], end_turn: None, phase: None, }]; persist_pre_turn_items_for_compaction_outcome( &session, &turn_context, PreTurnCompactionOutcome::CompactedWithoutIncomingItems, &stale_pre_turn_context_items, &input, response_item.clone(), ) .await; let mut expected = session.build_initial_context(turn_context.as_ref()).await; expected.push(response_item); let actual = session.clone_history().await.raw_items().to_vec(); assert_eq!(actual, expected); } #[tokio::test] async fn compacted_with_incoming_items_persists_context_and_prompt() { let (session, turn_context) = make_session_and_context().await; let session = Arc::new(session); let turn_context = Arc::new(turn_context); let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; let response_item: ResponseItem = ResponseInputItem::from(input.clone()).into(); let stale_pre_turn_context_items = vec![ResponseItem::Message { id: None, role: "developer".to_string(), content: vec![ContentItem::InputText { text: "stale context diff".to_string(), }], end_turn: None, phase: None, }]; persist_pre_turn_items_for_compaction_outcome( &session, &turn_context, PreTurnCompactionOutcome::CompactedWithIncomingItems, &stale_pre_turn_context_items, &input, response_item.clone(), ) .await; let actual = session.clone_history().await.raw_items().to_vec(); let mut expected = session.build_initial_context(turn_context.as_ref()).await; expected.push(response_item); assert_eq!(actual, expected); } #[tokio::test] async fn run_turn_persists_pre_turn_context_before_apps_tool_listing_cancellation() { let (session, mut turn_context) = make_session_and_context().await; let session = Arc::new(session); let mut config = (*turn_context.config).clone(); config.features.enable(Feature::Apps); turn_context.features = config.features.clone(); turn_context.config = Arc::new(config); let turn_context = Arc::new(turn_context); let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; let response_item: ResponseItem = ResponseInputItem::from(input.clone()).into(); let pre_turn_context_items = vec![ResponseItem::Message { id: None, role: "developer".to_string(), content: vec![ContentItem::InputText { text: "model-visible setting diff".to_string(), }], end_turn: None, phase: None, }]; let cancellation_token = CancellationToken::new(); cancellation_token.cancel(); let result = run_turn( Arc::clone(&session), Arc::clone(&turn_context), input, pre_turn_context_items.clone(), None, cancellation_token, ) .await; assert_eq!(result, None); let mut expected_history = pre_turn_context_items; expected_history.push(response_item); let actual_history = session.clone_history().await.raw_items().to_vec(); assert_eq!(actual_history, expected_history); } #[tokio::test] async fn pre_turn_auto_compaction_noop_without_user_turn_boundary_returns_not_needed() { let (session, turn_context) = make_session_and_context().await; let session = Arc::new(session); let turn_context = Arc::new(turn_context); { let mut state = session.state.lock().await; state.set_token_info(Some(TokenUsageInfo { total_token_usage: TokenUsage { total_tokens: 1_000, ..TokenUsage::default() }, last_token_usage: TokenUsage { total_tokens: 1_000, ..TokenUsage::default() }, model_context_window: turn_context.model_context_window(), })); } let incoming_turn_items = vec![ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: "\necho hi\n".to_string(), }], end_turn: None, phase: None, }]; let outcome = run_pre_turn_auto_compaction_if_needed( &session, &turn_context, 10, &incoming_turn_items, &[], ) .await .expect("pre-turn compaction no-op should not fail"); assert_eq!(outcome, PreTurnCompactionOutcome::NotNeeded); assert_eq!(session.clone_history().await.raw_items(), &[]); } #[test] fn estimate_user_input_token_count_is_positive_for_text_input() { let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; let response_input_item = ResponseInputItem::from(input); let response_item: ResponseItem = response_input_item.into(); let estimated_tokens = estimate_item_token_count(&response_item); assert!(estimated_tokens > 0); } #[test] fn estimate_user_input_token_count_ignores_skill_and_mention_payload_lengths() { let short = vec![ UserInput::Skill { name: "s".to_string(), path: PathBuf::from("/s"), }, UserInput::Mention { name: "m".to_string(), path: "app://m".to_string(), }, ]; let long = vec![ UserInput::Skill { name: "very-long-skill-name-that-should-not-affect-prompt-serialization" .to_string(), path: PathBuf::from( "/very/long/skill/path/that/should/not/affect/prompt/serialization/SKILL.md", ), }, UserInput::Mention { name: "very-long-mention-name-that-should-not-affect-prompt-serialization" .to_string(), path: "app://very-long-connector-path-that-should-not-affect-prompt-serialization" .to_string(), }, ]; let short_response_input_item = ResponseInputItem::from(short); let long_response_input_item = ResponseInputItem::from(long); let short_response_item: ResponseItem = short_response_input_item.into(); let long_response_item: ResponseItem = long_response_input_item.into(); let short_tokens = estimate_item_token_count(&short_response_item); let long_tokens = estimate_item_token_count(&long_response_item); assert_eq!(short_tokens, long_tokens); } fn make_connector(id: &str, name: &str) -> AppInfo { AppInfo { id: id.to_string(), name: name.to_string(), description: None, logo_url: None, logo_url_dark: None, distribution_channel: None, branding: None, app_metadata: None, labels: None, install_url: None, is_accessible: true, is_enabled: true, } } fn make_mcp_tool( server_name: &str, tool_name: &str, connector_id: Option<&str>, connector_name: Option<&str>, ) -> ToolInfo { ToolInfo { server_name: server_name.to_string(), tool_name: tool_name.to_string(), tool: Tool { name: tool_name.to_string().into(), title: None, description: Some(format!("Test tool: {tool_name}").into()), input_schema: Arc::new(JsonObject::default()), output_schema: None, annotations: None, execution: None, icons: None, meta: None, }, connector_id: connector_id.map(str::to_string), connector_name: connector_name.map(str::to_string), } } fn function_call_rollout_item(name: &str, call_id: &str) -> RolloutItem { RolloutItem::ResponseItem(ResponseItem::FunctionCall { id: None, name: name.to_string(), arguments: "{}".to_string(), call_id: call_id.to_string(), }) } fn function_call_output_rollout_item(call_id: &str, output: &str) -> RolloutItem { RolloutItem::ResponseItem(ResponseItem::FunctionCallOutput { call_id: call_id.to_string(), output: FunctionCallOutputPayload::from_text(output.to_string()), }) } #[tokio::test] async fn get_base_instructions_no_user_content() { let prompt_with_apply_patch_instructions = include_str!("../prompt_with_apply_patch_instructions.md"); let models_response: ModelsResponse = serde_json::from_str(include_str!("../models.json")).expect("valid models.json"); let model_info_for_slug = |slug: &str, config: &Config| { let model = models_response .models .iter() .find(|candidate| candidate.slug == slug) .cloned() .unwrap_or_else(|| panic!("model slug {slug} is missing from models.json")); model_info::with_config_overrides(model, config) }; let test_cases = vec![ InstructionsTestCase { slug: "gpt-5", expects_apply_patch_instructions: false, }, InstructionsTestCase { slug: "gpt-5.1", expects_apply_patch_instructions: false, }, InstructionsTestCase { slug: "gpt-5.1-codex", expects_apply_patch_instructions: false, }, InstructionsTestCase { slug: "gpt-5.1-codex-max", expects_apply_patch_instructions: false, }, ]; let (session, _turn_context) = make_session_and_context().await; let config = test_config(); for test_case in test_cases { let model_info = model_info_for_slug(test_case.slug, &config); if test_case.expects_apply_patch_instructions { assert_eq!( model_info.base_instructions.as_str(), prompt_with_apply_patch_instructions ); } { let mut state = session.state.lock().await; state.session_configuration.base_instructions = model_info.base_instructions.clone(); } let base_instructions = session.get_base_instructions().await; assert_eq!(base_instructions.text, model_info.base_instructions); } } #[tokio::test] async fn reload_user_config_layer_updates_effective_apps_config() { let (session, _turn_context) = make_session_and_context().await; let codex_home = session.codex_home().await; std::fs::create_dir_all(&codex_home).expect("create codex home"); let config_toml_path = codex_home.join(CONFIG_TOML_FILE); std::fs::write( &config_toml_path, "[apps.calendar]\nenabled = false\ndisabled_reason = \"user\"\n", ) .expect("write user config"); session.reload_user_config_layer().await; let config = session.get_config().await; let apps_toml = config .config_layer_stack .effective_config() .as_table() .and_then(|table| table.get("apps")) .cloned() .expect("apps table"); let apps = crate::config::types::AppsConfigToml::deserialize(apps_toml) .expect("deserialize apps config"); let app = apps .apps .get("calendar") .expect("calendar app config exists"); assert!(!app.enabled); assert_eq!( app.disabled_reason, Some(crate::config::types::AppDisabledReason::User) ); } #[test] fn filter_connectors_for_input_skips_duplicate_slug_mentions() { let connectors = vec![ make_connector("one", "Foo Bar"), make_connector("two", "Foo-Bar"), ]; let input = vec![user_message("use $foo-bar")]; let explicitly_enabled_connectors = HashSet::new(); let skill_name_counts_lower = HashMap::new(); let selected = filter_connectors_for_input( &connectors, &input, &explicitly_enabled_connectors, &skill_name_counts_lower, ); assert_eq!(selected, Vec::new()); } #[test] fn filter_connectors_for_input_skips_when_skill_name_conflicts() { let connectors = vec![make_connector("one", "Todoist")]; let input = vec![user_message("use $todoist")]; let explicitly_enabled_connectors = HashSet::new(); let skill_name_counts_lower = HashMap::from([("todoist".to_string(), 1)]); let selected = filter_connectors_for_input( &connectors, &input, &explicitly_enabled_connectors, &skill_name_counts_lower, ); assert_eq!(selected, Vec::new()); } #[test] fn filter_connectors_for_input_skips_disabled_connectors() { let mut connector = make_connector("calendar", "Calendar"); connector.is_enabled = false; let input = vec![user_message("use $calendar")]; let explicitly_enabled_connectors = HashSet::new(); let selected = filter_connectors_for_input( &[connector], &input, &explicitly_enabled_connectors, &HashMap::new(), ); assert_eq!(selected, Vec::new()); } #[test] fn collect_explicit_app_ids_from_skill_items_includes_linked_mentions() { let connectors = vec![make_connector("calendar", "Calendar")]; let skill_items = vec![skill_message( "\ndemo\n/tmp/skills/demo/SKILL.md\nuse [$calendar](app://calendar)\n", )]; let connector_ids = collect_explicit_app_ids_from_skill_items(&skill_items, &connectors, &HashMap::new()); assert_eq!(connector_ids, HashSet::from(["calendar".to_string()])); } #[test] fn collect_explicit_app_ids_from_skill_items_resolves_unambiguous_plain_mentions() { let connectors = vec![make_connector("calendar", "Calendar")]; let skill_items = vec![skill_message( "\ndemo\n/tmp/skills/demo/SKILL.md\nuse $calendar\n", )]; let connector_ids = collect_explicit_app_ids_from_skill_items(&skill_items, &connectors, &HashMap::new()); assert_eq!(connector_ids, HashSet::from(["calendar".to_string()])); } #[test] fn collect_explicit_app_ids_from_skill_items_skips_plain_mentions_with_skill_conflicts() { let connectors = vec![make_connector("calendar", "Calendar")]; let skill_items = vec![skill_message( "\ndemo\n/tmp/skills/demo/SKILL.md\nuse $calendar\n", )]; let skill_name_counts_lower = HashMap::from([("calendar".to_string(), 1)]); let connector_ids = collect_explicit_app_ids_from_skill_items( &skill_items, &connectors, &skill_name_counts_lower, ); assert_eq!(connector_ids, HashSet::::new()); } #[test] fn non_app_mcp_tools_remain_visible_without_search_selection() { let mcp_tools = HashMap::from([ ( "mcp__codex_apps__calendar_create_event".to_string(), make_mcp_tool( CODEX_APPS_MCP_SERVER_NAME, "calendar_create_event", Some("calendar"), Some("Calendar"), ), ), ( "mcp__rmcp__echo".to_string(), make_mcp_tool("rmcp", "echo", None, None), ), ]); let mut selected_mcp_tools = mcp_tools .iter() .filter(|(_, tool)| tool.server_name != CODEX_APPS_MCP_SERVER_NAME) .map(|(name, tool)| (name.clone(), tool.clone())) .collect::>(); let connectors = connectors::accessible_connectors_from_mcp_tools(&mcp_tools); let explicitly_enabled_connectors = HashSet::new(); let connectors = filter_connectors_for_input( &connectors, &[user_message("run echo")], &explicitly_enabled_connectors, &HashMap::new(), ); let apps_mcp_tools = filter_codex_apps_mcp_tools_only(&mcp_tools, &connectors); selected_mcp_tools.extend(apps_mcp_tools); let mut tool_names: Vec = selected_mcp_tools.into_keys().collect(); tool_names.sort(); assert_eq!(tool_names, vec!["mcp__rmcp__echo".to_string()]); } #[test] fn search_tool_selection_keeps_codex_apps_tools_without_mentions() { let selected_tool_names = vec![ "mcp__codex_apps__calendar_create_event".to_string(), "mcp__rmcp__echo".to_string(), ]; let mcp_tools = HashMap::from([ ( "mcp__codex_apps__calendar_create_event".to_string(), make_mcp_tool( CODEX_APPS_MCP_SERVER_NAME, "calendar_create_event", Some("calendar"), Some("Calendar"), ), ), ( "mcp__rmcp__echo".to_string(), make_mcp_tool("rmcp", "echo", None, None), ), ]); let mut selected_mcp_tools = filter_mcp_tools_by_name(&mcp_tools, &selected_tool_names); let connectors = connectors::accessible_connectors_from_mcp_tools(&mcp_tools); let explicitly_enabled_connectors = HashSet::new(); let connectors = filter_connectors_for_input( &connectors, &[user_message("run the selected tools")], &explicitly_enabled_connectors, &HashMap::new(), ); let apps_mcp_tools = filter_codex_apps_mcp_tools_only(&mcp_tools, &connectors); selected_mcp_tools.extend(apps_mcp_tools); let mut tool_names: Vec = selected_mcp_tools.into_keys().collect(); tool_names.sort(); assert_eq!( tool_names, vec![ "mcp__codex_apps__calendar_create_event".to_string(), "mcp__rmcp__echo".to_string(), ] ); } #[test] fn apps_mentions_add_codex_apps_tools_to_search_selected_set() { let selected_tool_names = vec!["mcp__rmcp__echo".to_string()]; let mcp_tools = HashMap::from([ ( "mcp__codex_apps__calendar_create_event".to_string(), make_mcp_tool( CODEX_APPS_MCP_SERVER_NAME, "calendar_create_event", Some("calendar"), Some("Calendar"), ), ), ( "mcp__rmcp__echo".to_string(), make_mcp_tool("rmcp", "echo", None, None), ), ]); let mut selected_mcp_tools = filter_mcp_tools_by_name(&mcp_tools, &selected_tool_names); let connectors = connectors::accessible_connectors_from_mcp_tools(&mcp_tools); let explicitly_enabled_connectors = HashSet::new(); let connectors = filter_connectors_for_input( &connectors, &[user_message("use $calendar and then echo the response")], &explicitly_enabled_connectors, &HashMap::new(), ); let apps_mcp_tools = filter_codex_apps_mcp_tools_only(&mcp_tools, &connectors); selected_mcp_tools.extend(apps_mcp_tools); let mut tool_names: Vec = selected_mcp_tools.into_keys().collect(); tool_names.sort(); assert_eq!( tool_names, vec![ "mcp__codex_apps__calendar_create_event".to_string(), "mcp__rmcp__echo".to_string(), ] ); } #[test] fn extract_mcp_tool_selection_from_rollout_reads_search_tool_output() { let rollout_items = vec![ function_call_rollout_item(SEARCH_TOOL_BM25_TOOL_NAME, "search-1"), function_call_output_rollout_item( "search-1", &json!({ "active_selected_tools": [ "mcp__codex_apps__calendar_create_event", "mcp__codex_apps__calendar_list_events", ], }) .to_string(), ), ]; let selected = Session::extract_mcp_tool_selection_from_rollout(&rollout_items); assert_eq!( selected, Some(vec![ "mcp__codex_apps__calendar_create_event".to_string(), "mcp__codex_apps__calendar_list_events".to_string(), ]) ); } #[test] fn extract_mcp_tool_selection_from_rollout_latest_valid_payload_wins() { let rollout_items = vec![ function_call_rollout_item(SEARCH_TOOL_BM25_TOOL_NAME, "search-1"), function_call_output_rollout_item( "search-1", &json!({ "active_selected_tools": ["mcp__codex_apps__calendar_create_event"], }) .to_string(), ), function_call_rollout_item(SEARCH_TOOL_BM25_TOOL_NAME, "search-2"), function_call_output_rollout_item( "search-2", &json!({ "active_selected_tools": ["mcp__codex_apps__calendar_delete_event"], }) .to_string(), ), ]; let selected = Session::extract_mcp_tool_selection_from_rollout(&rollout_items); assert_eq!( selected, Some(vec!["mcp__codex_apps__calendar_delete_event".to_string(),]) ); } #[test] fn extract_mcp_tool_selection_from_rollout_ignores_non_search_and_malformed_payloads() { let rollout_items = vec![ function_call_rollout_item("shell", "shell-1"), function_call_output_rollout_item( "shell-1", &json!({ "active_selected_tools": ["mcp__codex_apps__should_be_ignored"], }) .to_string(), ), function_call_rollout_item(SEARCH_TOOL_BM25_TOOL_NAME, "search-1"), function_call_output_rollout_item("search-1", "{not-json"), function_call_output_rollout_item( "unknown-search-call", &json!({ "active_selected_tools": ["mcp__codex_apps__also_ignored"], }) .to_string(), ), function_call_output_rollout_item( "search-1", &json!({ "active_selected_tools": ["mcp__codex_apps__calendar_list_events"], }) .to_string(), ), ]; let selected = Session::extract_mcp_tool_selection_from_rollout(&rollout_items); assert_eq!( selected, Some(vec!["mcp__codex_apps__calendar_list_events".to_string(),]) ); } #[test] fn extract_mcp_tool_selection_from_rollout_returns_none_without_valid_search_output() { let rollout_items = vec![function_call_rollout_item( SEARCH_TOOL_BM25_TOOL_NAME, "search-1", )]; let selected = Session::extract_mcp_tool_selection_from_rollout(&rollout_items); assert_eq!(selected, None); } #[tokio::test] async fn reconstruct_history_matches_live_compactions() { let (session, turn_context) = make_session_and_context().await; let (rollout_items, expected) = sample_rollout(&session, &turn_context).await; let reconstruction_turn = session.new_default_turn().await; let reconstructed = session .reconstruct_history_from_rollout(reconstruction_turn.as_ref(), &rollout_items) .await; assert_eq!(expected, reconstructed); } #[tokio::test] async fn reconstruct_history_uses_replacement_history_verbatim() { let (session, turn_context) = make_session_and_context().await; let summary_item = ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: "summary".to_string(), }], end_turn: None, phase: None, }; let replacement_history = vec![ summary_item.clone(), ResponseItem::Message { id: None, role: "developer".to_string(), content: vec![ContentItem::InputText { text: "stale developer instructions".to_string(), }], end_turn: None, phase: None, }, ]; let rollout_items = vec![RolloutItem::Compacted(CompactedItem { message: String::new(), replacement_history: Some(replacement_history.clone()), })]; let reconstructed = session .reconstruct_history_from_rollout(&turn_context, &rollout_items) .await; assert_eq!(reconstructed, replacement_history); } #[tokio::test] async fn record_initial_history_reconstructs_resumed_transcript() { let (session, turn_context) = make_session_and_context().await; let (rollout_items, expected) = sample_rollout(&session, &turn_context).await; session .record_initial_history(InitialHistory::Resumed(ResumedHistory { conversation_id: ThreadId::default(), history: rollout_items, rollout_path: PathBuf::from("/tmp/resume.jsonl"), })) .await; let history = session.state.lock().await.clone_history(); assert_eq!(expected, history.raw_items()); } #[tokio::test] async fn record_initial_history_resumed_hydrates_previous_model() { let (session, turn_context) = make_session_and_context().await; let previous_model = "previous-rollout-model"; let rollout_items = vec![RolloutItem::TurnContext(TurnContextItem { turn_id: Some(turn_context.sub_id.clone()), cwd: turn_context.cwd.clone(), approval_policy: turn_context.approval_policy, sandbox_policy: turn_context.sandbox_policy.clone(), network: None, model: previous_model.to_string(), personality: turn_context.personality, collaboration_mode: Some(turn_context.collaboration_mode.clone()), effort: turn_context.reasoning_effort, summary: turn_context.reasoning_summary, user_instructions: None, developer_instructions: None, final_output_json_schema: None, truncation_policy: Some(turn_context.truncation_policy.into()), })]; session .record_initial_history(InitialHistory::Resumed(ResumedHistory { conversation_id: ThreadId::default(), history: rollout_items, rollout_path: PathBuf::from("/tmp/resume.jsonl"), })) .await; assert_eq!( session.previous_model().await, Some(previous_model.to_string()) ); } #[tokio::test] async fn resumed_history_seeds_initial_context_on_first_turn_only() { let (session, turn_context) = make_session_and_context().await; let (rollout_items, mut expected) = sample_rollout(&session, &turn_context).await; session .record_initial_history(InitialHistory::Resumed(ResumedHistory { conversation_id: ThreadId::default(), history: rollout_items, rollout_path: PathBuf::from("/tmp/resume.jsonl"), })) .await; let history_before_seed = session.state.lock().await.clone_history(); assert_eq!(expected, history_before_seed.raw_items()); session.seed_initial_context_if_needed(&turn_context).await; expected.extend(session.build_initial_context(&turn_context).await); let history_after_seed = session.clone_history().await; assert_eq!(expected, history_after_seed.raw_items()); session.seed_initial_context_if_needed(&turn_context).await; let history_after_second_seed = session.clone_history().await; assert_eq!(expected, history_after_second_seed.raw_items()); } #[tokio::test] async fn record_initial_history_seeds_token_info_from_rollout() { let (session, turn_context) = make_session_and_context().await; let (mut rollout_items, _expected) = sample_rollout(&session, &turn_context).await; let info1 = TokenUsageInfo { total_token_usage: TokenUsage { input_tokens: 10, cached_input_tokens: 0, output_tokens: 20, reasoning_output_tokens: 0, total_tokens: 30, }, last_token_usage: TokenUsage { input_tokens: 3, cached_input_tokens: 0, output_tokens: 4, reasoning_output_tokens: 0, total_tokens: 7, }, model_context_window: Some(1_000), }; let info2 = TokenUsageInfo { total_token_usage: TokenUsage { input_tokens: 100, cached_input_tokens: 50, output_tokens: 200, reasoning_output_tokens: 25, total_tokens: 375, }, last_token_usage: TokenUsage { input_tokens: 10, cached_input_tokens: 0, output_tokens: 20, reasoning_output_tokens: 5, total_tokens: 35, }, model_context_window: Some(2_000), }; rollout_items.push(RolloutItem::EventMsg(EventMsg::TokenCount( TokenCountEvent { info: Some(info1), rate_limits: None, }, ))); rollout_items.push(RolloutItem::EventMsg(EventMsg::TokenCount( TokenCountEvent { info: None, rate_limits: None, }, ))); rollout_items.push(RolloutItem::EventMsg(EventMsg::TokenCount( TokenCountEvent { info: Some(info2.clone()), rate_limits: None, }, ))); rollout_items.push(RolloutItem::EventMsg(EventMsg::TokenCount( TokenCountEvent { info: None, rate_limits: None, }, ))); session .record_initial_history(InitialHistory::Resumed(ResumedHistory { conversation_id: ThreadId::default(), history: rollout_items, rollout_path: PathBuf::from("/tmp/resume.jsonl"), })) .await; let actual = session.state.lock().await.token_info(); assert_eq!(actual, Some(info2)); } #[tokio::test] async fn recompute_token_usage_uses_session_base_instructions() { let (session, turn_context) = make_session_and_context().await; let override_instructions = "SESSION_OVERRIDE_INSTRUCTIONS_ONLY".repeat(120); { let mut state = session.state.lock().await; state.session_configuration.base_instructions = override_instructions.clone(); } let item = user_message("hello"); session .record_into_history(std::slice::from_ref(&item), &turn_context) .await; let history = session.clone_history().await; let session_base_instructions = BaseInstructions { text: override_instructions, }; let expected_tokens = history .estimate_token_count_with_base_instructions(&session_base_instructions) .expect("estimate with session base instructions"); let model_estimated_tokens = history .estimate_token_count(&turn_context) .expect("estimate with model instructions"); assert_ne!(expected_tokens, model_estimated_tokens); session.recompute_token_usage(&turn_context).await; let actual_tokens = session .state .lock() .await .token_info() .expect("token info") .last_token_usage .total_tokens; assert_eq!(actual_tokens, expected_tokens.max(0)); } #[tokio::test] async fn recompute_token_usage_updates_model_context_window() { let (session, mut turn_context) = make_session_and_context().await; { let mut state = session.state.lock().await; state.set_token_info(Some(TokenUsageInfo { total_token_usage: TokenUsage::default(), last_token_usage: TokenUsage::default(), model_context_window: Some(258_400), })); } turn_context.model_info.context_window = Some(128_000); turn_context.model_info.effective_context_window_percent = 100; session.recompute_token_usage(&turn_context).await; let actual = session.state.lock().await.token_info().expect("token info"); assert_eq!(actual.model_context_window, Some(128_000)); } #[tokio::test] async fn record_initial_history_reconstructs_forked_transcript() { let (session, turn_context) = make_session_and_context().await; let (rollout_items, mut expected) = sample_rollout(&session, &turn_context).await; session .record_initial_history(InitialHistory::Forked(rollout_items)) .await; let reconstruction_turn = session.new_default_turn().await; expected.extend( session .build_initial_context(reconstruction_turn.as_ref()) .await, ); let history = session.state.lock().await.clone_history(); assert_eq!(expected, history.raw_items()); } #[tokio::test] async fn record_initial_history_forked_hydrates_previous_model() { let (session, turn_context) = make_session_and_context().await; let previous_model = "forked-rollout-model"; let rollout_items = vec![RolloutItem::TurnContext(TurnContextItem { turn_id: Some(turn_context.sub_id.clone()), cwd: turn_context.cwd.clone(), approval_policy: turn_context.approval_policy, sandbox_policy: turn_context.sandbox_policy.clone(), network: None, model: previous_model.to_string(), personality: turn_context.personality, collaboration_mode: Some(turn_context.collaboration_mode.clone()), effort: turn_context.reasoning_effort, summary: turn_context.reasoning_summary, user_instructions: None, developer_instructions: None, final_output_json_schema: None, truncation_policy: Some(turn_context.truncation_policy.into()), })]; session .record_initial_history(InitialHistory::Forked(rollout_items)) .await; assert_eq!( session.previous_model().await, Some(previous_model.to_string()) ); } #[tokio::test] async fn thread_rollback_drops_last_turn_from_history() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let initial_context = sess.build_initial_context(tc.as_ref()).await; sess.record_into_history(&initial_context, tc.as_ref()) .await; let turn_1 = vec![ ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: "turn 1 user".to_string(), }], end_turn: None, phase: None, }, ResponseItem::Message { id: None, role: "assistant".to_string(), content: vec![ContentItem::OutputText { text: "turn 1 assistant".to_string(), }], end_turn: None, phase: None, }, ]; sess.record_into_history(&turn_1, tc.as_ref()).await; let turn_2 = vec![ ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: "turn 2 user".to_string(), }], end_turn: None, phase: None, }, ResponseItem::Message { id: None, role: "assistant".to_string(), content: vec![ContentItem::OutputText { text: "turn 2 assistant".to_string(), }], end_turn: None, phase: None, }, ]; sess.record_into_history(&turn_2, tc.as_ref()).await; handlers::thread_rollback(&sess, "sub-1".to_string(), 1).await; let rollback_event = wait_for_thread_rolled_back(&rx).await; assert_eq!(rollback_event.num_turns, 1); let mut expected = Vec::new(); expected.extend(initial_context); expected.extend(turn_1); let history = sess.clone_history().await; assert_eq!(expected, history.raw_items()); assert_eq!( sess.previous_model().await, Some(tc.model_info.slug.clone()) ); } #[tokio::test] async fn thread_rollback_clears_history_when_num_turns_exceeds_existing_turns() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let initial_context = sess.build_initial_context(tc.as_ref()).await; sess.record_into_history(&initial_context, tc.as_ref()) .await; let turn_1 = vec![ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: "turn 1 user".to_string(), }], end_turn: None, phase: None, }]; sess.record_into_history(&turn_1, tc.as_ref()).await; handlers::thread_rollback(&sess, "sub-1".to_string(), 99).await; let rollback_event = wait_for_thread_rolled_back(&rx).await; assert_eq!(rollback_event.num_turns, 99); let history = sess.clone_history().await; assert_eq!(initial_context, history.raw_items()); } #[tokio::test] async fn thread_rollback_fails_when_turn_in_progress() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let initial_context = sess.build_initial_context(tc.as_ref()).await; sess.record_into_history(&initial_context, tc.as_ref()) .await; *sess.active_turn.lock().await = Some(crate::state::ActiveTurn::default()); handlers::thread_rollback(&sess, "sub-1".to_string(), 1).await; let error_event = wait_for_thread_rollback_failed(&rx).await; assert_eq!( error_event.codex_error_info, Some(CodexErrorInfo::ThreadRollbackFailed) ); let history = sess.clone_history().await; assert_eq!(initial_context, history.raw_items()); } #[tokio::test] async fn thread_rollback_fails_when_num_turns_is_zero() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let initial_context = sess.build_initial_context(tc.as_ref()).await; sess.record_into_history(&initial_context, tc.as_ref()) .await; handlers::thread_rollback(&sess, "sub-1".to_string(), 0).await; let error_event = wait_for_thread_rollback_failed(&rx).await; assert_eq!(error_event.message, "num_turns must be >= 1"); assert_eq!( error_event.codex_error_info, Some(CodexErrorInfo::ThreadRollbackFailed) ); let history = sess.clone_history().await; assert_eq!(initial_context, history.raw_items()); } #[tokio::test] async fn set_rate_limits_retains_previous_credits() { let codex_home = tempfile::tempdir().expect("create temp dir"); let config = build_test_config(codex_home.path()).await; let config = Arc::new(config); let model = ModelsManager::get_model_offline_for_tests(config.model.as_deref()); let model_info = ModelsManager::construct_model_info_offline_for_tests(model.as_str(), &config); let reasoning_effort = config.model_reasoning_effort; let collaboration_mode = CollaborationMode { mode: ModeKind::Default, settings: Settings { model, reasoning_effort, developer_instructions: None, }, }; let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), collaboration_mode, model_reasoning_summary: config.model_reasoning_summary, developer_instructions: config.developer_instructions.clone(), user_instructions: config.user_instructions.clone(), personality: config.personality, base_instructions: config .base_instructions .clone() .unwrap_or_else(|| model_info.get_model_instructions(config.personality)), compact_prompt: config.compact_prompt.clone(), approval_policy: config.permissions.approval_policy.clone(), sandbox_policy: config.permissions.sandbox_policy.clone(), windows_sandbox_level: WindowsSandboxLevel::from_config(&config), cwd: config.cwd.clone(), codex_home: config.codex_home.clone(), thread_name: None, original_config_do_not_use: Arc::clone(&config), session_source: SessionSource::Exec, dynamic_tools: Vec::new(), persist_extended_history: false, }; let mut state = SessionState::new(session_configuration); let initial = RateLimitSnapshot { limit_id: None, limit_name: None, primary: Some(RateLimitWindow { used_percent: 10.0, window_minutes: Some(15), resets_at: Some(1_700), }), secondary: None, credits: Some(CreditsSnapshot { has_credits: true, unlimited: false, balance: Some("10.00".to_string()), }), plan_type: Some(codex_protocol::account::PlanType::Plus), }; state.set_rate_limits(initial.clone()); let update = RateLimitSnapshot { limit_id: Some("codex_other".to_string()), limit_name: Some("codex_other".to_string()), primary: Some(RateLimitWindow { used_percent: 40.0, window_minutes: Some(30), resets_at: Some(1_800), }), secondary: Some(RateLimitWindow { used_percent: 5.0, window_minutes: Some(60), resets_at: Some(1_900), }), credits: None, plan_type: None, }; state.set_rate_limits(update.clone()); assert_eq!( state.latest_rate_limits, Some(RateLimitSnapshot { limit_id: Some("codex_other".to_string()), limit_name: Some("codex_other".to_string()), primary: update.primary.clone(), secondary: update.secondary, credits: initial.credits, plan_type: initial.plan_type, }) ); } #[tokio::test] async fn set_rate_limits_updates_plan_type_when_present() { let codex_home = tempfile::tempdir().expect("create temp dir"); let config = build_test_config(codex_home.path()).await; let config = Arc::new(config); let model = ModelsManager::get_model_offline_for_tests(config.model.as_deref()); let model_info = ModelsManager::construct_model_info_offline_for_tests(model.as_str(), &config); let reasoning_effort = config.model_reasoning_effort; let collaboration_mode = CollaborationMode { mode: ModeKind::Default, settings: Settings { model, reasoning_effort, developer_instructions: None, }, }; let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), collaboration_mode, model_reasoning_summary: config.model_reasoning_summary, developer_instructions: config.developer_instructions.clone(), user_instructions: config.user_instructions.clone(), personality: config.personality, base_instructions: config .base_instructions .clone() .unwrap_or_else(|| model_info.get_model_instructions(config.personality)), compact_prompt: config.compact_prompt.clone(), approval_policy: config.permissions.approval_policy.clone(), sandbox_policy: config.permissions.sandbox_policy.clone(), windows_sandbox_level: WindowsSandboxLevel::from_config(&config), cwd: config.cwd.clone(), codex_home: config.codex_home.clone(), thread_name: None, original_config_do_not_use: Arc::clone(&config), session_source: SessionSource::Exec, dynamic_tools: Vec::new(), persist_extended_history: false, }; let mut state = SessionState::new(session_configuration); let initial = RateLimitSnapshot { limit_id: None, limit_name: None, primary: Some(RateLimitWindow { used_percent: 15.0, window_minutes: Some(20), resets_at: Some(1_600), }), secondary: Some(RateLimitWindow { used_percent: 5.0, window_minutes: Some(45), resets_at: Some(1_650), }), credits: Some(CreditsSnapshot { has_credits: true, unlimited: false, balance: Some("15.00".to_string()), }), plan_type: Some(codex_protocol::account::PlanType::Plus), }; state.set_rate_limits(initial.clone()); let update = RateLimitSnapshot { limit_id: None, limit_name: None, primary: Some(RateLimitWindow { used_percent: 35.0, window_minutes: Some(25), resets_at: Some(1_700), }), secondary: None, credits: None, plan_type: Some(codex_protocol::account::PlanType::Pro), }; state.set_rate_limits(update.clone()); assert_eq!( state.latest_rate_limits, Some(RateLimitSnapshot { limit_id: Some("codex".to_string()), limit_name: None, primary: update.primary, secondary: update.secondary, credits: initial.credits, plan_type: update.plan_type, }) ); } #[test] fn prefers_structured_content_when_present() { let ctr = McpCallToolResult { // Content present but should be ignored because structured_content is set. content: vec![text_block("ignored")], is_error: None, structured_content: Some(json!({ "ok": true, "value": 42 })), meta: None, }; let got = FunctionCallOutputPayload::from(&ctr); let expected = FunctionCallOutputPayload { body: FunctionCallOutputBody::Text( serde_json::to_string(&json!({ "ok": true, "value": 42 })) .unwrap(), ), success: Some(true), }; assert_eq!(expected, got); } #[tokio::test] async fn includes_timed_out_message() { let exec = ExecToolCallOutput { exit_code: 0, stdout: StreamOutput::new(String::new()), stderr: StreamOutput::new(String::new()), aggregated_output: StreamOutput::new("Command output".to_string()), duration: StdDuration::from_secs(1), timed_out: true, }; let (_, turn_context) = make_session_and_context().await; let out = format_exec_output_str(&exec, turn_context.truncation_policy); assert_eq!( out, "command timed out after 1000 milliseconds\nCommand output" ); } #[tokio::test] async fn turn_context_with_model_updates_model_fields() { let (session, mut turn_context) = make_session_and_context().await; turn_context.reasoning_effort = Some(ReasoningEffortConfig::Minimal); let updated = turn_context .with_model("gpt-5.1".to_string(), &session.services.models_manager) .await; let expected_model_info = session .services .models_manager .get_model_info("gpt-5.1", updated.config.as_ref()) .await; assert_eq!(updated.config.model.as_deref(), Some("gpt-5.1")); assert_eq!(updated.collaboration_mode.model(), "gpt-5.1"); assert_eq!(updated.model_info, expected_model_info); assert_eq!( updated.reasoning_effort, Some(ReasoningEffortConfig::Medium) ); assert_eq!( updated.collaboration_mode.reasoning_effort(), Some(ReasoningEffortConfig::Medium) ); assert_eq!( updated.config.model_reasoning_effort, Some(ReasoningEffortConfig::Medium) ); assert_eq!( updated.truncation_policy, expected_model_info.truncation_policy.into() ); assert!(!Arc::ptr_eq( &updated.tool_call_gate, &turn_context.tool_call_gate )); } #[test] fn falls_back_to_content_when_structured_is_null() { let ctr = McpCallToolResult { content: vec![text_block("hello"), text_block("world")], is_error: None, structured_content: Some(serde_json::Value::Null), meta: None, }; let got = FunctionCallOutputPayload::from(&ctr); let expected = FunctionCallOutputPayload { body: FunctionCallOutputBody::Text( serde_json::to_string(&vec![text_block("hello"), text_block("world")]).unwrap(), ), success: Some(true), }; assert_eq!(expected, got); } #[test] fn success_flag_reflects_is_error_true() { let ctr = McpCallToolResult { content: vec![text_block("unused")], is_error: Some(true), structured_content: Some(json!({ "message": "bad" })), meta: None, }; let got = FunctionCallOutputPayload::from(&ctr); let expected = FunctionCallOutputPayload { body: FunctionCallOutputBody::Text( serde_json::to_string(&json!({ "message": "bad" })).unwrap(), ), success: Some(false), }; assert_eq!(expected, got); } #[test] fn success_flag_true_with_no_error_and_content_used() { let ctr = McpCallToolResult { content: vec![text_block("alpha")], is_error: Some(false), structured_content: None, meta: None, }; let got = FunctionCallOutputPayload::from(&ctr); let expected = FunctionCallOutputPayload { body: FunctionCallOutputBody::Text( serde_json::to_string(&vec![text_block("alpha")]).unwrap(), ), success: Some(true), }; assert_eq!(expected, got); } async fn wait_for_thread_rolled_back( rx: &async_channel::Receiver, ) -> crate::protocol::ThreadRolledBackEvent { let deadline = StdDuration::from_secs(2); let start = std::time::Instant::now(); loop { let remaining = deadline.saturating_sub(start.elapsed()); let evt = tokio::time::timeout(remaining, rx.recv()) .await .expect("timeout waiting for event") .expect("event"); match evt.msg { EventMsg::ThreadRolledBack(payload) => return payload, _ => continue, } } } async fn wait_for_thread_rollback_failed(rx: &async_channel::Receiver) -> ErrorEvent { let deadline = StdDuration::from_secs(2); let start = std::time::Instant::now(); loop { let remaining = deadline.saturating_sub(start.elapsed()); let evt = tokio::time::timeout(remaining, rx.recv()) .await .expect("timeout waiting for event") .expect("event"); match evt.msg { EventMsg::Error(payload) if payload.codex_error_info == Some(CodexErrorInfo::ThreadRollbackFailed) => { return payload; } _ => continue, } } } fn text_block(s: &str) -> serde_json::Value { json!({ "type": "text", "text": s, }) } async fn build_test_config(codex_home: &Path) -> Config { ConfigBuilder::default() .codex_home(codex_home.to_path_buf()) .build() .await .expect("load default test config") } fn otel_manager( conversation_id: ThreadId, config: &Config, model_info: &ModelInfo, session_source: SessionSource, ) -> OtelManager { OtelManager::new( conversation_id, ModelsManager::get_model_offline_for_tests(config.model.as_deref()).as_str(), model_info.slug.as_str(), None, Some("test@test.com".to_string()), Some(TelemetryAuthMode::Chatgpt), "test_originator".to_string(), false, "test".to_string(), session_source, ) } pub(crate) async fn make_session_configuration_for_tests() -> SessionConfiguration { let codex_home = tempfile::tempdir().expect("create temp dir"); let config = build_test_config(codex_home.path()).await; let config = Arc::new(config); let model = ModelsManager::get_model_offline_for_tests(config.model.as_deref()); let model_info = ModelsManager::construct_model_info_offline_for_tests(model.as_str(), &config); let reasoning_effort = config.model_reasoning_effort; let collaboration_mode = CollaborationMode { mode: ModeKind::Default, settings: Settings { model, reasoning_effort, developer_instructions: None, }, }; SessionConfiguration { provider: config.model_provider.clone(), collaboration_mode, model_reasoning_summary: config.model_reasoning_summary, developer_instructions: config.developer_instructions.clone(), user_instructions: config.user_instructions.clone(), personality: config.personality, base_instructions: config .base_instructions .clone() .unwrap_or_else(|| model_info.get_model_instructions(config.personality)), compact_prompt: config.compact_prompt.clone(), approval_policy: config.permissions.approval_policy.clone(), sandbox_policy: config.permissions.sandbox_policy.clone(), windows_sandbox_level: WindowsSandboxLevel::from_config(&config), cwd: config.cwd.clone(), codex_home: config.codex_home.clone(), thread_name: None, original_config_do_not_use: Arc::clone(&config), session_source: SessionSource::Exec, dynamic_tools: Vec::new(), persist_extended_history: false, } } // todo: use online model info pub(crate) async fn make_session_and_context() -> (Session, TurnContext) { let (tx_event, _rx_event) = async_channel::unbounded(); let codex_home = tempfile::tempdir().expect("create temp dir"); let config = build_test_config(codex_home.path()).await; let config = Arc::new(config); let conversation_id = ThreadId::default(); let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); let models_manager = Arc::new(ModelsManager::new( config.codex_home.clone(), auth_manager.clone(), )); let agent_control = AgentControl::default(); let exec_policy = ExecPolicyManager::default(); let (agent_status_tx, _agent_status_rx) = watch::channel(AgentStatus::PendingInit); let model = ModelsManager::get_model_offline_for_tests(config.model.as_deref()); let model_info = ModelsManager::construct_model_info_offline_for_tests(model.as_str(), &config); let reasoning_effort = config.model_reasoning_effort; let collaboration_mode = CollaborationMode { mode: ModeKind::Default, settings: Settings { model, reasoning_effort, developer_instructions: None, }, }; let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), collaboration_mode, model_reasoning_summary: config.model_reasoning_summary, developer_instructions: config.developer_instructions.clone(), user_instructions: config.user_instructions.clone(), personality: config.personality, base_instructions: config .base_instructions .clone() .unwrap_or_else(|| model_info.get_model_instructions(config.personality)), compact_prompt: config.compact_prompt.clone(), approval_policy: config.permissions.approval_policy.clone(), sandbox_policy: config.permissions.sandbox_policy.clone(), windows_sandbox_level: WindowsSandboxLevel::from_config(&config), cwd: config.cwd.clone(), codex_home: config.codex_home.clone(), thread_name: None, original_config_do_not_use: Arc::clone(&config), session_source: SessionSource::Exec, dynamic_tools: Vec::new(), persist_extended_history: false, }; let per_turn_config = Session::build_per_turn_config(&session_configuration); let model_info = ModelsManager::construct_model_info_offline_for_tests( session_configuration.collaboration_mode.model(), &per_turn_config, ); let otel_manager = otel_manager( conversation_id, config.as_ref(), &model_info, session_configuration.session_source.clone(), ); let mut state = SessionState::new(session_configuration.clone()); mark_state_initial_context_seeded(&mut state); let skills_manager = Arc::new(SkillsManager::new(config.codex_home.clone())); let network_approval = Arc::new(NetworkApprovalService::default()); let file_watcher = Arc::new(FileWatcher::noop()); let services = SessionServices { mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())), mcp_startup_cancellation_token: Mutex::new(CancellationToken::new()), unified_exec_manager: UnifiedExecProcessManager::default(), analytics_events_client: AnalyticsEventsClient::new( Arc::clone(&config), Arc::clone(&auth_manager), ), hooks: Hooks::new(HooksConfig { legacy_notify_argv: config.notify.clone(), }), rollout: Mutex::new(None), user_shell: Arc::new(default_user_shell()), shell_snapshot_tx: watch::channel(None).0, show_raw_agent_reasoning: config.show_raw_agent_reasoning, exec_policy, auth_manager: auth_manager.clone(), otel_manager: otel_manager.clone(), models_manager: Arc::clone(&models_manager), tool_approvals: Mutex::new(ApprovalStore::default()), skills_manager, file_watcher, agent_control, network_proxy: None, network_approval: Arc::clone(&network_approval), state_db: None, model_client: ModelClient::new( Some(auth_manager.clone()), conversation_id, session_configuration.provider.clone(), session_configuration.session_source.clone(), config.model_verbosity, model_info.prefer_websockets || config.features.enabled(Feature::ResponsesWebsockets) || config.features.enabled(Feature::ResponsesWebsocketsV2), config.features.enabled(Feature::ResponsesWebsocketsV2), config.features.enabled(Feature::EnableRequestCompression), config.features.enabled(Feature::RuntimeMetrics), Session::build_model_client_beta_features_header(config.as_ref()), ), }; let js_repl = Arc::new(JsReplHandle::with_node_path( config.js_repl_node_path.clone(), config.codex_home.clone(), )); let turn_context = Session::make_turn_context( Some(Arc::clone(&auth_manager)), &otel_manager, session_configuration.provider.clone(), &session_configuration, per_turn_config, model_info, None, "turn_id".to_string(), Arc::clone(&js_repl), ); let session = Session { conversation_id, tx_event, agent_status: agent_status_tx, state: Mutex::new(state), features: config.features.clone(), pending_mcp_server_refresh_config: Mutex::new(None), active_turn: Mutex::new(None), services, js_repl, next_internal_sub_id: AtomicU64::new(0), }; (session, turn_context) } // Like make_session_and_context, but returns Arc and the event receiver // so tests can assert on emitted events. pub(crate) async fn make_session_and_context_with_rx() -> ( Arc, Arc, async_channel::Receiver, ) { let (tx_event, rx_event) = async_channel::unbounded(); let codex_home = tempfile::tempdir().expect("create temp dir"); let config = build_test_config(codex_home.path()).await; let config = Arc::new(config); let conversation_id = ThreadId::default(); let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); let models_manager = Arc::new(ModelsManager::new( config.codex_home.clone(), auth_manager.clone(), )); let agent_control = AgentControl::default(); let exec_policy = ExecPolicyManager::default(); let (agent_status_tx, _agent_status_rx) = watch::channel(AgentStatus::PendingInit); let model = ModelsManager::get_model_offline_for_tests(config.model.as_deref()); let model_info = ModelsManager::construct_model_info_offline_for_tests(model.as_str(), &config); let reasoning_effort = config.model_reasoning_effort; let collaboration_mode = CollaborationMode { mode: ModeKind::Default, settings: Settings { model, reasoning_effort, developer_instructions: None, }, }; let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), collaboration_mode, model_reasoning_summary: config.model_reasoning_summary, developer_instructions: config.developer_instructions.clone(), user_instructions: config.user_instructions.clone(), personality: config.personality, base_instructions: config .base_instructions .clone() .unwrap_or_else(|| model_info.get_model_instructions(config.personality)), compact_prompt: config.compact_prompt.clone(), approval_policy: config.permissions.approval_policy.clone(), sandbox_policy: config.permissions.sandbox_policy.clone(), windows_sandbox_level: WindowsSandboxLevel::from_config(&config), cwd: config.cwd.clone(), codex_home: config.codex_home.clone(), thread_name: None, original_config_do_not_use: Arc::clone(&config), session_source: SessionSource::Exec, dynamic_tools: Vec::new(), persist_extended_history: false, }; let per_turn_config = Session::build_per_turn_config(&session_configuration); let model_info = ModelsManager::construct_model_info_offline_for_tests( session_configuration.collaboration_mode.model(), &per_turn_config, ); let otel_manager = otel_manager( conversation_id, config.as_ref(), &model_info, session_configuration.session_source.clone(), ); let mut state = SessionState::new(session_configuration.clone()); mark_state_initial_context_seeded(&mut state); let skills_manager = Arc::new(SkillsManager::new(config.codex_home.clone())); let network_approval = Arc::new(NetworkApprovalService::default()); let file_watcher = Arc::new(FileWatcher::noop()); let services = SessionServices { mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())), mcp_startup_cancellation_token: Mutex::new(CancellationToken::new()), unified_exec_manager: UnifiedExecProcessManager::default(), analytics_events_client: AnalyticsEventsClient::new( Arc::clone(&config), Arc::clone(&auth_manager), ), hooks: Hooks::new(HooksConfig { legacy_notify_argv: config.notify.clone(), }), rollout: Mutex::new(None), user_shell: Arc::new(default_user_shell()), shell_snapshot_tx: watch::channel(None).0, show_raw_agent_reasoning: config.show_raw_agent_reasoning, exec_policy, auth_manager: Arc::clone(&auth_manager), otel_manager: otel_manager.clone(), models_manager: Arc::clone(&models_manager), tool_approvals: Mutex::new(ApprovalStore::default()), skills_manager, file_watcher, agent_control, network_proxy: None, network_approval: Arc::clone(&network_approval), state_db: None, model_client: ModelClient::new( Some(Arc::clone(&auth_manager)), conversation_id, session_configuration.provider.clone(), session_configuration.session_source.clone(), config.model_verbosity, model_info.prefer_websockets || config.features.enabled(Feature::ResponsesWebsockets) || config.features.enabled(Feature::ResponsesWebsocketsV2), config.features.enabled(Feature::ResponsesWebsocketsV2), config.features.enabled(Feature::EnableRequestCompression), config.features.enabled(Feature::RuntimeMetrics), Session::build_model_client_beta_features_header(config.as_ref()), ), }; let js_repl = Arc::new(JsReplHandle::with_node_path( config.js_repl_node_path.clone(), config.codex_home.clone(), )); let turn_context = Arc::new(Session::make_turn_context( Some(Arc::clone(&auth_manager)), &otel_manager, session_configuration.provider.clone(), &session_configuration, per_turn_config, model_info, None, "turn_id".to_string(), Arc::clone(&js_repl), )); let session = Arc::new(Session { conversation_id, tx_event, agent_status: agent_status_tx, state: Mutex::new(state), features: config.features.clone(), pending_mcp_server_refresh_config: Mutex::new(None), active_turn: Mutex::new(None), services, js_repl, next_internal_sub_id: AtomicU64::new(0), }); (session, turn_context, rx_event) } fn mark_state_initial_context_seeded(state: &mut SessionState) { state.initial_context_seeded = true; } #[tokio::test] async fn refresh_mcp_servers_is_deferred_until_next_turn() { let (session, turn_context) = make_session_and_context().await; let old_token = session.mcp_startup_cancellation_token().await; assert!(!old_token.is_cancelled()); let mcp_oauth_credentials_store_mode = serde_json::to_value(OAuthCredentialsStoreMode::Auto).expect("serialize store mode"); let refresh_config = McpServerRefreshConfig { mcp_servers: json!({}), mcp_oauth_credentials_store_mode, }; { let mut guard = session.pending_mcp_server_refresh_config.lock().await; *guard = Some(refresh_config); } assert!(!old_token.is_cancelled()); assert!( session .pending_mcp_server_refresh_config .lock() .await .is_some() ); session .refresh_mcp_servers_if_requested(&turn_context) .await; assert!(old_token.is_cancelled()); assert!( session .pending_mcp_server_refresh_config .lock() .await .is_none() ); let new_token = session.mcp_startup_cancellation_token().await; assert!(!new_token.is_cancelled()); } #[tokio::test] async fn record_model_warning_appends_user_message() { let (mut session, turn_context) = make_session_and_context().await; let features = Features::with_defaults(); session.features = features; session .record_model_warning("too many unified exec processes", &turn_context) .await; let history = session.clone_history().await; let history_items = history.raw_items(); let last = history_items.last().expect("warning recorded"); match last { ResponseItem::Message { role, content, .. } => { assert_eq!(role, "user"); assert_eq!( content, &vec![ContentItem::InputText { text: "Warning: too many unified exec processes".to_string(), }] ); } other => panic!("expected user message, got {other:?}"), } } #[tokio::test] async fn spawn_task_hydrates_previous_model() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; sess.set_previous_model(None).await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; sess.spawn_task( Arc::clone(&tc), input, NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: true, }, ) .await; sess.abort_all_tasks(TurnAbortReason::Interrupted).await; assert_eq!( sess.previous_model().await, Some(tc.model_info.slug.clone()) ); } #[tokio::test] async fn build_settings_update_items_emits_environment_item_for_network_changes() { let (session, previous_context) = make_session_and_context().await; let previous_context = Arc::new(previous_context); let mut current_context = previous_context .with_model( previous_context.model_info.slug.clone(), &session.services.models_manager, ) .await; let mut config = (*current_context.config).clone(); let mut requirements = config.config_layer_stack.requirements().clone(); requirements.network = Some(Sourced::new( NetworkConstraints { allowed_domains: Some(vec!["api.example.com".to_string()]), denied_domains: Some(vec!["blocked.example.com".to_string()]), ..Default::default() }, RequirementSource::CloudRequirements, )); let layers = config .config_layer_stack .get_layers(ConfigLayerStackOrdering::LowestPrecedenceFirst, true) .into_iter() .cloned() .collect(); config.config_layer_stack = ConfigLayerStack::new( layers, requirements, config.config_layer_stack.requirements_toml().clone(), ) .expect("rebuild config layer stack with network requirements"); current_context.config = Arc::new(config); let update_items = session.build_settings_update_items(Some(&previous_context), None, ¤t_context); let environment_update = update_items .iter() .find_map(|item| match item { ResponseItem::Message { role, content, .. } if role == "user" => { let [ContentItem::InputText { text }] = content.as_slice() else { return None; }; text.contains("").then_some(text) } _ => None, }) .expect("environment update item should be emitted"); assert!(environment_update.contains("")); assert!(environment_update.contains("api.example.com")); assert!(environment_update.contains("blocked.example.com")); } #[derive(Clone, Copy)] struct NeverEndingTask { kind: TaskKind, listen_to_cancellation_token: bool, } #[async_trait::async_trait] impl SessionTask for NeverEndingTask { fn kind(&self) -> TaskKind { self.kind } async fn run( self: Arc, _session: Arc, _ctx: Arc, _input: Vec, cancellation_token: CancellationToken, ) -> Option { if self.listen_to_cancellation_token { cancellation_token.cancelled().await; return None; } loop { sleep(Duration::from_secs(60)).await; } } } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] #[test_log::test] async fn abort_regular_task_emits_turn_aborted_only() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; sess.spawn_task( Arc::clone(&tc), input, NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: false, }, ) .await; sess.abort_all_tasks(TurnAbortReason::Interrupted).await; // Interrupts persist a model-visible `` marker into history, but there is no // separate client-visible event for that marker (only `EventMsg::TurnAborted`). let evt = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) .await .expect("timeout waiting for event") .expect("event"); match evt.msg { EventMsg::TurnAborted(e) => assert_eq!(TurnAbortReason::Interrupted, e.reason), other => panic!("unexpected event: {other:?}"), } // No extra events should be emitted after an abort. assert!(rx.try_recv().is_err()); } #[tokio::test] async fn abort_gracefully_emits_turn_aborted_only() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; sess.spawn_task( Arc::clone(&tc), input, NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: true, }, ) .await; sess.abort_all_tasks(TurnAbortReason::Interrupted).await; // Even if tasks handle cancellation gracefully, interrupts still result in `TurnAborted` // being the only client-visible signal. let evt = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) .await .expect("timeout waiting for event") .expect("event"); match evt.msg { EventMsg::TurnAborted(e) => assert_eq!(TurnAbortReason::Interrupted, e.reason), other => panic!("unexpected event: {other:?}"), } // No extra events should be emitted after an abort. assert!(rx.try_recv().is_err()); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn task_finish_persists_leftover_pending_input() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; sess.spawn_task( Arc::clone(&tc), input, NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: false, }, ) .await; sess.inject_response_items(vec![ResponseInputItem::Message { role: "user".to_string(), content: vec![ContentItem::InputText { text: "late pending input".to_string(), }], }]) .await .expect("inject pending input into active turn"); sess.on_task_finished(Arc::clone(&tc), None).await; let history = sess.clone_history().await; let expected = ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: "late pending input".to_string(), }], end_turn: None, phase: None, }; assert!( history.raw_items().iter().any(|item| item == &expected), "expected pending input to be persisted into history on turn completion" ); } #[tokio::test] async fn steer_input_requires_active_turn() { let (sess, _tc, _rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "steer".to_string(), text_elements: Vec::new(), }]; let err = sess .steer_input(input, None) .await .expect_err("steering without active turn should fail"); assert!(matches!(err, SteerInputError::NoActiveTurn(_))); } #[tokio::test] async fn steer_input_enforces_expected_turn_id() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; sess.spawn_task( Arc::clone(&tc), input, NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: false, }, ) .await; let steer_input = vec![UserInput::Text { text: "steer".to_string(), text_elements: Vec::new(), }]; let err = sess .steer_input(steer_input, Some("different-turn-id")) .await .expect_err("mismatched expected turn id should fail"); match err { SteerInputError::ExpectedTurnMismatch { expected, actual } => { assert_eq!( (expected, actual), ("different-turn-id".to_string(), tc.sub_id.clone()) ); } other => panic!("unexpected error: {other:?}"), } } #[tokio::test] async fn steer_input_returns_active_turn_id() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; sess.spawn_task( Arc::clone(&tc), input, NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: false, }, ) .await; let steer_input = vec![UserInput::Text { text: "steer".to_string(), text_elements: Vec::new(), }]; let turn_id = sess .steer_input(steer_input, Some(&tc.sub_id)) .await .expect("steering with matching expected turn id should succeed"); assert_eq!(turn_id, tc.sub_id); assert!(sess.has_pending_input().await); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn abort_review_task_emits_exited_then_aborted_and_records_history() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "start review".to_string(), text_elements: Vec::new(), }]; sess.spawn_task(Arc::clone(&tc), input, ReviewTask::new()) .await; sess.abort_all_tasks(TurnAbortReason::Interrupted).await; // Aborting a review task should exit review mode before surfacing the abort to the client. // We scan for these events (rather than relying on fixed ordering) since unrelated events // may interleave. let mut exited_review_mode_idx = None; let mut turn_aborted_idx = None; let mut idx = 0usize; let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(3); while tokio::time::Instant::now() < deadline { let remaining = deadline.saturating_duration_since(tokio::time::Instant::now()); let evt = tokio::time::timeout(remaining, rx.recv()) .await .expect("timeout waiting for event") .expect("event"); let event_idx = idx; idx = idx.saturating_add(1); match evt.msg { EventMsg::ExitedReviewMode(ev) => { assert!(ev.review_output.is_none()); exited_review_mode_idx = Some(event_idx); } EventMsg::TurnAborted(ev) => { assert_eq!(TurnAbortReason::Interrupted, ev.reason); turn_aborted_idx = Some(event_idx); break; } _ => {} } } assert!( exited_review_mode_idx.is_some(), "expected ExitedReviewMode after abort" ); assert!( turn_aborted_idx.is_some(), "expected TurnAborted after abort" ); assert!( exited_review_mode_idx.unwrap() < turn_aborted_idx.unwrap(), "expected ExitedReviewMode before TurnAborted" ); let history = sess.clone_history().await; // The `` marker is silent in the event stream, so verify it is still // recorded in history for the model. assert!( history.raw_items().iter().any(|item| { let ResponseItem::Message { role, content, .. } = item else { return false; }; if role != "user" { return false; } content.iter().any(|content_item| { let ContentItem::InputText { text } = content_item else { return false; }; text.contains(crate::session_prefix::TURN_ABORTED_OPEN_TAG) }) }), "expected a model-visible turn aborted marker in history after interrupt" ); } #[tokio::test] async fn fatal_tool_error_stops_turn_and_reports_error() { let (session, turn_context, _rx) = make_session_and_context_with_rx().await; let tools = { session .services .mcp_connection_manager .read() .await .list_all_tools() .await }; let app_tools = Some(tools.clone()); let router = ToolRouter::from_config( &turn_context.tools_config, Some( tools .into_iter() .map(|(name, tool)| (name, tool.tool)) .collect(), ), app_tools, turn_context.dynamic_tools.as_slice(), ); let item = ResponseItem::CustomToolCall { id: None, status: None, call_id: "call-1".to_string(), name: "shell".to_string(), input: "{}".to_string(), }; let call = ToolRouter::build_tool_call(session.as_ref(), item.clone()) .await .expect("build tool call") .expect("tool call present"); let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())); let err = router .dispatch_tool_call( Arc::clone(&session), Arc::clone(&turn_context), tracker, call, ToolCallSource::Direct, ) .await .expect_err("expected fatal error"); match err { FunctionCallError::Fatal(message) => { assert_eq!(message, "tool shell invoked with incompatible payload"); } other => panic!("expected FunctionCallError::Fatal, got {other:?}"), } } async fn sample_rollout( session: &Session, _turn_context: &TurnContext, ) -> (Vec, Vec) { let mut rollout_items = Vec::new(); let mut live_history = ContextManager::new(); // Use the same turn_context source as record_initial_history so model_info (and thus // personality_spec) matches reconstruction. let reconstruction_turn = session.new_default_turn().await; let mut initial_context = session .build_initial_context(reconstruction_turn.as_ref()) .await; // Ensure personality_spec is present when Personality is enabled, so expected matches // what reconstruction produces (build_initial_context may omit it when baked into model). if !initial_context.iter().any(|m| { matches!(m, ResponseItem::Message { role, content, .. } if role == "developer" && content.iter().any(|c| { matches!(c, ContentItem::InputText { text } if text.contains("")) })) }) && let Some(p) = reconstruction_turn.personality && session.features.enabled(Feature::Personality) && let Some(personality_message) = reconstruction_turn .model_info .model_messages .as_ref() .and_then(|m| m.get_personality_message(Some(p)).filter(|s| !s.is_empty())) { let msg = DeveloperInstructions::personality_spec_message(personality_message).into(); let insert_at = initial_context .iter() .position(|m| matches!(m, ResponseItem::Message { role, .. } if role == "developer")) .map(|i| i + 1) .unwrap_or(0); initial_context.insert(insert_at, msg); } for item in &initial_context { rollout_items.push(RolloutItem::ResponseItem(item.clone())); } live_history.record_items( initial_context.iter(), reconstruction_turn.truncation_policy, ); let user1 = ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: "first user".to_string(), }], end_turn: None, phase: None, }; live_history.record_items( std::iter::once(&user1), reconstruction_turn.truncation_policy, ); rollout_items.push(RolloutItem::ResponseItem(user1.clone())); let assistant1 = ResponseItem::Message { id: None, role: "assistant".to_string(), content: vec![ContentItem::OutputText { text: "assistant reply one".to_string(), }], end_turn: None, phase: None, }; live_history.record_items( std::iter::once(&assistant1), reconstruction_turn.truncation_policy, ); rollout_items.push(RolloutItem::ResponseItem(assistant1.clone())); let summary1 = "summary one"; let snapshot1 = live_history .clone() .for_prompt(&reconstruction_turn.model_info.input_modalities); let user_messages1 = collect_user_messages(&snapshot1); let mut rebuilt1 = initial_context.clone(); rebuilt1.extend(compact::build_compacted_history(&user_messages1, summary1)); live_history.replace(rebuilt1); rollout_items.push(RolloutItem::Compacted(CompactedItem { message: summary1.to_string(), replacement_history: None, })); let user2 = ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: "second user".to_string(), }], end_turn: None, phase: None, }; live_history.record_items( std::iter::once(&user2), reconstruction_turn.truncation_policy, ); rollout_items.push(RolloutItem::ResponseItem(user2.clone())); let assistant2 = ResponseItem::Message { id: None, role: "assistant".to_string(), content: vec![ContentItem::OutputText { text: "assistant reply two".to_string(), }], end_turn: None, phase: None, }; live_history.record_items( std::iter::once(&assistant2), reconstruction_turn.truncation_policy, ); rollout_items.push(RolloutItem::ResponseItem(assistant2.clone())); let summary2 = "summary two"; let snapshot2 = live_history .clone() .for_prompt(&reconstruction_turn.model_info.input_modalities); let user_messages2 = collect_user_messages(&snapshot2); let mut rebuilt2 = initial_context.clone(); rebuilt2.extend(compact::build_compacted_history(&user_messages2, summary2)); live_history.replace(rebuilt2); rollout_items.push(RolloutItem::Compacted(CompactedItem { message: summary2.to_string(), replacement_history: None, })); let user3 = ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: "third user".to_string(), }], end_turn: None, phase: None, }; live_history.record_items( std::iter::once(&user3), reconstruction_turn.truncation_policy, ); rollout_items.push(RolloutItem::ResponseItem(user3)); let assistant3 = ResponseItem::Message { id: None, role: "assistant".to_string(), content: vec![ContentItem::OutputText { text: "assistant reply three".to_string(), }], end_turn: None, phase: None, }; live_history.record_items( std::iter::once(&assistant3), reconstruction_turn.truncation_policy, ); rollout_items.push(RolloutItem::ResponseItem(assistant3)); ( rollout_items, live_history.for_prompt(&reconstruction_turn.model_info.input_modalities), ) } #[tokio::test] async fn rejects_escalated_permissions_when_policy_not_on_request() { use crate::exec::ExecParams; use crate::protocol::AskForApproval; use crate::protocol::SandboxPolicy; use crate::sandboxing::SandboxPermissions; use crate::turn_diff_tracker::TurnDiffTracker; use std::collections::HashMap; let (session, mut turn_context_raw) = make_session_and_context().await; // Ensure policy is NOT OnRequest so the early rejection path triggers turn_context_raw.approval_policy = AskForApproval::OnFailure; let session = Arc::new(session); let mut turn_context = Arc::new(turn_context_raw); let timeout_ms = 1000; let sandbox_permissions = SandboxPermissions::RequireEscalated; let params = ExecParams { command: if cfg!(windows) { vec![ "cmd.exe".to_string(), "/C".to_string(), "echo hi".to_string(), ] } else { vec![ "/bin/sh".to_string(), "-c".to_string(), "echo hi".to_string(), ] }, cwd: turn_context.cwd.clone(), expiration: timeout_ms.into(), env: HashMap::new(), network: None, network_attempt_id: None, sandbox_permissions, windows_sandbox_level: turn_context.windows_sandbox_level, justification: Some("test".to_string()), arg0: None, }; let params2 = ExecParams { sandbox_permissions: SandboxPermissions::UseDefault, command: params.command.clone(), cwd: params.cwd.clone(), expiration: timeout_ms.into(), env: HashMap::new(), network: None, network_attempt_id: None, windows_sandbox_level: turn_context.windows_sandbox_level, justification: params.justification.clone(), arg0: None, }; let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())); let tool_name = "shell"; let call_id = "test-call".to_string(); let handler = ShellHandler; let resp = handler .handle(ToolInvocation { session: Arc::clone(&session), turn: Arc::clone(&turn_context), tracker: Arc::clone(&turn_diff_tracker), call_id, tool_name: tool_name.to_string(), payload: ToolPayload::Function { arguments: serde_json::json!({ "command": params.command.clone(), "workdir": Some(turn_context.cwd.to_string_lossy().to_string()), "timeout_ms": params.expiration.timeout_ms(), "sandbox_permissions": params.sandbox_permissions, "justification": params.justification.clone(), }) .to_string(), }, }) .await; let Err(FunctionCallError::RespondToModel(output)) = resp else { panic!("expected error result"); }; let expected = format!( "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}", policy = turn_context.approval_policy ); pretty_assertions::assert_eq!(output, expected); // Now retry the same command WITHOUT escalated permissions; should succeed. // Force DangerFullAccess to avoid platform sandbox dependencies in tests. Arc::get_mut(&mut turn_context) .expect("unique turn context Arc") .sandbox_policy = SandboxPolicy::DangerFullAccess; let resp2 = handler .handle(ToolInvocation { session: Arc::clone(&session), turn: Arc::clone(&turn_context), tracker: Arc::clone(&turn_diff_tracker), call_id: "test-call-2".to_string(), tool_name: tool_name.to_string(), payload: ToolPayload::Function { arguments: serde_json::json!({ "command": params2.command.clone(), "workdir": Some(turn_context.cwd.to_string_lossy().to_string()), "timeout_ms": params2.expiration.timeout_ms(), "sandbox_permissions": params2.sandbox_permissions, "justification": params2.justification.clone(), }) .to_string(), }, }) .await; let output = match resp2.expect("expected Ok result") { ToolOutput::Function { body: FunctionCallOutputBody::Text(content), .. } => content, _ => panic!("unexpected tool output"), }; #[derive(Deserialize, PartialEq, Eq, Debug)] struct ResponseExecMetadata { exit_code: i32, } #[derive(Deserialize)] struct ResponseExecOutput { output: String, metadata: ResponseExecMetadata, } let exec_output: ResponseExecOutput = serde_json::from_str(&output).expect("valid exec output json"); pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 }); assert!(exec_output.output.contains("hi")); } #[tokio::test] async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() { use crate::protocol::AskForApproval; use crate::sandboxing::SandboxPermissions; use crate::turn_diff_tracker::TurnDiffTracker; let (session, mut turn_context_raw) = make_session_and_context().await; turn_context_raw.approval_policy = AskForApproval::OnFailure; let session = Arc::new(session); let turn_context = Arc::new(turn_context_raw); let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())); let handler = UnifiedExecHandler; let resp = handler .handle(ToolInvocation { session: Arc::clone(&session), turn: Arc::clone(&turn_context), tracker: Arc::clone(&tracker), call_id: "exec-call".to_string(), tool_name: "exec_command".to_string(), payload: ToolPayload::Function { arguments: serde_json::json!({ "cmd": "echo hi", "sandbox_permissions": SandboxPermissions::RequireEscalated, "justification": "need unsandboxed execution", }) .to_string(), }, }) .await; let Err(FunctionCallError::RespondToModel(output)) = resp else { panic!("expected error result"); }; let expected = format!( "approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}", policy = turn_context.approval_policy ); pretty_assertions::assert_eq!(output, expected); } }