use super::*; use crate::config::ConfigBuilder; use crate::config::test_config; use crate::config_loader::ConfigLayerStack; use crate::config_loader::ConfigLayerStackOrdering; use crate::config_loader::NetworkConstraints; use crate::config_loader::NetworkDomainPermissionToml; use crate::config_loader::NetworkDomainPermissionsToml; use crate::config_loader::RequirementSource; use crate::config_loader::Sourced; use crate::exec::ExecCapturePolicy; use crate::function_tool::FunctionCallError; use crate::shell::default_user_shell; use crate::tools::format_exec_output_str; use codex_features::Features; use codex_login::CodexAuth; use codex_mcp::ToolInfo; use codex_model_provider_info::ModelProviderInfo; use codex_models_manager::bundled_models_response; use codex_models_manager::model_info; use codex_protocol::AgentPath; use codex_protocol::ThreadId; use codex_protocol::exec_output::ExecToolCallOutput; use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::permissions::FileSystemAccessMode; use codex_protocol::permissions::FileSystemPath; use codex_protocol::permissions::FileSystemSandboxEntry; use codex_protocol::permissions::FileSystemSandboxPolicy; use codex_protocol::permissions::FileSystemSpecialPath; use codex_protocol::protocol::NonSteerableTurnKind; use codex_protocol::protocol::ReadOnlyAccess; use codex_protocol::protocol::SandboxPolicy; use codex_protocol::request_permissions::PermissionGrantScope; use codex_protocol::request_permissions::RequestPermissionProfile; use tracing::Span; use crate::RolloutRecorderParams; use crate::rollout::policy::EventPersistenceMode; use crate::rollout::recorder::RolloutRecorder; use crate::state::TaskKind; use crate::tasks::SessionTask; use crate::tasks::SessionTaskContext; use crate::tools::ToolRouter; use crate::tools::context::ToolInvocation; use crate::tools::context::ToolPayload; use crate::tools::handlers::ShellHandler; use 
crate::tools::handlers::UnifiedExecHandler; use crate::tools::registry::ToolHandler; use crate::tools::router::ToolCallSource; use crate::turn_diff_tracker::TurnDiffTracker; use codex_app_server_protocol::AppInfo; use codex_execpolicy::Decision; use codex_execpolicy::NetworkRuleProtocol; use codex_execpolicy::Policy; use codex_network_proxy::NetworkProxyConfig; use codex_otel::TelemetryAuthMode; use codex_protocol::config_types::CollaborationMode; use codex_protocol::config_types::ModeKind; use codex_protocol::config_types::Settings; use codex_protocol::models::BaseInstructions; use codex_protocol::models::ContentItem; use codex_protocol::models::DeveloperInstructions; use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::CompactedItem; use codex_protocol::protocol::ConversationAudioParams; use codex_protocol::protocol::CreditsSnapshot; use codex_protocol::protocol::GranularApprovalConfig; use codex_protocol::protocol::InitialHistory; use codex_protocol::protocol::InterAgentCommunication; use codex_protocol::protocol::NetworkApprovalProtocol; use codex_protocol::protocol::RateLimitSnapshot; use codex_protocol::protocol::RateLimitWindow; use codex_protocol::protocol::RealtimeAudioFrame; use codex_protocol::protocol::ResumedHistory; use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::Submission; use codex_protocol::protocol::ThreadRolledBackEvent; use codex_protocol::protocol::TokenCountEvent; use codex_protocol::protocol::TokenUsage; use codex_protocol::protocol::TokenUsageInfo; use codex_protocol::protocol::TurnAbortedEvent; use codex_protocol::protocol::TurnCompleteEvent; use codex_protocol::protocol::TurnStartedEvent; use codex_protocol::protocol::UserMessageEvent; use codex_protocol::protocol::W3cTraceContext; use core_test_support::PathBufExt; use core_test_support::context_snapshot; use 
core_test_support::context_snapshot::ContextSnapshotOptions;
use core_test_support::context_snapshot::ContextSnapshotRenderMode;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex::test_codex;
use core_test_support::tracing::install_test_tracing;
use core_test_support::wait_for_event;
use opentelemetry::trace::TraceContextExt;
use opentelemetry::trace::TraceId;
use std::path::Path;
use std::time::Duration;
use tokio::time::sleep;
use tracing_opentelemetry::OpenTelemetrySpanExt;

use codex_protocol::mcp::CallToolResult as McpCallToolResult;
use pretty_assertions::assert_eq;
use rmcp::model::JsonObject;
use rmcp::model::Tool;
use serde::Deserialize;
use serde_json::json;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration as StdDuration;

#[path = "codex_tests_guardian.rs"]
mod guardian_tests;

/// One row of the table driving `get_base_instructions_no_user_content`.
struct InstructionsTestCase {
    /// Model slug looked up in the bundled models response.
    slug: &'static str,
    /// When true, the model's base instructions must equal the apply-patch prompt.
    expects_apply_patch_description: bool,
}

/// Builds a `user`-role message holding a single `InputText` item.
fn user_message(text: &str) -> ResponseItem {
    ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputText {
            text: text.to_string(),
        }],
        end_turn: None,
        phase: None,
    }
}

/// Builds an `assistant`-role message holding a single `OutputText` item.
fn assistant_message(text: &str) -> ResponseItem {
    ResponseItem::Message {
        id: None,
        role: "assistant".to_string(),
        content: vec![ContentItem::OutputText {
            text: text.to_string(),
        }],
        end_turn: None,
        phase: None,
    }
}

/// Builds the message used to carry skill content in these tests.
///
/// It has exactly the same shape as [`user_message`], so it delegates to it;
/// the separate name only documents intent at the call sites.
fn skill_message(text: &str) -> ResponseItem {
    user_message(text)
}

/// A regular turn must emit `TurnStarted` within 200ms even though the startup
/// prewarm task is still parked on a oneshot channel that is never signalled.
#[tokio::test]
async fn regular_turn_emits_turn_started_without_waiting_for_startup_prewarm() {
    let (sess, tc, rx) = make_session_and_context_with_rx().await;
    // `_tx` stays alive for the whole test so the prewarm task never resolves.
    let (_tx, startup_prewarm_rx) = tokio::sync::oneshot::channel::<()>();
    let handle = tokio::spawn(async move {
        let _ = startup_prewarm_rx.await;
        Ok(test_model_client_session())
    });
    sess.set_session_startup_prewarm(
        crate::session_startup_prewarm::SessionStartupPrewarmHandle::new(
            handle,
            std::time::Instant::now(),
            crate::client::WEBSOCKET_CONNECT_TIMEOUT,
        ),
    )
    .await;
    sess.spawn_task(
        Arc::clone(&tc),
        Vec::new(),
        crate::tasks::RegularTask::new(),
    )
    .await;

    let first = tokio::time::timeout(std::time::Duration::from_millis(200), rx.recv())
        .await
        .expect("expected turn started event without waiting for startup prewarm")
        .expect("channel open");
    assert!(matches!(
        first.msg,
        EventMsg::TurnStarted(TurnStartedEvent { turn_id, .. }) if turn_id == tc.sub_id
    ));

    sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
}

/// Interrupting a regular turn while the startup prewarm is still pending must
/// produce a `TurnAborted` event carrying the turn id, reason and timing data.
#[tokio::test]
async fn interrupting_regular_turn_waiting_on_startup_prewarm_emits_turn_aborted() {
    let (sess, tc, rx) = make_session_and_context_with_rx().await;
    // `_tx` stays alive for the whole test so the prewarm task never resolves.
    let (_tx, startup_prewarm_rx) = tokio::sync::oneshot::channel::<()>();
    let handle = tokio::spawn(async move {
        let _ = startup_prewarm_rx.await;
        Ok(test_model_client_session())
    });
    sess.set_session_startup_prewarm(
        crate::session_startup_prewarm::SessionStartupPrewarmHandle::new(
            handle,
            std::time::Instant::now(),
            crate::client::WEBSOCKET_CONNECT_TIMEOUT,
        ),
    )
    .await;
    sess.spawn_task(
        Arc::clone(&tc),
        Vec::new(),
        crate::tasks::RegularTask::new(),
    )
    .await;

    let first = tokio::time::timeout(std::time::Duration::from_millis(200), rx.recv())
        .await
        .expect("expected turn started event without waiting for startup prewarm")
        .expect("channel open");
    assert!(matches!(
        first.msg,
        EventMsg::TurnStarted(TurnStartedEvent { turn_id, .. }) if turn_id == tc.sub_id
    ));

    sess.abort_all_tasks(TurnAbortReason::Interrupted).await;

    let second = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
        .await
        .expect("expected turn aborted event")
        .expect("channel open");
    let EventMsg::TurnAborted(TurnAbortedEvent {
        turn_id,
        reason,
        completed_at,
        duration_ms,
    }) = second.msg
    else {
        panic!("expected turn aborted event");
    };
    assert_eq!(turn_id, Some(tc.sub_id.clone()));
    assert_eq!(reason, TurnAbortReason::Interrupted);
    assert!(completed_at.is_some());
    assert!(duration_ms.is_some());
}

/// Creates a `ModelClientSession` from a `ModelClient` built with fixed ids,
/// no auth manager and the default OpenAI provider.
fn test_model_client_session() -> crate::client::ModelClientSession {
    crate::client::ModelClient::new(
        /*auth_manager*/ None,
        ThreadId::try_from("00000000-0000-4000-8000-000000000001")
            .expect("test thread id should be valid"),
        /*installation_id*/ "11111111-1111-4111-8111-111111111111".to_string(),
        ModelProviderInfo::create_openai_provider(/*base_url*/ None),
        codex_protocol::protocol::SessionSource::Exec,
        /*model_verbosity*/ None,
        /*enable_request_compression*/ false,
        /*include_timing_metrics*/ false,
        /*beta_features_header*/ None,
    )
    .new_session()
}

/// Collects, in order, every `InputText` string from messages whose role equals `wanted_role`.
fn message_input_texts_with_role<'a>(items: &'a [ResponseItem], wanted_role: &str) -> Vec<&'a str> {
    items
        .iter()
        .filter_map(|item| match item {
            ResponseItem::Message { role, content, .. } if role == wanted_role => {
                Some(content.as_slice())
            }
            _ => None,
        })
        .flat_map(|content| content.iter())
        .filter_map(|item| match item {
            ContentItem::InputText { text } => Some(text.as_str()),
            _ => None,
        })
        .collect()
}

/// Returns the `InputText` strings of all `developer` messages in `items`.
fn developer_input_texts(items: &[ResponseItem]) -> Vec<&str> {
    message_input_texts_with_role(items, "developer")
}

/// Returns the `InputText` strings of all `user` messages in `items`.
fn user_input_texts(items: &[ResponseItem]) -> Vec<&str> {
    message_input_texts_with_role(items, "user")
}

/// Builds a `ToolCallRuntime` whose router is derived from the turn's tool
/// config and dynamic tools only (no MCP, app or discoverable tools).
fn test_tool_runtime(session: Arc<Session>, turn_context: Arc<TurnContext>) -> ToolCallRuntime {
    let router = Arc::new(ToolRouter::from_config(
        &turn_context.tools_config,
        crate::tools::router::ToolRouterParams {
            mcp_tools: None,
            tool_namespaces: None,
            app_tools: None,
            discoverable_tools: None,
            dynamic_tools: turn_context.dynamic_tools.as_slice(),
        },
    ));
    let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
    ToolCallRuntime::new(router, session, turn_context, tracker)
}

/// Builds an accessible, enabled `AppInfo` with the given id and name and no
/// optional metadata.
fn make_connector(id: &str, name: &str) -> AppInfo {
    AppInfo {
        id: id.to_string(),
        name: name.to_string(),
        description: None,
        logo_url: None,
        logo_url_dark: None,
        distribution_channel: None,
        branding: None,
        app_metadata: None,
        labels: None,
        install_url: None,
        is_accessible: true,
        is_enabled: true,
        plugin_display_names: Vec::new(),
    }
}

// NOTE(review): the citation / plan tag names in the three parser tests below
// were reconstructed from the asserted outputs — confirm they match the tags
// recognised by `AssistantMessageStreamParsers`.

/// A citation opened in the seeded text and closed in a later delta is
/// reported with the delta, and never leaks into visible text.
#[test]
fn assistant_message_stream_parsers_can_be_seeded_from_output_item_added_text() {
    let mut parsers = AssistantMessageStreamParsers::new(/*plan_mode*/ false);
    let item_id = "msg-1";

    let seeded = parsers.seed_item_text(item_id, "hello <oai-mem-citation>doc");
    let parsed = parsers.parse_delta(item_id, "1</oai-mem-citation> world");
    let tail = parsers.finish_item(item_id);

    assert_eq!(seeded.visible_text, "hello ");
    assert_eq!(seeded.citations, Vec::<String>::new());
    assert_eq!(parsed.visible_text, " world");
    assert_eq!(parsed.citations, vec!["doc1".to_string()]);
    assert_eq!(tail.visible_text, "");
    assert_eq!(tail.citations, Vec::<String>::new());
}

/// A partial opening tag buffered while seeding is completed by the next
/// delta and must not be flushed as visible text by `finish_item`.
#[test]
fn assistant_message_stream_parsers_seed_buffered_prefix_stays_out_of_finish_tail() {
    let mut parsers = AssistantMessageStreamParsers::new(/*plan_mode*/ false);
    let item_id = "msg-1";

    let seeded = parsers.seed_item_text(item_id, "hello <oai-mem-");
    let parsed = parsers.parse_delta(item_id, "citation>doc</oai-mem-citation> world");
    let tail = parsers.finish_item(item_id);

    assert_eq!(seeded.visible_text, "hello ");
    assert_eq!(seeded.citations, Vec::<String>::new());
    assert_eq!(parsed.visible_text, " world");
    assert_eq!(parsed.citations, vec!["doc".to_string()]);
    assert_eq!(tail.visible_text, "");
    assert_eq!(tail.citations, Vec::<String>::new());
}

/// In plan mode, a plan tag split across the seeded text and the first delta
/// still yields start/delta/end plan segments followed by normal text.
#[test]
fn assistant_message_stream_parsers_seed_plan_parser_across_added_and_delta_boundaries() {
    let mut parsers = AssistantMessageStreamParsers::new(/*plan_mode*/ true);
    let item_id = "msg-1";

    let seeded = parsers.seed_item_text(item_id, "Intro\n<proposed");
    let parsed = parsers.parse_delta(item_id, "_plan>\n- step\n</proposed_plan>\nOutro");
    let tail = parsers.finish_item(item_id);

    assert_eq!(seeded.visible_text, "Intro\n");
    assert_eq!(
        seeded.plan_segments,
        vec![ProposedPlanSegment::Normal("Intro\n".to_string())]
    );
    assert_eq!(parsed.visible_text, "Outro");
    assert_eq!(
        parsed.plan_segments,
        vec![
            ProposedPlanSegment::ProposedPlanStart,
            ProposedPlanSegment::ProposedPlanDelta("- step\n".to_string()),
            ProposedPlanSegment::ProposedPlanEnd,
            ProposedPlanSegment::Normal("Outro".to_string()),
        ]
    );
    assert_eq!(tail.visible_text, "");
    assert!(tail.plan_segments.is_empty());
}

/// Builds a `ToolInfo` fixture.
///
/// For the codex-apps server the namespace is `mcp__{server}__{sanitized connector name}`
/// when a connector name is given; in every other case it is the server name.
fn make_mcp_tool(
    server_name: &str,
    tool_name: &str,
    connector_id: Option<&str>,
    connector_name: Option<&str>,
) -> ToolInfo {
    let tool_namespace = if server_name == CODEX_APPS_MCP_SERVER_NAME {
        connector_name
            .map(crate::connectors::sanitize_name)
            .map(|connector_name| format!("mcp__{server_name}__{connector_name}"))
            .unwrap_or_else(|| server_name.to_string())
    } else {
        server_name.to_string()
    };

    ToolInfo {
        server_name: server_name.to_string(),
        tool_name: tool_name.to_string(),
        tool_namespace,
        server_instructions: None,
        tool: Tool {
            name: tool_name.to_string().into(),
            title: None,
            description: Some(format!("Test tool: {tool_name}").into()),
            input_schema: Arc::new(JsonObject::default()),
            output_schema: None,
            annotations: None,
            execution: None,
            icons: None,
            meta: None,
        },
        connector_id: connector_id.map(str::to_string),
        connector_name: connector_name.map(str::to_string),
        plugin_display_names: Vec::new(),
        connector_description: None,
    }
}

/// An amendment host differing only by case, trailing dot and port is accepted
/// and returned in normalized form.
#[test]
fn validated_network_policy_amendment_host_allows_normalized_match() {
    let amendment = NetworkPolicyAmendment {
        host: "ExAmPlE.Com.:443".to_string(),
        action: NetworkPolicyRuleAction::Allow,
    };
    let context = NetworkApprovalContext {
        host: "example.com".to_string(),
        protocol: NetworkApprovalProtocol::Https,
    };

    let host = Session::validated_network_policy_amendment_host(&amendment, &context)
        .expect("normalized hosts should match");

    assert_eq!(host, "example.com");
}

/// An amendment for a different host than the approved one is rejected.
#[test]
fn validated_network_policy_amendment_host_rejects_mismatch() {
    let amendment = NetworkPolicyAmendment {
        host: "evil.example.com".to_string(),
        action: NetworkPolicyRuleAction::Deny,
    };
    let context = NetworkApprovalContext {
        host: "api.example.com".to_string(),
        protocol: NetworkApprovalProtocol::Https,
    };

    let err = Session::validated_network_policy_amendment_host(&amendment, &context)
        .expect_err("mismatched hosts should be rejected");

    let message = err.to_string();
    assert!(message.contains("does not match approved host"));
}

/// An exec-policy allow rule shows up in the started proxy's allowed domains.
#[tokio::test]
async fn start_managed_network_proxy_applies_execpolicy_network_rules() -> anyhow::Result<()> {
    let spec = crate::config::NetworkProxySpec::from_config_and_constraints(
        NetworkProxyConfig::default(),
        /*requirements*/ None,
        &SandboxPolicy::new_workspace_write_policy(),
    )?;
    let mut exec_policy = Policy::empty();
    exec_policy.add_network_rule(
        "example.com",
        NetworkRuleProtocol::Https,
        Decision::Allow,
        /*justification*/ None,
    )?;

    let (started_proxy, _) = Session::start_managed_network_proxy(
        &spec,
        &exec_policy,
        &SandboxPolicy::new_workspace_write_policy(),
        /*network_policy_decider*/ None,
        /*blocked_request_observer*/ None,
        /*managed_network_requirements_enabled*/ false,
        crate::config::NetworkProxyAuditMetadata::default(),
    )
    .await?;

    let current_cfg = started_proxy.proxy().current_cfg().await?;
    assert_eq!(
        current_cfg.network.allowed_domains(),
        Some(vec!["example.com".to_string()])
    );
    Ok(())
}

/// With `managed_allowed_domains_only`, an exec-policy allow rule for another
/// domain does not widen the managed allow list.
#[tokio::test]
async fn start_managed_network_proxy_ignores_invalid_execpolicy_network_rules() -> anyhow::Result<()>
{
    let spec = crate::config::NetworkProxySpec::from_config_and_constraints(
        NetworkProxyConfig::default(),
        Some(NetworkConstraints {
            domains: Some(NetworkDomainPermissionsToml {
                entries: std::collections::BTreeMap::from([(
                    "managed.example.com".to_string(),
                    NetworkDomainPermissionToml::Allow,
                )]),
            }),
            managed_allowed_domains_only: Some(true),
            ..Default::default()
        }),
        &SandboxPolicy::new_workspace_write_policy(),
    )?;
    let mut exec_policy = Policy::empty();
    exec_policy.add_network_rule(
        "example.com",
        NetworkRuleProtocol::Https,
        Decision::Allow,
        /*justification*/ None,
    )?;

    let (started_proxy, _) = Session::start_managed_network_proxy(
        &spec,
        &exec_policy,
        &SandboxPolicy::new_workspace_write_policy(),
        /*network_policy_decider*/ None,
        /*blocked_request_observer*/ None,
        /*managed_network_requirements_enabled*/ false,
        crate::config::NetworkProxyAuditMetadata::default(),
    )
    .await?;

    let current_cfg = started_proxy.proxy().current_cfg().await?;
    assert_eq!(
        current_cfg.network.allowed_domains(),
        Some(vec!["managed.example.com".to_string()])
    );
    Ok(())
}

/// Recomputing the spec for a new sandbox policy and re-applying it updates the
/// running proxy: workspace-write -> full access -> workspace-write.
#[tokio::test]
async fn managed_network_proxy_refreshes_when_sandbox_policy_changes() -> anyhow::Result<()> {
    let spec = crate::config::NetworkProxySpec::from_config_and_constraints(
        NetworkProxyConfig::default(),
        Some(NetworkConstraints {
            domains: Some(NetworkDomainPermissionsToml {
                entries: std::collections::BTreeMap::from([(
                    "blocked.example.com".to_string(),
                    NetworkDomainPermissionToml::Deny,
                )]),
            }),
            danger_full_access_denylist_only: Some(true),
            allow_local_binding: Some(false),
            ..Default::default()
        }),
        &SandboxPolicy::new_workspace_write_policy(),
    )?;
    let exec_policy = Policy::empty();

    let (started_proxy, _) = Session::start_managed_network_proxy(
        &spec,
        &exec_policy,
        &SandboxPolicy::new_workspace_write_policy(),
        /*network_policy_decider*/ None,
        /*blocked_request_observer*/ None,
        /*managed_network_requirements_enabled*/ false,
        crate::config::NetworkProxyAuditMetadata::default(),
    )
    .await?;
    assert!(!started_proxy.proxy().allow_local_binding());
    let current_cfg = started_proxy.proxy().current_cfg().await?;
    assert_eq!(current_cfg.network.allowed_domains(), None);
    assert_eq!(
        current_cfg.network.denied_domains(),
        Some(vec!["blocked.example.com".to_string()])
    );

    // Switch to full access: allow list becomes a wildcard, deny list is kept.
    let spec = spec.recompute_for_sandbox_policy(&SandboxPolicy::DangerFullAccess)?;
    spec.apply_to_started_proxy(&started_proxy).await?;
    assert!(started_proxy.proxy().allow_local_binding());
    let current_cfg = started_proxy.proxy().current_cfg().await?;
    assert_eq!(
        current_cfg.network.allowed_domains(),
        Some(vec!["*".to_string()])
    );
    assert_eq!(
        current_cfg.network.denied_domains(),
        Some(vec!["blocked.example.com".to_string()])
    );

    // Switch back: the original restrictions are restored.
    let spec = spec.recompute_for_sandbox_policy(&SandboxPolicy::new_workspace_write_policy())?;
    spec.apply_to_started_proxy(&started_proxy).await?;
    assert!(!started_proxy.proxy().allow_local_binding());
    let current_cfg = started_proxy.proxy().current_cfg().await?;
    assert_eq!(current_cfg.network.allowed_domains(), None);
    assert_eq!(
        current_cfg.network.denied_domains(),
        Some(vec!["blocked.example.com".to_string()])
    );
    Ok(())
}

/// A policy decider supplied while starting under full access is still
/// consulted after the proxy is reconfigured for workspace-write.
#[tokio::test]
async fn managed_network_proxy_decider_survives_full_access_start() -> anyhow::Result<()> {
    let spec = crate::config::NetworkProxySpec::from_config_and_constraints(
        NetworkProxyConfig::default(),
        Some(NetworkConstraints {
            enabled: Some(true),
            danger_full_access_denylist_only: Some(true),
            ..Default::default()
        }),
        &SandboxPolicy::DangerFullAccess,
    )?;
    let exec_policy = Policy::empty();
    let decider_calls = Arc::new(std::sync::atomic::AtomicUsize::new(0));
    let network_policy_decider: Arc<dyn codex_network_proxy::NetworkPolicyDecider> = Arc::new({
        let decider_calls = Arc::clone(&decider_calls);
        move |_request| {
            decider_calls.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
            async { codex_network_proxy::NetworkDecision::ask("not_allowed") }
        }
    });

    let (started_proxy, _) = Session::start_managed_network_proxy(
        &spec,
        &exec_policy,
        &SandboxPolicy::DangerFullAccess,
        Some(network_policy_decider),
        /*blocked_request_observer*/ None,
        /*managed_network_requirements_enabled*/ true,
        crate::config::NetworkProxyAuditMetadata::default(),
    )
    .await?;

    let spec = spec.recompute_for_sandbox_policy(&SandboxPolicy::new_workspace_write_policy())?;
    spec.apply_to_started_proxy(&started_proxy).await?;
    let current_cfg = started_proxy.proxy().current_cfg().await?;
    assert_eq!(current_cfg.network.allowed_domains(), None);

    use tokio::io::AsyncReadExt as _;
    use tokio::io::AsyncWriteExt as _;

    // Send a plain HTTP request through the proxy and inspect the raw response.
    let mut stream = tokio::net::TcpStream::connect(started_proxy.proxy().http_addr()).await?;
    stream
        .write_all(
            b"GET http://example.com/ HTTP/1.1\r\nHost: example.com\r\nConnection: close\r\n\r\n",
        )
        .await?;
    let mut buffer = [0_u8; 4096];
    let bytes_read = tokio::time::timeout(StdDuration::from_secs(2), stream.read(&mut buffer))
        .await
        .expect("timed out waiting for proxy response")?;
    let response = String::from_utf8_lossy(&buffer[..bytes_read]);

    assert!(
        response.starts_with("HTTP/1.1 403 Forbidden"),
        "unexpected proxy response: {response}"
    );
    assert!(
        response.contains("x-proxy-error: blocked-by-allowlist"),
        "unexpected proxy response: {response}"
    );
    assert_eq!(
        decider_calls.load(std::sync::atomic::Ordering::SeqCst),
        1,
        "expected the policy decider to be consulted exactly once; proxy response: {response}"
    );
    Ok(())
}

/// For each listed model slug, `get_base_instructions` returns whatever base
/// instructions were stored in the session configuration.
#[tokio::test]
async fn get_base_instructions_no_user_content() {
    let prompt_with_apply_patch_instructions =
        include_str!("../prompt_with_apply_patch_instructions.md");
    let models_response = bundled_models_response()
        .unwrap_or_else(|err| panic!("bundled models.json should parse: {err}"));
    let model_info_for_slug = |slug: &str, config: &Config| {
        let model = models_response
            .models
            .iter()
            .find(|candidate| candidate.slug == slug)
            .cloned()
            .unwrap_or_else(|| panic!("model slug {slug} is missing from models.json"));
        model_info::with_config_overrides(model, &config.to_models_manager_config())
    };
    let test_cases = vec![
        InstructionsTestCase {
            slug: "gpt-5",
            expects_apply_patch_description: false,
        },
        InstructionsTestCase {
            slug: "gpt-5.1",
            expects_apply_patch_description: false,
        },
        InstructionsTestCase {
            slug: "gpt-5.1-codex",
            expects_apply_patch_description: false,
        },
        InstructionsTestCase {
            slug: "gpt-5.1-codex-max",
            expects_apply_patch_description: false,
        },
    ];

    let (session, _turn_context) = make_session_and_context().await;
    let config = test_config();

    for test_case in test_cases {
        let model_info = model_info_for_slug(test_case.slug, &config);
        if test_case.expects_apply_patch_description {
            assert_eq!(
                model_info.base_instructions.as_str(),
                prompt_with_apply_patch_instructions
            );
        }

        {
            // Scope the lock so it is released before `get_base_instructions`.
            let mut state = session.state.lock().await;
            state.session_configuration.base_instructions = model_info.base_instructions.clone();
        }

        let base_instructions = session.get_base_instructions().await;
        assert_eq!(base_instructions.text, model_info.base_instructions);
    }
}

/// After writing a user `config.toml` and reloading, the effective config
/// exposes the new `[apps.calendar]` settings.
#[tokio::test]
async fn reload_user_config_layer_updates_effective_apps_config() {
    let (session, _turn_context) = make_session_and_context().await;
    let codex_home = session.codex_home().await;
    std::fs::create_dir_all(&codex_home).expect("create codex home");
    let config_toml_path = codex_home.join(CONFIG_TOML_FILE);
    std::fs::write(
        &config_toml_path,
        "[apps.calendar]\nenabled = false\ndestructive_enabled = false\n",
    )
    .expect("write user config");

    session.reload_user_config_layer().await;

    let config = session.get_config().await;
    let apps_toml = config
        .config_layer_stack
        .effective_config()
        .as_table()
        .and_then(|table| table.get("apps"))
        .cloned()
        .expect("apps table");
    let apps = codex_config::types::AppsConfigToml::deserialize(apps_toml)
        .expect("deserialize apps config");
    let app = apps
        .apps
        .get("calendar")
        .expect("calendar app config exists");

    assert!(!app.enabled);
    assert_eq!(app.destructive_enabled, Some(false));
}

/// Two connectors ("Foo Bar", "Foo-Bar") mentioned via `$foo-bar` select nothing.
#[test]
fn filter_connectors_for_input_skips_duplicate_slug_mentions() {
    let connectors = vec![
        make_connector("one", "Foo Bar"),
        make_connector("two", "Foo-Bar"),
    ];
    let input = vec![user_message("use $foo-bar")];
    let explicitly_enabled_connectors = HashSet::new();
    let skill_name_counts_lower = HashMap::new();

    let selected = filter_connectors_for_input(
        &connectors,
        &input,
        &explicitly_enabled_connectors,
        &skill_name_counts_lower,
    );

    assert_eq!(selected, Vec::new());
}

/// A `$todoist` mention selects nothing when a skill with the same lowercase
/// name is present in the skill-name counts.
#[test]
fn filter_connectors_for_input_skips_when_skill_name_conflicts() {
    let connectors = vec![make_connector("one", "Todoist")];
    let input = vec![user_message("use $todoist")];
    let explicitly_enabled_connectors = HashSet::new();
    let skill_name_counts_lower = HashMap::from([("todoist".to_string(), 1)]);

    let selected = filter_connectors_for_input(
        &connectors,
        &input,
        &explicitly_enabled_connectors,
        &skill_name_counts_lower,
    );

    assert_eq!(selected, Vec::new());
}

/// A mentioned connector with `is_enabled == false` is not selected.
#[test]
fn filter_connectors_for_input_skips_disabled_connectors() {
    let mut connector = make_connector("calendar", "Calendar");
    connector.is_enabled = false;
    let input = vec![user_message("use $calendar")];
    let explicitly_enabled_connectors = HashSet::new();

    let selected = filter_connectors_for_input(
        &[connector],
        &input,
        &explicitly_enabled_connectors,
        &HashMap::new(),
    );

    assert_eq!(selected, Vec::new());
}

/// A `plugin://` link mention does not select the same-named connector.
#[test]
fn filter_connectors_for_input_skips_plugin_mentions() {
    let connectors = vec![make_connector("figma", "Figma")];
    let input = vec![user_message("use [@figma](plugin://figma@openai-curated)")];
    let explicitly_enabled_connectors = HashSet::new();

    let selected = filter_connectors_for_input(
        &connectors,
        &input,
        &explicitly_enabled_connectors,
        &HashMap::new(),
    );

    assert_eq!(selected, Vec::new());
}

// NOTE(review): the `<skill>/<name>/<path>` envelope in the three tests below
// was reconstructed from the surviving fixture text — confirm it matches the
// format produced by skill injection.

/// A linked `app://` mention inside a skill item yields that connector id.
#[test]
fn collect_explicit_app_ids_from_skill_items_includes_linked_mentions() {
    let connectors = vec![make_connector("calendar", "Calendar")];
    let skill_items = vec![skill_message(
        "<skill>\n<name>demo</name>\n<path>/tmp/skills/demo/SKILL.md</path>\nuse [$calendar](app://calendar)\n</skill>",
    )];

    let connector_ids =
        collect_explicit_app_ids_from_skill_items(&skill_items, &connectors, &HashMap::new());

    assert_eq!(connector_ids, HashSet::from(["calendar".to_string()]));
}

/// A plain `$calendar` mention inside a skill item resolves when unambiguous.
#[test]
fn collect_explicit_app_ids_from_skill_items_resolves_unambiguous_plain_mentions() {
    let connectors = vec![make_connector("calendar", "Calendar")];
    let skill_items = vec![skill_message(
        "<skill>\n<name>demo</name>\n<path>/tmp/skills/demo/SKILL.md</path>\nuse $calendar\n</skill>",
    )];

    let connector_ids =
        collect_explicit_app_ids_from_skill_items(&skill_items, &connectors, &HashMap::new());

    assert_eq!(connector_ids, HashSet::from(["calendar".to_string()]));
}

/// A plain `$calendar` mention is skipped when a skill named `calendar` exists.
#[test]
fn collect_explicit_app_ids_from_skill_items_skips_plain_mentions_with_skill_conflicts() {
    let connectors = vec![make_connector("calendar", "Calendar")];
    let skill_items = vec![skill_message(
        "<skill>\n<name>demo</name>\n<path>/tmp/skills/demo/SKILL.md</path>\nuse $calendar\n</skill>",
    )];
    let skill_name_counts_lower = HashMap::from([("calendar".to_string(), 1)]);

    let connector_ids = collect_explicit_app_ids_from_skill_items(
        &skill_items,
        &connectors,
        &skill_name_counts_lower,
    );

    assert_eq!(connector_ids, HashSet::<String>::new());
}

/// Fixture shared by the MCP selection tests: one codex-apps calendar tool and
/// one plain `rmcp` echo tool, keyed by fully qualified tool name.
fn calendar_and_echo_mcp_tools() -> HashMap<String, ToolInfo> {
    HashMap::from([
        (
            "mcp__codex_apps__calendar_create_event".to_string(),
            make_mcp_tool(
                CODEX_APPS_MCP_SERVER_NAME,
                "calendar_create_event",
                Some("calendar"),
                Some("Calendar"),
            ),
        ),
        (
            "mcp__rmcp__echo".to_string(),
            make_mcp_tool(
                "rmcp",
                "echo",
                /*connector_id*/ None,
                /*connector_name*/ None,
            ),
        ),
    ])
}

/// Extends `selected_mcp_tools` with the codex-apps tools whose connectors are
/// selected by `user_text`, then returns the resulting tool names sorted.
fn sorted_selected_tool_names(
    mcp_tools: &HashMap<String, ToolInfo>,
    mut selected_mcp_tools: HashMap<String, ToolInfo>,
    user_text: &str,
) -> Vec<String> {
    let connectors = connectors::accessible_connectors_from_mcp_tools(mcp_tools);
    let explicitly_enabled_connectors = HashSet::new();
    let connectors = filter_connectors_for_input(
        &connectors,
        &[user_message(user_text)],
        &explicitly_enabled_connectors,
        &HashMap::new(),
    );

    let config = test_config();
    selected_mcp_tools.extend(filter_codex_apps_mcp_tools(
        mcp_tools,
        &connectors,
        &config,
    ));

    let mut tool_names: Vec<String> = selected_mcp_tools.into_keys().collect();
    tool_names.sort();
    tool_names
}

/// Non-app MCP tools stay selected without any mention; the calendar app tool
/// is not added because the input does not mention it.
#[test]
fn non_app_mcp_tools_remain_visible_without_search_selection() {
    let mcp_tools = calendar_and_echo_mcp_tools();
    let selected_mcp_tools = mcp_tools
        .iter()
        .filter(|(_, tool)| tool.server_name != CODEX_APPS_MCP_SERVER_NAME)
        .map(|(name, tool)| (name.clone(), tool.clone()))
        .collect::<HashMap<_, _>>();

    let tool_names = sorted_selected_tool_names(&mcp_tools, selected_mcp_tools, "run echo");

    assert_eq!(tool_names, vec!["mcp__rmcp__echo".to_string()]);
}

/// Tools chosen up front (including a codex-apps tool) remain selected even
/// when the input mentions no app.
#[test]
fn search_tool_selection_keeps_codex_apps_tools_without_mentions() {
    let selected_tool_names = [
        "mcp__codex_apps__calendar_create_event".to_string(),
        "mcp__rmcp__echo".to_string(),
    ];
    let mcp_tools = calendar_and_echo_mcp_tools();
    let selected_mcp_tools = mcp_tools
        .iter()
        .filter(|(name, _)| selected_tool_names.contains(name))
        .map(|(name, tool)| (name.clone(), tool.clone()))
        .collect::<HashMap<_, _>>();

    let tool_names =
        sorted_selected_tool_names(&mcp_tools, selected_mcp_tools, "run the selected tools");

    assert_eq!(
        tool_names,
        vec![
            "mcp__codex_apps__calendar_create_event".to_string(),
            "mcp__rmcp__echo".to_string(),
        ]
    );
}

/// A `$calendar` mention adds the calendar app tool on top of the tools that
/// were already selected.
#[test]
fn apps_mentions_add_codex_apps_tools_to_search_selected_set() {
    let selected_tool_names = ["mcp__rmcp__echo".to_string()];
    let mcp_tools = calendar_and_echo_mcp_tools();
    let selected_mcp_tools = mcp_tools
        .iter()
        .filter(|(name, _)| selected_tool_names.contains(name))
        .map(|(name, tool)| (name.clone(), tool.clone()))
        .collect::<HashMap<_, _>>();

    let tool_names = sorted_selected_tool_names(
        &mcp_tools,
        selected_mcp_tools,
        "use $calendar and then echo the response",
    );

    assert_eq!(
        tool_names,
        vec![
            "mcp__codex_apps__calendar_create_event".to_string(),
            "mcp__rmcp__echo".to_string(),
        ]
    );
}

/// Wraps rollout items in the `InitialHistory::Resumed` value used by the
/// resume tests (default thread id, fixed rollout path).
fn resumed_initial_history_for_test(rollout_items: Vec<RolloutItem>) -> InitialHistory {
    InitialHistory::Resumed(ResumedHistory {
        conversation_id: ThreadId::default(),
        history: rollout_items,
        rollout_path: PathBuf::from("/tmp/resume.jsonl"),
    })
}

/// Reconstructing history from the sample rollout matches the history that
/// `sample_rollout` reports as expected.
#[tokio::test]
async fn reconstruct_history_matches_live_compactions() {
    let (session, turn_context) = make_session_and_context().await;
    let (rollout_items, expected) = sample_rollout(&session, &turn_context).await;

    let reconstruction_turn = session.new_default_turn().await;
    let reconstructed = session
        .reconstruct_history_from_rollout(reconstruction_turn.as_ref(), &rollout_items)
        .await;

    assert_eq!(expected, reconstructed.history);
}

/// A compacted item's `replacement_history` is used as-is, including a
/// developer message it contains.
#[tokio::test]
async fn reconstruct_history_uses_replacement_history_verbatim() {
    let (session, turn_context) = make_session_and_context().await;
    let summary_item = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputText {
            text: "summary".to_string(),
        }],
        end_turn: None,
        phase: None,
    };
    let replacement_history = vec![
        summary_item.clone(),
        ResponseItem::Message {
            id: None,
            role: "developer".to_string(),
            content: vec![ContentItem::InputText {
                text: "stale developer instructions".to_string(),
            }],
            end_turn: None,
            phase: None,
        },
    ];
    let rollout_items = vec![RolloutItem::Compacted(CompactedItem {
        message: String::new(),
        replacement_history: Some(replacement_history.clone()),
    })];

    let reconstructed = session
        .reconstruct_history_from_rollout(&turn_context, &rollout_items)
        .await;

    assert_eq!(reconstructed.history, replacement_history);
}

/// Recording a resumed history rebuilds the expected transcript in session state.
#[tokio::test]
async fn record_initial_history_reconstructs_resumed_transcript() {
    let (session, turn_context) = make_session_and_context().await;
    let (rollout_items, expected) = sample_rollout(&session, &turn_context).await;

    session
        .record_initial_history(resumed_initial_history_for_test(rollout_items))
        .await;

    let history = session.state.lock().await.clone_history();
    assert_eq!(expected, history.raw_items());
}

/// Recording `InitialHistory::New` leaves history empty and sets neither a
/// reference context item nor previous turn settings.
#[tokio::test]
async fn record_initial_history_new_defers_initial_context_until_first_turn() {
    let (session, _turn_context) = make_session_and_context().await;

    session.record_initial_history(InitialHistory::New).await;

    let history = session.clone_history().await;
    assert_eq!(history.raw_items().to_vec(), Vec::<ResponseItem>::new());
    assert!(session.reference_context_item().await.is_none());
    assert_eq!(session.previous_turn_settings().await, None);
}

/// After resume, the first context update appends the initial context and a
/// second update leaves history unchanged.
#[tokio::test]
async fn resumed_history_injects_initial_context_on_first_context_update_only() {
    let (session, turn_context) = make_session_and_context().await;
    let (rollout_items, mut expected) = sample_rollout(&session, &turn_context).await;

    session
        .record_initial_history(resumed_initial_history_for_test(rollout_items))
        .await;

    let history_before_seed = session.state.lock().await.clone_history();
    assert_eq!(expected, history_before_seed.raw_items());

    session
        .record_context_updates_and_set_reference_context_item(&turn_context)
        .await;
    expected.extend(session.build_initial_context(&turn_context).await);
    let history_after_seed = session.clone_history().await;
    assert_eq!(expected, history_after_seed.raw_items());

    session
        .record_context_updates_and_set_reference_context_item(&turn_context)
        .await;
    let history_after_second_seed = session.clone_history().await;
    assert_eq!(
        history_after_seed.raw_items(),
        history_after_second_seed.raw_items()
    );
}

/// Token info is seeded from the last `TokenCount` event that carries `info`;
/// later events with `info: None` do not clear it.
#[tokio::test]
async fn record_initial_history_seeds_token_info_from_rollout() {
    let (session, turn_context) = make_session_and_context().await;
    let (mut rollout_items, _expected) = sample_rollout(&session, &turn_context).await;

    let info1 = TokenUsageInfo {
        total_token_usage: TokenUsage {
            input_tokens: 10,
            cached_input_tokens: 0,
            output_tokens: 20,
            reasoning_output_tokens: 0,
            total_tokens: 30,
        },
        last_token_usage: TokenUsage {
            input_tokens: 3,
            cached_input_tokens: 0,
            output_tokens: 4,
            reasoning_output_tokens: 0,
            total_tokens: 7,
        },
        model_context_window: Some(1_000),
    };
    let info2 = TokenUsageInfo {
        total_token_usage: TokenUsage {
            input_tokens: 100,
            cached_input_tokens: 50,
            output_tokens: 200,
            reasoning_output_tokens: 25,
            total_tokens: 375,
        },
        last_token_usage: TokenUsage {
            input_tokens: 10,
            cached_input_tokens: 0,
            output_tokens: 20,
            reasoning_output_tokens: 5,
            total_tokens: 35,
        },
        model_context_window: Some(2_000),
    };

    // Interleave populated and empty token-count events; `info2` is the last populated one.
    rollout_items.push(RolloutItem::EventMsg(EventMsg::TokenCount(
        TokenCountEvent {
            info: Some(info1),
            rate_limits: None,
        },
    )));
    rollout_items.push(RolloutItem::EventMsg(EventMsg::TokenCount(
        TokenCountEvent {
            info: None,
            rate_limits: None,
        },
    )));
    rollout_items.push(RolloutItem::EventMsg(EventMsg::TokenCount(
        TokenCountEvent {
            info: Some(info2.clone()),
            rate_limits: None,
        },
    )));
    rollout_items.push(RolloutItem::EventMsg(EventMsg::TokenCount(
        TokenCountEvent {
            info: None,
            rate_limits: None,
        },
    )));

    session
        .record_initial_history(resumed_initial_history_for_test(rollout_items))
        .await;

    let actual = session.state.lock().await.token_info();
    assert_eq!(actual, Some(info2));
}

/// `recompute_token_usage` estimates with the session's overridden base
/// instructions, not with the model's own instructions.
#[tokio::test]
async fn recompute_token_usage_uses_session_base_instructions() {
    let (session, turn_context) = make_session_and_context().await;

    let override_instructions = "SESSION_OVERRIDE_INSTRUCTIONS_ONLY".repeat(120);
    {
        let mut state = session.state.lock().await;
        state.session_configuration.base_instructions = override_instructions.clone();
    }

    let item = user_message("hello");
    session
        .record_into_history(std::slice::from_ref(&item), &turn_context)
        .await;

    let history = session.clone_history().await;
    let session_base_instructions = BaseInstructions {
        text: override_instructions,
    };
    let expected_tokens = history
        .estimate_token_count_with_base_instructions(&session_base_instructions)
        .expect("estimate with session base instructions");
    let model_estimated_tokens = history
        .estimate_token_count(&turn_context)
        .expect("estimate with model instructions");
    // The two estimates must differ, otherwise the test could not tell them apart.
    assert_ne!(expected_tokens, model_estimated_tokens);

    session.recompute_token_usage(&turn_context).await;

    let actual_tokens = session
        .state
        .lock()
        .await
        .token_info()
        .expect("token info")
        .last_token_usage
        .total_tokens;
    assert_eq!(actual_tokens, expected_tokens.max(0));
}

/// `recompute_token_usage` replaces a stale context window in the stored token
/// info with the turn's model context window.
#[tokio::test]
async fn recompute_token_usage_updates_model_context_window() {
    let (session, mut turn_context) = make_session_and_context().await;

    {
        let mut state = session.state.lock().await;
        state.set_token_info(Some(TokenUsageInfo {
            total_token_usage: TokenUsage::default(),
            last_token_usage: TokenUsage::default(),
            model_context_window: Some(258_400),
        }));
    }

    turn_context.model_info.context_window = Some(128_000);
    turn_context.model_info.effective_context_window_percent = 100;

    session.recompute_token_usage(&turn_context).await;

    let actual = session.state.lock().await.token_info().expect("token info");
    assert_eq!(actual.model_context_window, Some(128_000));
}

/// Recording a forked history rebuilds the expected transcript in session state.
#[tokio::test]
async fn record_initial_history_reconstructs_forked_transcript() {
    let (session, turn_context) = make_session_and_context().await;
    let (rollout_items, expected) = sample_rollout(&session, &turn_context).await;

    session
        .record_initial_history(InitialHistory::Forked(rollout_items))
        .await;

    let history = session.state.lock().await.clone_history();
    assert_eq!(expected, history.raw_items());
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn fork_startup_context_then_first_turn_diff_snapshot() -> anyhow::Result<()> { let server = start_mock_server().await; mount_sse_once( &server, sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]), ) .await; let first_forked_request = mount_sse_once( &server, sse(vec![ev_response_created("resp-2"), ev_completed("resp-2")]), ) .await; let mut builder = test_codex().with_config(|config| { config.permissions.approval_policy = codex_config::Constrained::allow_any(AskForApproval::OnRequest); }); let initial = builder.build(&server).await?; let rollout_path = initial .session_configured .rollout_path .clone() .expect("rollout path"); initial .codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "fork seed".into(), text_elements: Vec::new(), }], final_output_json_schema: None, }) .await?; wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; // Forking reads the persisted rollout JSONL, so force the completed source turn to disk // before snapshotting from it. 
initial.codex.ensure_rollout_materialized().await; initial.codex.flush_rollout().await; let mut fork_config = initial.config.clone(); fork_config.permissions.approval_policy = codex_config::Constrained::allow_any(AskForApproval::UnlessTrusted); let forked = initial .thread_manager .fork_thread( usize::MAX, fork_config, rollout_path, /*persist_extended_history*/ false, /*parent_trace*/ None, ) .await?; let collaboration_mode = CollaborationMode { mode: ModeKind::Plan, settings: Settings { model: forked.session_configured.model.clone(), reasoning_effort: None, developer_instructions: Some("Fork turn collaboration instructions.".to_string()), }, }; forked .thread .submit(Op::OverrideTurnContext { cwd: None, approval_policy: Some(AskForApproval::Never), approvals_reviewer: None, sandbox_policy: None, windows_sandbox_level: None, model: None, effort: None, summary: None, service_tier: None, collaboration_mode: Some(collaboration_mode), personality: None, }) .await?; forked .thread .submit(Op::UserInput { items: vec![UserInput::Text { text: "after fork".into(), text_elements: Vec::new(), }], final_output_json_schema: None, }) .await?; wait_for_event(&forked.thread, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; let request = first_forked_request.single_request(); let snapshot = context_snapshot::format_labeled_requests_snapshot( "First request after fork when startup preserves the parent baseline, the fork changes approval policy, and the first forked turn enters plan mode.", &[("First Forked Turn Request", &request)], &ContextSnapshotOptions::default() .render_mode(ContextSnapshotRenderMode::KindWithTextPrefix { max_chars: 96 }) .strip_capability_instructions() .strip_agents_md_user_context(), ); let mut settings = insta::Settings::clone_current(); settings.set_snapshot_path("snapshots"); settings.set_prepend_module_to_snapshot(false); settings.bind(|| { insta::assert_snapshot!( "codex_core__codex_tests__fork_startup_context_then_first_turn_diff", snapshot ); }); 
Ok(()) } #[tokio::test] async fn record_initial_history_forked_hydrates_previous_turn_settings() { let (session, turn_context) = make_session_and_context().await; let previous_model = "forked-rollout-model"; let previous_context_item = TurnContextItem { turn_id: Some(turn_context.sub_id.clone()), trace_id: turn_context.trace_id.clone(), cwd: turn_context.cwd.to_path_buf(), current_date: turn_context.current_date.clone(), timezone: turn_context.timezone.clone(), approval_policy: turn_context.approval_policy.value(), sandbox_policy: turn_context.sandbox_policy.get().clone(), network: None, model: previous_model.to_string(), personality: turn_context.personality, collaboration_mode: Some(turn_context.collaboration_mode.clone()), realtime_active: Some(turn_context.realtime_active), effort: turn_context.reasoning_effort, summary: turn_context.reasoning_summary, user_instructions: None, developer_instructions: None, final_output_json_schema: None, truncation_policy: Some(turn_context.truncation_policy), }; let turn_id = previous_context_item .turn_id .clone() .expect("turn context should have turn_id"); let rollout_items = vec![ RolloutItem::EventMsg(EventMsg::TurnStarted( codex_protocol::protocol::TurnStartedEvent { turn_id: turn_id.clone(), started_at: None, model_context_window: Some(128_000), collaboration_mode_kind: ModeKind::Default, }, )), RolloutItem::EventMsg(EventMsg::UserMessage( codex_protocol::protocol::UserMessageEvent { message: "forked seed".to_string(), images: None, local_images: Vec::new(), text_elements: Vec::new(), }, )), RolloutItem::TurnContext(previous_context_item.clone()), RolloutItem::EventMsg(EventMsg::TurnComplete( codex_protocol::protocol::TurnCompleteEvent { turn_id, last_agent_message: None, completed_at: None, duration_ms: None, }, )), ]; session .record_initial_history(InitialHistory::Forked(rollout_items)) .await; let history = session.clone_history().await; assert_eq!( session.previous_turn_settings().await, Some(PreviousTurnSettings { 
model: previous_model.to_string(), realtime_active: Some(turn_context.realtime_active), }) ); assert_eq!(history.raw_items(), &[]); assert_eq!( serde_json::to_value(session.reference_context_item().await) .expect("serialize fork reference context item"), serde_json::to_value(Some(previous_context_item)) .expect("serialize expected reference context item") ); } #[tokio::test] async fn thread_rollback_drops_last_turn_from_history() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let rollout_path = attach_rollout_recorder(&sess).await; let initial_context = sess.build_initial_context(tc.as_ref()).await; let turn_1 = vec![ user_message("turn 1 user"), assistant_message("turn 1 assistant"), ]; let turn_2 = vec![ user_message("turn 2 user"), assistant_message("turn 2 assistant"), ]; let mut full_history = Vec::new(); full_history.extend(initial_context.clone()); full_history.extend(turn_1.clone()); full_history.extend(turn_2); sess.replace_history(full_history.clone(), Some(tc.to_turn_context_item())) .await; let rollout_items: Vec = full_history .into_iter() .map(RolloutItem::ResponseItem) .collect(); sess.persist_rollout_items(&rollout_items).await; sess.set_previous_turn_settings(Some(PreviousTurnSettings { model: "stale-model".to_string(), realtime_active: Some(tc.realtime_active), })) .await; { let mut state = sess.state.lock().await; state.set_reference_context_item(Some(tc.to_turn_context_item())); } handlers::thread_rollback(&sess, "sub-1".to_string(), /*num_turns*/ 1).await; let rollback_event = wait_for_thread_rolled_back(&rx).await; assert_eq!(rollback_event.num_turns, 1); let mut expected = Vec::new(); expected.extend(initial_context); expected.extend(turn_1); let history = sess.clone_history().await; assert_eq!(expected, history.raw_items()); assert_eq!(sess.previous_turn_settings().await, None); assert!(sess.reference_context_item().await.is_none()); let InitialHistory::Resumed(resumed) = RolloutRecorder::get_rollout_history(&rollout_path) 
.await .expect("read rollout history") else { panic!("expected resumed rollout history"); }; assert!(resumed.history.iter().any(|item| { matches!( item, RolloutItem::EventMsg(EventMsg::ThreadRolledBack(rollback)) if rollback.num_turns == 1 ) })); } #[tokio::test] async fn thread_rollback_clears_history_when_num_turns_exceeds_existing_turns() { let (sess, tc, rx) = make_session_and_context_with_rx().await; attach_rollout_recorder(&sess).await; let initial_context = sess.build_initial_context(tc.as_ref()).await; let turn_1 = vec![user_message("turn 1 user")]; let mut full_history = Vec::new(); full_history.extend(initial_context.clone()); full_history.extend(turn_1); sess.replace_history(full_history.clone(), Some(tc.to_turn_context_item())) .await; let rollout_items: Vec = full_history .into_iter() .map(RolloutItem::ResponseItem) .collect(); sess.persist_rollout_items(&rollout_items).await; handlers::thread_rollback(&sess, "sub-1".to_string(), /*num_turns*/ 99).await; let rollback_event = wait_for_thread_rolled_back(&rx).await; assert_eq!(rollback_event.num_turns, 99); let history = sess.clone_history().await; assert_eq!(initial_context, history.raw_items()); } #[tokio::test] async fn thread_rollback_fails_without_persisted_rollout_path() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let initial_context = sess.build_initial_context(tc.as_ref()).await; sess.record_into_history(&initial_context, tc.as_ref()) .await; handlers::thread_rollback(&sess, "sub-1".to_string(), /*num_turns*/ 1).await; let error_event = wait_for_thread_rollback_failed(&rx).await; assert_eq!( error_event.message, "thread rollback requires a persisted rollout path" ); assert_eq!( error_event.codex_error_info, Some(CodexErrorInfo::ThreadRollbackFailed) ); assert_eq!(sess.clone_history().await.raw_items(), initial_context); } #[tokio::test] async fn thread_rollback_recomputes_previous_turn_settings_and_reference_context_from_replay() { let (sess, tc, rx) = 
make_session_and_context_with_rx().await; attach_rollout_recorder(&sess).await; let first_context_item = tc.to_turn_context_item(); let first_turn_id = first_context_item .turn_id .clone() .expect("turn context should have turn_id"); let mut rolled_back_context_item = first_context_item.clone(); rolled_back_context_item.turn_id = Some("rolled-back-turn".to_string()); rolled_back_context_item.model = "rolled-back-model".to_string(); let rolled_back_turn_id = rolled_back_context_item .turn_id .clone() .expect("turn context should have turn_id"); let turn_one_user = user_message("turn 1 user"); let turn_one_assistant = assistant_message("turn 1 assistant"); let turn_two_user = user_message("turn 2 user"); let turn_two_assistant = assistant_message("turn 2 assistant"); sess.persist_rollout_items(&[ RolloutItem::EventMsg(EventMsg::TurnStarted( codex_protocol::protocol::TurnStartedEvent { turn_id: first_turn_id.clone(), started_at: None, model_context_window: Some(128_000), collaboration_mode_kind: ModeKind::Default, }, )), RolloutItem::EventMsg(EventMsg::UserMessage( codex_protocol::protocol::UserMessageEvent { message: "turn 1 user".to_string(), images: None, local_images: Vec::new(), text_elements: Vec::new(), }, )), RolloutItem::TurnContext(first_context_item.clone()), RolloutItem::ResponseItem(turn_one_user.clone()), RolloutItem::ResponseItem(turn_one_assistant.clone()), RolloutItem::EventMsg(EventMsg::TurnComplete(TurnCompleteEvent { turn_id: first_turn_id, last_agent_message: None, completed_at: None, duration_ms: None, })), RolloutItem::EventMsg(EventMsg::TurnStarted( codex_protocol::protocol::TurnStartedEvent { turn_id: rolled_back_turn_id.clone(), started_at: None, model_context_window: Some(128_000), collaboration_mode_kind: ModeKind::Default, }, )), RolloutItem::EventMsg(EventMsg::UserMessage( codex_protocol::protocol::UserMessageEvent { message: "turn 2 user".to_string(), images: None, local_images: Vec::new(), text_elements: Vec::new(), }, )), 
RolloutItem::TurnContext(rolled_back_context_item), RolloutItem::ResponseItem(turn_two_user), RolloutItem::ResponseItem(turn_two_assistant), RolloutItem::EventMsg(EventMsg::TurnComplete(TurnCompleteEvent { turn_id: rolled_back_turn_id, last_agent_message: None, completed_at: None, duration_ms: None, })), ]) .await; sess.replace_history( vec![assistant_message("stale history")], Some(first_context_item.clone()), ) .await; sess.set_previous_turn_settings(Some(PreviousTurnSettings { model: "stale-model".to_string(), realtime_active: None, })) .await; handlers::thread_rollback(&sess, "sub-1".to_string(), /*num_turns*/ 1).await; let rollback_event = wait_for_thread_rolled_back(&rx).await; assert_eq!(rollback_event.num_turns, 1); assert_eq!( sess.clone_history().await.raw_items(), vec![turn_one_user, turn_one_assistant] ); assert_eq!( sess.previous_turn_settings().await, Some(PreviousTurnSettings { model: tc.model_info.slug.clone(), realtime_active: Some(tc.realtime_active), }) ); assert_eq!( serde_json::to_value(sess.reference_context_item().await) .expect("serialize replay reference context item"), serde_json::to_value(Some(first_context_item)) .expect("serialize expected reference context item") ); } #[tokio::test] async fn thread_rollback_restores_cleared_reference_context_item_after_compaction() { let (sess, tc, rx) = make_session_and_context_with_rx().await; attach_rollout_recorder(&sess).await; let first_context_item = tc.to_turn_context_item(); let first_turn_id = first_context_item .turn_id .clone() .expect("turn context should have turn_id"); let compact_turn_id = "compact-turn".to_string(); let rolled_back_turn_id = "rolled-back-turn".to_string(); let compacted_history = vec![ user_message("turn 1 user"), user_message("summary after compaction"), ]; sess.persist_rollout_items(&[ RolloutItem::EventMsg(EventMsg::TurnStarted( codex_protocol::protocol::TurnStartedEvent { turn_id: first_turn_id.clone(), started_at: None, model_context_window: Some(128_000), 
collaboration_mode_kind: ModeKind::Default, }, )), RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent { message: "turn 1 user".to_string(), images: None, local_images: Vec::new(), text_elements: Vec::new(), })), RolloutItem::TurnContext(first_context_item.clone()), RolloutItem::ResponseItem(user_message("turn 1 user")), RolloutItem::ResponseItem(assistant_message("turn 1 assistant")), RolloutItem::EventMsg(EventMsg::TurnComplete(TurnCompleteEvent { turn_id: first_turn_id, last_agent_message: None, completed_at: None, duration_ms: None, })), RolloutItem::EventMsg(EventMsg::TurnStarted( codex_protocol::protocol::TurnStartedEvent { turn_id: compact_turn_id.clone(), started_at: None, model_context_window: Some(128_000), collaboration_mode_kind: ModeKind::Default, }, )), RolloutItem::Compacted(CompactedItem { message: "summary after compaction".to_string(), replacement_history: Some(compacted_history.clone()), }), RolloutItem::EventMsg(EventMsg::TurnComplete(TurnCompleteEvent { turn_id: compact_turn_id, last_agent_message: None, completed_at: None, duration_ms: None, })), RolloutItem::EventMsg(EventMsg::TurnStarted( codex_protocol::protocol::TurnStartedEvent { turn_id: rolled_back_turn_id.clone(), started_at: None, model_context_window: Some(128_000), collaboration_mode_kind: ModeKind::Default, }, )), RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent { message: "turn 2 user".to_string(), images: None, local_images: Vec::new(), text_elements: Vec::new(), })), RolloutItem::TurnContext(TurnContextItem { turn_id: Some(rolled_back_turn_id.clone()), model: "rolled-back-model".to_string(), ..first_context_item.clone() }), RolloutItem::ResponseItem(user_message("turn 2 user")), RolloutItem::ResponseItem(assistant_message("turn 2 assistant")), RolloutItem::EventMsg(EventMsg::TurnComplete(TurnCompleteEvent { turn_id: rolled_back_turn_id, last_agent_message: None, completed_at: None, duration_ms: None, })), ]) .await; sess.replace_history( 
vec![assistant_message("stale history")], Some(first_context_item), ) .await; handlers::thread_rollback(&sess, "sub-1".to_string(), /*num_turns*/ 1).await; let rollback_event = wait_for_thread_rolled_back(&rx).await; assert_eq!(rollback_event.num_turns, 1); assert_eq!(sess.clone_history().await.raw_items(), compacted_history); assert!(sess.reference_context_item().await.is_none()); } #[tokio::test] async fn thread_rollback_persists_marker_and_replays_cumulatively() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let rollout_path = attach_rollout_recorder(&sess).await; let turn_context_item = tc.to_turn_context_item(); sess.persist_rollout_items(&[ RolloutItem::EventMsg(EventMsg::TurnStarted( codex_protocol::protocol::TurnStartedEvent { turn_id: "turn-1".to_string(), started_at: None, model_context_window: Some(128_000), collaboration_mode_kind: ModeKind::Default, }, )), RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent { message: "turn 1 user".to_string(), images: None, local_images: Vec::new(), text_elements: Vec::new(), })), RolloutItem::TurnContext(turn_context_item.clone()), RolloutItem::ResponseItem(user_message("turn 1 user")), RolloutItem::ResponseItem(assistant_message("turn 1 assistant")), RolloutItem::EventMsg(EventMsg::TurnComplete(TurnCompleteEvent { turn_id: "turn-1".to_string(), last_agent_message: None, completed_at: None, duration_ms: None, })), RolloutItem::EventMsg(EventMsg::TurnStarted( codex_protocol::protocol::TurnStartedEvent { turn_id: "turn-2".to_string(), started_at: None, model_context_window: Some(128_000), collaboration_mode_kind: ModeKind::Default, }, )), RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent { message: "turn 2 user".to_string(), images: None, local_images: Vec::new(), text_elements: Vec::new(), })), RolloutItem::TurnContext(turn_context_item.clone()), RolloutItem::ResponseItem(user_message("turn 2 user")), RolloutItem::ResponseItem(assistant_message("turn 2 assistant")), 
RolloutItem::EventMsg(EventMsg::TurnComplete(TurnCompleteEvent { turn_id: "turn-2".to_string(), last_agent_message: None, completed_at: None, duration_ms: None, })), RolloutItem::EventMsg(EventMsg::TurnStarted( codex_protocol::protocol::TurnStartedEvent { turn_id: "turn-3".to_string(), started_at: None, model_context_window: Some(128_000), collaboration_mode_kind: ModeKind::Default, }, )), RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent { message: "turn 3 user".to_string(), images: None, local_images: Vec::new(), text_elements: Vec::new(), })), RolloutItem::TurnContext(turn_context_item), RolloutItem::ResponseItem(user_message("turn 3 user")), RolloutItem::ResponseItem(assistant_message("turn 3 assistant")), RolloutItem::EventMsg(EventMsg::TurnComplete(TurnCompleteEvent { turn_id: "turn-3".to_string(), last_agent_message: None, completed_at: None, duration_ms: None, })), ]) .await; handlers::thread_rollback(&sess, "sub-1".to_string(), /*num_turns*/ 1).await; let first_rollback = wait_for_thread_rolled_back(&rx).await; assert_eq!(first_rollback.num_turns, 1); handlers::thread_rollback(&sess, "sub-1".to_string(), /*num_turns*/ 1).await; let second_rollback = wait_for_thread_rolled_back(&rx).await; assert_eq!(second_rollback.num_turns, 1); assert_eq!( sess.clone_history().await.raw_items(), vec![ user_message("turn 1 user"), assistant_message("turn 1 assistant") ] ); let InitialHistory::Resumed(resumed) = RolloutRecorder::get_rollout_history(&rollout_path) .await .expect("read rollout history") else { panic!("expected resumed rollout history"); }; let rollback_markers = resumed .history .iter() .filter(|item| matches!(item, RolloutItem::EventMsg(EventMsg::ThreadRolledBack(_)))) .count(); assert_eq!(rollback_markers, 2); } #[tokio::test] async fn thread_rollback_fails_when_turn_in_progress() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let initial_context = sess.build_initial_context(tc.as_ref()).await; 
sess.record_into_history(&initial_context, tc.as_ref()) .await; *sess.active_turn.lock().await = Some(crate::state::ActiveTurn::default()); handlers::thread_rollback(&sess, "sub-1".to_string(), /*num_turns*/ 1).await; let error_event = wait_for_thread_rollback_failed(&rx).await; assert_eq!( error_event.codex_error_info, Some(CodexErrorInfo::ThreadRollbackFailed) ); let history = sess.clone_history().await; assert_eq!(initial_context, history.raw_items()); } #[tokio::test] async fn thread_rollback_fails_when_num_turns_is_zero() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let initial_context = sess.build_initial_context(tc.as_ref()).await; sess.record_into_history(&initial_context, tc.as_ref()) .await; handlers::thread_rollback(&sess, "sub-1".to_string(), /*num_turns*/ 0).await; let error_event = wait_for_thread_rollback_failed(&rx).await; assert_eq!(error_event.message, "num_turns must be >= 1"); assert_eq!( error_event.codex_error_info, Some(CodexErrorInfo::ThreadRollbackFailed) ); let history = sess.clone_history().await; assert_eq!(initial_context, history.raw_items()); } #[tokio::test] async fn set_rate_limits_retains_previous_credits() { let codex_home = tempfile::tempdir().expect("create temp dir"); let config = build_test_config(codex_home.path()).await; let config = Arc::new(config); let model = ModelsManager::get_model_offline_for_tests(config.model.as_deref()); let model_info = ModelsManager::construct_model_info_offline_for_tests( model.as_str(), &config.to_models_manager_config(), ); let reasoning_effort = config.model_reasoning_effort; let collaboration_mode = CollaborationMode { mode: ModeKind::Default, settings: Settings { model, reasoning_effort, developer_instructions: None, }, }; let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), collaboration_mode, model_reasoning_summary: config.model_reasoning_summary, developer_instructions: config.developer_instructions.clone(), user_instructions: 
config.user_instructions.clone(), service_tier: None, personality: config.personality, base_instructions: config .base_instructions .clone() .unwrap_or_else(|| model_info.get_model_instructions(config.personality)), compact_prompt: config.compact_prompt.clone(), surface_policy: SessionSurfacePolicy::full(), approval_policy: config.permissions.approval_policy.clone(), approvals_reviewer: config.approvals_reviewer, sandbox_policy: config.permissions.sandbox_policy.clone(), file_system_sandbox_policy: config.permissions.file_system_sandbox_policy.clone(), network_sandbox_policy: config.permissions.network_sandbox_policy, windows_sandbox_level: WindowsSandboxLevel::from_config(&config), cwd: config.cwd.clone(), codex_home: config.codex_home.clone(), thread_name: None, original_config_do_not_use: Arc::clone(&config), metrics_service_name: None, app_server_client_name: None, app_server_client_version: None, session_source: SessionSource::Exec, dynamic_tools: Vec::new(), persist_extended_history: false, inherited_shell_snapshot: None, user_shell_override: None, }; let mut state = SessionState::new(session_configuration); let initial = RateLimitSnapshot { limit_id: None, limit_name: None, primary: Some(RateLimitWindow { used_percent: 10.0, window_minutes: Some(15), resets_at: Some(1_700), }), secondary: None, credits: Some(CreditsSnapshot { has_credits: true, unlimited: false, balance: Some("10.00".to_string()), }), plan_type: Some(codex_protocol::account::PlanType::Plus), }; state.set_rate_limits(initial.clone()); let update = RateLimitSnapshot { limit_id: Some("codex_other".to_string()), limit_name: Some("codex_other".to_string()), primary: Some(RateLimitWindow { used_percent: 40.0, window_minutes: Some(30), resets_at: Some(1_800), }), secondary: Some(RateLimitWindow { used_percent: 5.0, window_minutes: Some(60), resets_at: Some(1_900), }), credits: None, plan_type: None, }; state.set_rate_limits(update.clone()); assert_eq!( state.latest_rate_limits, 
Some(RateLimitSnapshot { limit_id: Some("codex_other".to_string()), limit_name: Some("codex_other".to_string()), primary: update.primary.clone(), secondary: update.secondary, credits: initial.credits, plan_type: initial.plan_type, }) ); } #[tokio::test] async fn set_rate_limits_updates_plan_type_when_present() { let codex_home = tempfile::tempdir().expect("create temp dir"); let config = build_test_config(codex_home.path()).await; let config = Arc::new(config); let model = ModelsManager::get_model_offline_for_tests(config.model.as_deref()); let model_info = ModelsManager::construct_model_info_offline_for_tests( model.as_str(), &config.to_models_manager_config(), ); let reasoning_effort = config.model_reasoning_effort; let collaboration_mode = CollaborationMode { mode: ModeKind::Default, settings: Settings { model, reasoning_effort, developer_instructions: None, }, }; let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), collaboration_mode, model_reasoning_summary: config.model_reasoning_summary, developer_instructions: config.developer_instructions.clone(), user_instructions: config.user_instructions.clone(), service_tier: None, personality: config.personality, base_instructions: config .base_instructions .clone() .unwrap_or_else(|| model_info.get_model_instructions(config.personality)), compact_prompt: config.compact_prompt.clone(), surface_policy: SessionSurfacePolicy::full(), approval_policy: config.permissions.approval_policy.clone(), approvals_reviewer: config.approvals_reviewer, sandbox_policy: config.permissions.sandbox_policy.clone(), file_system_sandbox_policy: config.permissions.file_system_sandbox_policy.clone(), network_sandbox_policy: config.permissions.network_sandbox_policy, windows_sandbox_level: WindowsSandboxLevel::from_config(&config), cwd: config.cwd.clone(), codex_home: config.codex_home.clone(), thread_name: None, original_config_do_not_use: Arc::clone(&config), metrics_service_name: None, 
app_server_client_name: None, app_server_client_version: None, session_source: SessionSource::Exec, dynamic_tools: Vec::new(), persist_extended_history: false, inherited_shell_snapshot: None, user_shell_override: None, }; let mut state = SessionState::new(session_configuration); let initial = RateLimitSnapshot { limit_id: None, limit_name: None, primary: Some(RateLimitWindow { used_percent: 15.0, window_minutes: Some(20), resets_at: Some(1_600), }), secondary: Some(RateLimitWindow { used_percent: 5.0, window_minutes: Some(45), resets_at: Some(1_650), }), credits: Some(CreditsSnapshot { has_credits: true, unlimited: false, balance: Some("15.00".to_string()), }), plan_type: Some(codex_protocol::account::PlanType::Plus), }; state.set_rate_limits(initial.clone()); let update = RateLimitSnapshot { limit_id: None, limit_name: None, primary: Some(RateLimitWindow { used_percent: 35.0, window_minutes: Some(25), resets_at: Some(1_700), }), secondary: None, credits: None, plan_type: Some(codex_protocol::account::PlanType::Pro), }; state.set_rate_limits(update.clone()); assert_eq!( state.latest_rate_limits, Some(RateLimitSnapshot { limit_id: Some("codex".to_string()), limit_name: None, primary: update.primary, secondary: update.secondary, credits: initial.credits, plan_type: update.plan_type, }) ); } #[test] fn prefers_structured_content_when_present() { let ctr = McpCallToolResult { // Content present but should be ignored because structured_content is set. 
content: vec![text_block("ignored")], is_error: None, structured_content: Some(json!({ "ok": true, "value": 42 })), meta: None, }; let got = ctr.into_function_call_output_payload(); let expected = FunctionCallOutputPayload { body: FunctionCallOutputBody::Text( serde_json::to_string(&json!({ "ok": true, "value": 42 })) .unwrap(), ), success: Some(true), }; assert_eq!(expected, got); } #[tokio::test] async fn includes_timed_out_message() { let exec = ExecToolCallOutput { exit_code: 0, stdout: StreamOutput::new(String::new()), stderr: StreamOutput::new(String::new()), aggregated_output: StreamOutput::new("Command output".to_string()), duration: StdDuration::from_secs(1), timed_out: true, }; let (_, turn_context) = make_session_and_context().await; let out = format_exec_output_str(&exec, turn_context.truncation_policy); assert_eq!( out, "command timed out after 1000 milliseconds\nCommand output" ); } #[tokio::test] async fn turn_context_with_model_updates_model_fields() { let (session, mut turn_context) = make_session_and_context().await; turn_context.reasoning_effort = Some(ReasoningEffortConfig::Minimal); let updated = turn_context .with_model("gpt-5.1".to_string(), &session.services.models_manager) .await; let expected_model_info = session .services .models_manager .get_model_info( "gpt-5.1", &updated.config.as_ref().to_models_manager_config(), ) .await; assert_eq!(updated.config.model.as_deref(), Some("gpt-5.1")); assert_eq!(updated.collaboration_mode.model(), "gpt-5.1"); assert_eq!(updated.model_info, expected_model_info); assert_eq!( updated.reasoning_effort, Some(ReasoningEffortConfig::Medium) ); assert_eq!( updated.collaboration_mode.reasoning_effort(), Some(ReasoningEffortConfig::Medium) ); assert_eq!( updated.config.model_reasoning_effort, Some(ReasoningEffortConfig::Medium) ); assert_eq!( updated.truncation_policy, expected_model_info.truncation_policy.into() ); assert!(!Arc::ptr_eq( &updated.tool_call_gate, &turn_context.tool_call_gate )); } #[test] fn 
falls_back_to_content_when_structured_is_null() {
    let ctr = McpCallToolResult {
        content: vec![text_block("hello"), text_block("world")],
        is_error: None,
        structured_content: Some(serde_json::Value::Null),
        meta: None,
    };
    let got = ctr.into_function_call_output_payload();
    let expected = FunctionCallOutputPayload {
        body: FunctionCallOutputBody::Text(
            serde_json::to_string(&vec![text_block("hello"), text_block("world")]).unwrap(),
        ),
        success: Some(true),
    };
    assert_eq!(expected, got);
}

/// `is_error: Some(true)` yields `success: Some(false)`; the body still comes
/// from `structured_content`.
#[test]
fn success_flag_reflects_is_error_true() {
    let ctr = McpCallToolResult {
        content: vec![text_block("unused")],
        is_error: Some(true),
        structured_content: Some(json!({ "message": "bad" })),
        meta: None,
    };
    let got = ctr.into_function_call_output_payload();
    let expected = FunctionCallOutputPayload {
        body: FunctionCallOutputBody::Text(
            serde_json::to_string(&json!({ "message": "bad" })).unwrap(),
        ),
        success: Some(false),
    };
    assert_eq!(expected, got);
}

/// With `is_error: Some(false)` and no structured content, the payload is the
/// serialized `content` and `success` is `Some(true)`.
#[test]
fn success_flag_true_with_no_error_and_content_used() {
    let ctr = McpCallToolResult {
        content: vec![text_block("alpha")],
        is_error: Some(false),
        structured_content: None,
        meta: None,
    };
    let got = ctr.into_function_call_output_payload();
    let expected = FunctionCallOutputPayload {
        body: FunctionCallOutputBody::Text(
            serde_json::to_string(&vec![text_block("alpha")]).unwrap(),
        ),
        success: Some(true),
    };
    assert_eq!(expected, got);
}

/// Receives events from `rx` until a `ThreadRolledBack` event arrives and
/// returns its payload; all other events are discarded.
///
/// # Panics
/// Panics if no matching event arrives within two seconds of the call, or if
/// `recv` returns an error.
// NOTE(review): the element type was missing from the signature; `Event` is
// inferred from the `evt.msg` / `EventMsg` usage below — confirm the path.
async fn wait_for_thread_rolled_back(
    rx: &async_channel::Receiver<codex_protocol::protocol::Event>,
) -> ThreadRolledBackEvent {
    let deadline = StdDuration::from_secs(2);
    let start = std::time::Instant::now();
    loop {
        // One overall budget shared by every `recv`, not a fresh timeout per event.
        let remaining = deadline.saturating_sub(start.elapsed());
        let evt = tokio::time::timeout(remaining, rx.recv())
            .await
            .expect("timeout waiting for event")
            .expect("event");
        match evt.msg {
            EventMsg::ThreadRolledBack(payload) => return payload,
            _ => continue,
        }
    }
}

/// Receives events from `rx` until an `Error` event tagged
/// `CodexErrorInfo::ThreadRollbackFailed` arrives and returns its payload.
///
/// # Panics
/// Panics if no matching event arrives within two seconds of the call, or if
/// `recv` returns an error.
// NOTE(review): element type restored as for `wait_for_thread_rolled_back`.
async fn wait_for_thread_rollback_failed(
    rx: &async_channel::Receiver<codex_protocol::protocol::Event>,
) -> ErrorEvent {
    let deadline = StdDuration::from_secs(2);
    let start = std::time::Instant::now();
loop {
        // One overall budget shared by every `recv`, not a fresh timeout per event.
        let remaining = deadline.saturating_sub(start.elapsed());
        let evt = tokio::time::timeout(remaining, rx.recv())
            .await
            .expect("timeout waiting for event")
            .expect("event");
        match evt.msg {
            EventMsg::Error(payload)
                if payload.codex_error_info == Some(CodexErrorInfo::ThreadRollbackFailed) =>
            {
                return payload;
            }
            _ => continue,
        }
    }
}

/// Creates a `RolloutRecorder` for the session's current config, installs it
/// in `session.services.rollout`, then materializes and flushes the rollout.
///
/// Returns the recorder's rollout path.
///
/// # Panics
/// Panics if the recorder cannot be created.
async fn attach_rollout_recorder(session: &Arc<Session>) -> PathBuf {
    let config = session.get_config().await;
    let recorder = RolloutRecorder::new(
        config.as_ref(),
        RolloutRecorderParams::new(
            ThreadId::default(),
            /*forked_from_id*/ None,
            SessionSource::Exec,
            BaseInstructions::default(),
            Vec::new(),
            EventPersistenceMode::Limited,
        ),
        /*state_db_ctx*/ None,
        /*state_builder*/ None,
    )
    .await
    .expect("create rollout recorder");
    // Capture the path before the recorder is moved into the session.
    let rollout_path = recorder.rollout_path().to_path_buf();
    {
        // Scoped so the lock guard is released before the awaits below.
        let mut rollout = session.services.rollout.lock().await;
        *rollout = Some(recorder);
    }
    session.ensure_rollout_materialized().await;
    session.flush_rollout().await;
    rollout_path
}

/// Builds the JSON object `{"type": "text", "text": s}`.
fn text_block(s: &str) -> serde_json::Value {
    json!({
        "type": "text",
        "text": s,
    })
}

/// Builds a `Config` rooted at `codex_home` via the test-only builder.
///
/// # Panics
/// Panics if the build fails.
async fn build_test_config(codex_home: &Path) -> Config {
    ConfigBuilder::without_managed_config_for_tests()
        .codex_home(codex_home.to_path_buf())
        .build()
        .await
        .expect("load default test config")
}

/// Builds a `SessionTelemetry` with fixed test identity values (no account id,
/// a placeholder email, ChatGPT auth mode, prompt logging off).
fn session_telemetry(
    conversation_id: ThreadId,
    config: &Config,
    model_info: &ModelInfo,
    session_source: SessionSource,
) -> SessionTelemetry {
    SessionTelemetry::new(
        conversation_id,
        ModelsManager::get_model_offline_for_tests(config.model.as_deref()).as_str(),
        model_info.slug.as_str(),
        /*account_id*/ None,
        Some("test@test.com".to_string()),
        Some(TelemetryAuthMode::Chatgpt),
        "test_originator".to_string(),
        /*log_user_prompts*/ false,
        "test".to_string(),
        session_source,
    )
}

/// Builds a `SessionConfiguration` from a fresh default test config, using
/// offline model info.
// NOTE(review): the `tempfile` guard is a local that is never moved out, so the
// directory is presumably removed when this function returns — confirm callers
// do not need `codex_home` to exist on disk.
pub(crate) async fn make_session_configuration_for_tests() -> SessionConfiguration {
    let codex_home = tempfile::tempdir().expect("create temp dir");
    let config = build_test_config(codex_home.path()).await;
    let config = Arc::new(config);
    let
model = ModelsManager::get_model_offline_for_tests(config.model.as_deref());
    let model_info = ModelsManager::construct_model_info_offline_for_tests(
        model.as_str(),
        &config.to_models_manager_config(),
    );
    let reasoning_effort = config.model_reasoning_effort;
    let collaboration_mode = CollaborationMode {
        mode: ModeKind::Default,
        settings: Settings {
            model,
            reasoning_effort,
            developer_instructions: None,
        },
    };
    // Most fields are copied straight from `config`; the rest use fixed
    // test values.
    SessionConfiguration {
        provider: config.model_provider.clone(),
        collaboration_mode,
        model_reasoning_summary: config.model_reasoning_summary,
        developer_instructions: config.developer_instructions.clone(),
        user_instructions: config.user_instructions.clone(),
        service_tier: None,
        personality: config.personality,
        // Explicit base instructions win; otherwise use the model's own.
        base_instructions: config
            .base_instructions
            .clone()
            .unwrap_or_else(|| model_info.get_model_instructions(config.personality)),
        compact_prompt: config.compact_prompt.clone(),
        surface_policy: SessionSurfacePolicy::full(),
        approval_policy: config.permissions.approval_policy.clone(),
        approvals_reviewer: config.approvals_reviewer,
        sandbox_policy: config.permissions.sandbox_policy.clone(),
        file_system_sandbox_policy: config.permissions.file_system_sandbox_policy.clone(),
        network_sandbox_policy: config.permissions.network_sandbox_policy,
        windows_sandbox_level: WindowsSandboxLevel::from_config(&config),
        cwd: config.cwd.clone(),
        codex_home: config.codex_home.clone(),
        thread_name: None,
        original_config_do_not_use: Arc::clone(&config),
        metrics_service_name: None,
        app_server_client_name: None,
        app_server_client_version: None,
        session_source: SessionSource::Exec,
        dynamic_tools: Vec::new(),
        persist_extended_history: false,
        inherited_shell_snapshot: None,
        user_shell_override: None,
    }
}

/// A cwd-only `apply` must leave an explicitly configured ("split")
/// file-system sandbox policy unchanged.
#[tokio::test]
async fn session_configuration_apply_preserves_split_file_system_policy_on_cwd_only_update() {
    let mut session_configuration = make_session_configuration_for_tests().await;
    let workspace = tempfile::tempdir().expect("create temp dir");
    let project_root =
workspace.path().join("project");
    let original_cwd = project_root.join("subdir");
    let docs_dir = original_cwd.join("docs");
    std::fs::create_dir_all(&docs_dir).expect("create docs dir");
    let docs_dir = docs_dir.abs();
    session_configuration.cwd = original_cwd.abs();
    session_configuration.sandbox_policy =
        codex_config::Constrained::allow_any(SandboxPolicy::WorkspaceWrite {
            writable_roots: Vec::new(),
            read_only_access: ReadOnlyAccess::Restricted {
                include_platform_defaults: true,
                readable_roots: vec![docs_dir.clone()],
            },
            network_access: false,
            exclude_tmpdir_env_var: true,
            exclude_slash_tmp: true,
        });
    // Hand-written policy: write access to the cwd, read access to `docs_dir`.
    session_configuration.file_system_sandbox_policy =
        FileSystemSandboxPolicy::restricted(vec![
            FileSystemSandboxEntry {
                path: FileSystemPath::Special {
                    value: FileSystemSpecialPath::CurrentWorkingDirectory,
                },
                access: FileSystemAccessMode::Write,
            },
            FileSystemSandboxEntry {
                path: FileSystemPath::Path { path: docs_dir },
                access: FileSystemAccessMode::Read,
            },
        ]);
    // Only the cwd changes (subdir -> project root).
    let updated = session_configuration
        .apply(&SessionSettingsUpdate {
            cwd: Some(project_root),
            ..Default::default()
        })
        .expect("cwd-only update should succeed");
    assert_eq!(
        updated.file_system_sandbox_policy,
        session_configuration.file_system_sandbox_policy
    );
}

/// A skill enabled under the parent config must be reported as disabled in a
/// new default turn once a role config that disables it has been applied.
#[cfg_attr(windows, ignore)]
#[tokio::test]
async fn new_default_turn_uses_config_aware_skills_for_role_overrides() {
    let (session, _turn_context) = make_session_and_context().await;
    let parent_config = session.get_config().await;
    let codex_home = parent_config.codex_home.clone();
    // Write a minimal skill under `<codex_home>/skills/demo`.
    let skill_dir = codex_home.join("skills").join("demo");
    std::fs::create_dir_all(&skill_dir).expect("create skill dir");
    let skill_path = skill_dir.join("SKILL.md");
    std::fs::write(
        &skill_path,
        "---\nname: demo-skill\ndescription: demo description\n---\n\n# Body\n",
    )
    .expect("write skill");
    let parent_outcome = session
        .services
        .skills_manager
        .skills_for_cwd(
            &crate::skills_load_input_from_config(&parent_config, Vec::new()),
            /*force_reload*/ true,
        )
        .await;
    let parent_skill =
parent_outcome
        .skills
        .iter()
        .find(|skill| skill.name == "demo-skill")
        .expect("demo skill should be discovered");
    // Baseline: the skill is enabled under the parent config.
    assert_eq!(parent_outcome.is_skill_enabled(parent_skill), true);
    let role_path = codex_home.join("skills-role.toml");
    // Role config that disables the demo skill by path. Each TOML key/value
    // pair and the table header must sit on its own line, so the raw string
    // carries real line breaks.
    std::fs::write(
        &role_path,
        format!(
            r#"developer_instructions = "Stay focused"

[[skills.config]]
path = "{}"
enabled = false
"#,
            skill_path.display()
        ),
    )
    .expect("write role config");
    let mut child_config = (*parent_config).clone();
    child_config.agent_roles.insert(
        "custom".to_string(),
        crate::config::AgentRoleConfig {
            description: None,
            config_file: Some(role_path),
            nickname_candidates: None,
        },
    );
    crate::agent::role::apply_role_to_config(&mut child_config, Some("custom"))
        .await
        .expect("custom role should apply");
    {
        // Swap the session's stored config for the role-adjusted one; scoped
        // so the state lock is released before the next await.
        let mut state = session.state.lock().await;
        state.session_configuration.original_config_do_not_use = Arc::new(child_config);
    }
    let child_turn = session
        .new_default_turn_with_sub_id("role-skill-turn".to_string())
        .await;
    let child_skill = child_turn
        .turn_skills
        .outcome
        .skills
        .iter()
        .find(|skill| skill.name == "demo-skill")
        .expect("demo skill should be discovered");
    // Still discovered, but now disabled by the role override.
    assert_eq!(
        child_turn.turn_skills.outcome.is_skill_enabled(child_skill),
        false
    );
}

/// When the file-system policy was derived from the legacy sandbox policy, a
/// cwd-only `apply` must re-derive it against the new cwd.
#[tokio::test]
async fn session_configuration_apply_rederives_legacy_file_system_policy_on_cwd_update() {
    let mut session_configuration = make_session_configuration_for_tests().await;
    let workspace = tempfile::tempdir().expect("create temp dir");
    let project_root = workspace.path().join("project");
    let original_cwd = project_root.join("subdir");
    let docs_dir = original_cwd.join("docs");
    std::fs::create_dir_all(&docs_dir).expect("create docs dir");
    let docs_dir = docs_dir.abs();
    session_configuration.cwd = original_cwd.abs();
    session_configuration.sandbox_policy =
        codex_config::Constrained::allow_any(SandboxPolicy::WorkspaceWrite {
            writable_roots: Vec::new(),
            read_only_access: ReadOnlyAccess::Restricted {
                include_platform_defaults: true,
                readable_roots: vec![docs_dir],
},
            network_access: false,
            exclude_tmpdir_env_var: true,
            exclude_slash_tmp: true,
        });
    // Derive the file-system policy from the legacy policy at the original cwd.
    session_configuration.file_system_sandbox_policy =
        FileSystemSandboxPolicy::from_legacy_sandbox_policy(
            session_configuration.sandbox_policy.get(),
            &session_configuration.cwd,
        );
    let updated = session_configuration
        .apply(&SessionSettingsUpdate {
            cwd: Some(project_root.clone()),
            ..Default::default()
        })
        .expect("cwd-only update should succeed");
    // Expected: same derivation, evaluated against the new cwd.
    assert_eq!(
        updated.file_system_sandbox_policy,
        FileSystemSandboxPolicy::from_legacy_sandbox_policy(
            updated.sandbox_policy.get(),
            &project_root,
        )
    );
}

/// A relative cwd passed to `update_settings` must be stored and surfaced on
/// the next turn as the joined absolute path.
#[tokio::test]
async fn session_update_settings_keeps_runtime_cwds_absolute() {
    let (session, turn_context) = make_session_and_context().await;
    let updated_cwd = turn_context.cwd.join("project");
    std::fs::create_dir_all(updated_cwd.as_path()).expect("create project dir");
    session
        .update_settings(SessionSettingsUpdate {
            // Relative on purpose.
            cwd: Some(PathBuf::from("project")),
            ..Default::default()
        })
        .await
        .expect("cwd update should succeed");
    let session_cwd = {
        let state = session.state.lock().await;
        state.session_configuration.cwd.clone()
    };
    let config = session.get_config().await;
    let next_turn = session.new_default_turn().await;
    assert_eq!(session_cwd, updated_cwd);
    // `get_config` is expected to still report the original cwd.
    assert_eq!(config.cwd, turn_context.cwd);
    assert_eq!(next_turn.cwd, updated_cwd);
    assert_eq!(next_turn.config.cwd, updated_cwd);
}

/// `Session::new` must fail when `Feature::ShellZshFork` is enabled but
/// `zsh_path` is `None`.
#[tokio::test]
async fn session_new_fails_when_zsh_fork_enabled_without_zsh_path() {
    let codex_home = tempfile::tempdir().expect("create temp dir");
    let mut config = build_test_config(codex_home.path()).await;
    config
        .features
        .enable(Feature::ShellZshFork)
        .expect("test config should allow shell_zsh_fork");
    config.zsh_path = None;
    let config = Arc::new(config);
    let auth_manager =
        AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
    let models_manager = Arc::new(ModelsManager::new(
        config.codex_home.clone(),
        auth_manager.clone(),
        /*model_catalog*/ None,
        CollaborationModesConfig::default(),
));
    let model = ModelsManager::get_model_offline_for_tests(config.model.as_deref());
    let model_info = ModelsManager::construct_model_info_offline_for_tests(
        model.as_str(),
        &config.to_models_manager_config(),
    );
    let collaboration_mode = CollaborationMode {
        mode: ModeKind::Default,
        settings: Settings {
            model,
            reasoning_effort: config.model_reasoning_effort,
            developer_instructions: None,
        },
    };
    // Built from the zsh-fork-enabled `config` above.
    let session_configuration = SessionConfiguration {
        provider: config.model_provider.clone(),
        collaboration_mode,
        model_reasoning_summary: config.model_reasoning_summary,
        developer_instructions: config.developer_instructions.clone(),
        user_instructions: config.user_instructions.clone(),
        service_tier: None,
        personality: config.personality,
        // Explicit base instructions win; otherwise use the model's own.
        base_instructions: config
            .base_instructions
            .clone()
            .unwrap_or_else(|| model_info.get_model_instructions(config.personality)),
        compact_prompt: config.compact_prompt.clone(),
        surface_policy: SessionSurfacePolicy::full(),
        approval_policy: config.permissions.approval_policy.clone(),
        approvals_reviewer: config.approvals_reviewer,
        sandbox_policy: config.permissions.sandbox_policy.clone(),
        file_system_sandbox_policy: config.permissions.file_system_sandbox_policy.clone(),
        network_sandbox_policy: config.permissions.network_sandbox_policy,
        windows_sandbox_level: WindowsSandboxLevel::from_config(&config),
        cwd: config.cwd.clone(),
        codex_home: config.codex_home.clone(),
        thread_name: None,
        original_config_do_not_use: Arc::clone(&config),
        metrics_service_name: None,
        app_server_client_name: None,
        app_server_client_version: None,
        session_source: SessionSource::Exec,
        dynamic_tools: Vec::new(),
        persist_extended_history: false,
        inherited_shell_snapshot: None,
        user_shell_override: None,
    };
    // Receivers are bound to `_`-prefixed names: kept alive but never read.
    let (tx_event, _rx_event) = async_channel::unbounded();
    let (agent_status_tx, _agent_status_rx) = watch::channel(AgentStatus::PendingInit);
    let plugins_manager = Arc::new(PluginsManager::new(config.codex_home.clone()));
    let mcp_manager =
Arc::new(McpManager::new(Arc::clone(&plugins_manager)));
    let skills_manager = Arc::new(SkillsManager::new(
        config.codex_home.clone(),
        /*bundled_skills_enabled*/ true,
    ));
    let result = Session::new(
        session_configuration,
        Arc::clone(&config),
        auth_manager,
        models_manager,
        Arc::new(ExecPolicyManager::default()),
        tx_event,
        agent_status_tx,
        InitialHistory::New,
        SessionSource::Exec,
        skills_manager,
        plugins_manager,
        mcp_manager,
        Arc::new(SkillsWatcher::noop()),
        AgentControl::default(),
        Some(Arc::new(
            codex_exec_server::Environment::create(/*exec_server_url*/ None)
                .await
                .expect("create environment"),
        )),
    )
    .await;
    // Startup must be rejected; a successful session is a test failure.
    let Err(err) = result else {
        panic!("expected startup to fail");
    };
    // `{:#}` is used so the assertion sees the full formatted error text.
    let msg = format!("{err:#}");
    assert!(msg.contains("zsh fork feature enabled, but `zsh_path` is not configured"));
}

/// Builds a `Session` and a matching `TurnContext` for tests from a default
/// test config and offline model info, assembling the structs by hand.
// todo: use online model info
pub(crate) async fn make_session_and_context() -> (Session, TurnContext) {
    // The event receiver is kept alive under a `_` name but never read.
    let (tx_event, _rx_event) = async_channel::unbounded();
    let codex_home = tempfile::tempdir().expect("create temp dir");
    let config = build_test_config(codex_home.path()).await;
    let config = Arc::new(config);
    let conversation_id = ThreadId::default();
    let auth_manager =
        AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
    let models_manager = Arc::new(ModelsManager::new(
        config.codex_home.clone(),
        auth_manager.clone(),
        /*model_catalog*/ None,
        CollaborationModesConfig::default(),
    ));
    let agent_control = AgentControl::default();
    let exec_policy = Arc::new(ExecPolicyManager::default());
    let (agent_status_tx, _agent_status_rx) = watch::channel(AgentStatus::PendingInit);
    let model = ModelsManager::get_model_offline_for_tests(config.model.as_deref());
    let model_info = ModelsManager::construct_model_info_offline_for_tests(
        model.as_str(),
        &config.to_models_manager_config(),
    );
    let reasoning_effort = config.model_reasoning_effort;
    let collaboration_mode = CollaborationMode {
        mode: ModeKind::Default,
        settings: Settings {
            model,
reasoning_effort,
            developer_instructions: None,
        },
    };
    // Most fields are copied straight from `config`; the rest use fixed
    // test values.
    let session_configuration = SessionConfiguration {
        provider: config.model_provider.clone(),
        collaboration_mode,
        model_reasoning_summary: config.model_reasoning_summary,
        developer_instructions: config.developer_instructions.clone(),
        user_instructions: config.user_instructions.clone(),
        service_tier: None,
        personality: config.personality,
        // Explicit base instructions win; otherwise use the model's own.
        base_instructions: config
            .base_instructions
            .clone()
            .unwrap_or_else(|| model_info.get_model_instructions(config.personality)),
        compact_prompt: config.compact_prompt.clone(),
        surface_policy: SessionSurfacePolicy::full(),
        approval_policy: config.permissions.approval_policy.clone(),
        approvals_reviewer: config.approvals_reviewer,
        sandbox_policy: config.permissions.sandbox_policy.clone(),
        file_system_sandbox_policy: config.permissions.file_system_sandbox_policy.clone(),
        network_sandbox_policy: config.permissions.network_sandbox_policy,
        windows_sandbox_level: WindowsSandboxLevel::from_config(&config),
        cwd: config.cwd.clone(),
        codex_home: config.codex_home.clone(),
        thread_name: None,
        original_config_do_not_use: Arc::clone(&config),
        metrics_service_name: None,
        app_server_client_name: None,
        app_server_client_version: None,
        session_source: SessionSource::Exec,
        dynamic_tools: Vec::new(),
        persist_extended_history: false,
        inherited_shell_snapshot: None,
        user_shell_override: None,
    };
    let per_turn_config = Session::build_per_turn_config(&session_configuration);
    // Shadows the earlier `model_info`: recomputed against the per-turn config.
    let model_info = ModelsManager::construct_model_info_offline_for_tests(
        session_configuration.collaboration_mode.model(),
        &per_turn_config.to_models_manager_config(),
    );
    // Shadows the `session_telemetry` helper function with its result.
    let session_telemetry = session_telemetry(
        conversation_id,
        config.as_ref(),
        &model_info,
        session_configuration.session_source.clone(),
    );
    let state = SessionState::new(session_configuration.clone());
    let plugins_manager = Arc::new(PluginsManager::new(config.codex_home.clone()));
    let mcp_manager = Arc::new(McpManager::new(Arc::clone(&plugins_manager)));
    let
skills_manager = Arc::new(SkillsManager::new(
        config.codex_home.clone(),
        /*bundled_skills_enabled*/ true,
    ));
    let network_approval = Arc::new(NetworkApprovalService::default());
    let environment = Arc::new(
        codex_exec_server::Environment::create(/*exec_server_url*/ None)
            .await
            .expect("create environment"),
    );
    let skills_watcher = Arc::new(SkillsWatcher::noop());
    // Services are assembled by hand: no rollout recorder, network proxy or
    // state DB, and an uninitialized MCP connection manager.
    let services = SessionServices {
        mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::new_uninitialized(
            &config.permissions.approval_policy,
            &config.permissions.sandbox_policy,
        ))),
        mcp_startup_cancellation_token: Mutex::new(CancellationToken::new()),
        unified_exec_manager: UnifiedExecProcessManager::new(
            config.background_terminal_max_timeout,
        ),
        shell_zsh_path: None,
        main_execve_wrapper_exe: config.main_execve_wrapper_exe.clone(),
        analytics_events_client: AnalyticsEventsClient::new(
            Arc::clone(&auth_manager),
            // Base URL is passed without trailing slashes.
            config.chatgpt_base_url.trim_end_matches('/').to_string(),
            config.analytics_enabled,
        ),
        hooks: Hooks::new(HooksConfig {
            legacy_notify_argv: config.notify.clone(),
            ..HooksConfig::default()
        }),
        rollout: Mutex::new(None),
        user_shell: Arc::new(default_user_shell()),
        // Only the sender half is kept; the receiver is dropped immediately.
        shell_snapshot_tx: watch::channel(None).0,
        show_raw_agent_reasoning: config.show_raw_agent_reasoning,
        exec_policy,
        auth_manager: auth_manager.clone(),
        session_telemetry: session_telemetry.clone(),
        models_manager: Arc::clone(&models_manager),
        tool_approvals: Mutex::new(ApprovalStore::default()),
        guardian_rejection_rationales: Mutex::new(std::collections::HashMap::new()),
        skills_manager,
        plugins_manager,
        mcp_manager,
        skills_watcher,
        agent_control,
        network_proxy: None,
        network_approval: Arc::clone(&network_approval),
        state_db: None,
        model_client: ModelClient::new(
            Some(auth_manager.clone()),
            conversation_id,
            /*installation_id*/ "11111111-1111-4111-8111-111111111111".to_string(),
            session_configuration.provider.clone(),
            session_configuration.session_source.clone(),
            config.model_verbosity,
config.features.enabled(Feature::EnableRequestCompression),
            config.features.enabled(Feature::RuntimeMetrics),
            Session::build_model_client_beta_features_header(config.as_ref()),
        ),
        code_mode_service: crate::tools::code_mode::CodeModeService::new(
            config.js_repl_node_path.clone(),
        ),
        environment: Some(Arc::clone(&environment)),
    };
    let js_repl = Arc::new(JsReplHandle::with_node_path(
        config.js_repl_node_path.clone(),
        config.js_repl_node_module_dirs.clone(),
    ));
    // Resolve skills for the per-turn config, including plugin-provided roots.
    let plugin_outcome = services
        .plugins_manager
        .plugins_for_config(&per_turn_config);
    let effective_skill_roots = plugin_outcome.effective_skill_roots();
    let skills_input =
        crate::skills_load_input_from_config(&per_turn_config, effective_skill_roots);
    let skills_outcome = Arc::new(services.skills_manager.skills_for_config(&skills_input));
    // Built before `services` is moved into the `Session` below, because
    // several arguments borrow from it.
    let turn_context = Session::make_turn_context(
        conversation_id,
        Some(Arc::clone(&auth_manager)),
        &session_telemetry,
        session_configuration.provider.clone(),
        &session_configuration,
        services.user_shell.as_ref(),
        services.shell_zsh_path.as_ref(),
        services.main_execve_wrapper_exe.as_ref(),
        per_turn_config,
        model_info,
        &models_manager,
        /*network*/ None,
        Some(environment),
        "turn_id".to_string(),
        Arc::clone(&js_repl),
        skills_outcome,
    );
    let (mailbox, mailbox_rx) = crate::agent::Mailbox::new();
    let session = Session {
        conversation_id,
        tx_event,
        agent_status: agent_status_tx,
        // Only the sender half is kept; the receiver is dropped immediately.
        out_of_band_elicitation_paused: watch::channel(false).0,
        state: Mutex::new(state),
        managed_network_proxy_refresh_lock: Mutex::new(()),
        features: config.features.clone(),
        pending_mcp_server_refresh_config: Mutex::new(None),
        conversation: Arc::new(RealtimeConversationManager::new()),
        active_turn: Mutex::new(None),
        mailbox,
        mailbox_rx: Mutex::new(mailbox_rx),
        idle_pending_input: Mutex::new(Vec::new()),
        guardian_review_session: crate::guardian::GuardianReviewSessionManager::default(),
        services,
        js_repl,
        next_internal_sub_id: AtomicU64::new(0),
    };
    (session, turn_context)
}

/// A permissions response for a call id with no pending request must not
/// grant any turn permissions.
#[tokio::test]
async fn
notify_request_permissions_response_ignores_unmatched_call_id() {
    let (session, _turn_context) = make_session_and_context().await;
    *session.active_turn.lock().await = Some(ActiveTurn::default());
    session
        .notify_request_permissions_response(
            // No request was ever issued under this id.
            "missing",
            codex_protocol::request_permissions::RequestPermissionsResponse {
                permissions: RequestPermissionProfile {
                    network: Some(codex_protocol::models::NetworkPermissions {
                        enabled: Some(true),
                    }),
                    ..RequestPermissionProfile::default()
                },
                scope: PermissionGrantScope::Turn,
            },
        )
        .await;
    assert_eq!(session.granted_turn_permissions().await, None);
}

/// With `request_permissions: true` in the granular approval policy, a
/// permissions request emits a `RequestPermissions` event and resolves with
/// the response supplied for that call id.
#[tokio::test]
async fn request_permissions_emits_event_when_granular_policy_allows_requests() {
    let (session, mut turn_context, rx) = make_session_and_context_with_rx().await;
    *session.active_turn.lock().await = Some(ActiveTurn::default());
    // `Arc::get_mut` only succeeds while this is the sole reference.
    Arc::get_mut(&mut turn_context)
        .expect("single turn context ref")
        .approval_policy
        .set(AskForApproval::Granular(GranularApprovalConfig {
            sandbox_approval: true,
            rules: true,
            skill_approval: true,
            request_permissions: true,
            mcp_elicitations: true,
        }))
        .expect("test setup should allow updating approval policy");
    let session = Arc::new(session);
    let turn_context = Arc::new(turn_context);
    let call_id = "call-1".to_string();
    let expected_response = codex_protocol::request_permissions::RequestPermissionsResponse {
        permissions: RequestPermissionProfile {
            network: Some(codex_protocol::models::NetworkPermissions {
                enabled: Some(true),
            }),
            ..RequestPermissionProfile::default()
        },
        scope: PermissionGrantScope::Turn,
    };
    // Issue the request on a separate task so this test can answer it.
    let handle = tokio::spawn({
        let session = Arc::clone(&session);
        let turn_context = Arc::clone(&turn_context);
        let call_id = call_id.clone();
        async move {
            session
                .request_permissions(
                    turn_context.as_ref(),
                    call_id,
                    codex_protocol::request_permissions::RequestPermissionsArgs {
                        reason: Some("need network".to_string()),
                        permissions: RequestPermissionProfile {
                            network: Some(codex_protocol::models::NetworkPermissions {
                                enabled: Some(true),
                            }),
..RequestPermissionProfile::default()
                        },
                    },
                )
                .await
        }
    });
    let request_event = tokio::time::timeout(StdDuration::from_secs(1), rx.recv())
        .await
        .expect("request_permissions event timed out")
        .expect("request_permissions event missing");
    let EventMsg::RequestPermissions(request) = request_event.msg else {
        panic!("expected request_permissions event");
    };
    assert_eq!(request.call_id, call_id);
    // Answer the pending request; the spawned task should then resolve.
    session
        .notify_request_permissions_response(&request.call_id, expected_response.clone())
        .await;
    let response = tokio::time::timeout(StdDuration::from_secs(1), handle)
        .await
        .expect("request_permissions future timed out")
        .expect("request_permissions join error");
    assert_eq!(response, Some(expected_response));
}

/// With `request_permissions: false` in the granular approval policy, a
/// permissions request resolves immediately with an empty profile and emits
/// no event.
#[tokio::test]
async fn request_permissions_is_auto_denied_when_granular_policy_blocks_tool_requests() {
    let (session, mut turn_context, rx) = make_session_and_context_with_rx().await;
    *session.active_turn.lock().await = Some(ActiveTurn::default());
    // `Arc::get_mut` only succeeds while this is the sole reference.
    Arc::get_mut(&mut turn_context)
        .expect("single turn context ref")
        .approval_policy
        .set(AskForApproval::Granular(GranularApprovalConfig {
            sandbox_approval: true,
            rules: true,
            skill_approval: true,
            request_permissions: false,
            mcp_elicitations: true,
        }))
        .expect("test setup should allow updating approval policy");
    let session = Arc::new(session);
    let turn_context = Arc::new(turn_context);
    let call_id = "call-1".to_string();
    // Awaited inline: nobody answers, so this only returns if it is auto-resolved.
    let response = session
        .request_permissions(
            turn_context.as_ref(),
            call_id,
            codex_protocol::request_permissions::RequestPermissionsArgs {
                reason: Some("need network".to_string()),
                permissions: RequestPermissionProfile {
                    network: Some(codex_protocol::models::NetworkPermissions {
                        enabled: Some(true),
                    }),
                    ..RequestPermissionProfile::default()
                },
            },
        )
        .await;
    assert_eq!(
        response,
        Some(
            codex_protocol::request_permissions::RequestPermissionsResponse {
                permissions: RequestPermissionProfile::default(),
                scope: PermissionGrantScope::Turn,
            }
        )
    );
    // A timeout on `recv` is the success case here: nothing was emitted.
    assert!(
        tokio::time::timeout(StdDuration::from_millis(100),
rx.recv())
            .await
            .is_err(),
        "request_permissions should not emit an event when granular.request_permissions is false"
    );
}

/// A submission sent with `trace: None` from inside an instrumented span must
/// be delivered with that span's W3C trace context attached.
#[tokio::test]
async fn submit_with_id_captures_current_span_trace_context() {
    let (session, _turn_context) = make_session_and_context().await;
    let (tx_sub, rx_sub) = async_channel::bounded(1);
    let (_tx_event, rx_event) = async_channel::unbounded();
    let (_agent_status_tx, agent_status) = watch::channel(AgentStatus::PendingInit);
    let codex = Codex {
        tx_sub,
        rx_event,
        agent_status,
        session: Arc::new(session),
        session_loop_termination: completed_session_loop_termination(),
    };
    let _trace_test_context = install_test_tracing("codex-core-tests");
    let request_parent = W3cTraceContext {
        traceparent: Some("00-00000000000000000000000000000011-0000000000000022-01".into()),
        tracestate: Some("vendor=value".into()),
    };
    let request_span = info_span!("app_server.request");
    assert!(set_parent_from_w3c_trace_context(
        &request_span,
        &request_parent
    ));
    let expected_trace = async {
        // Captured inside the span, before submitting.
        let expected_trace =
            current_span_w3c_trace_context().expect("current span should have trace context");
        codex
            .submit_with_id(Submission {
                id: "sub-1".into(),
                op: Op::Interrupt,
                trace: None,
            })
            .await
            .expect("submit should succeed");
        expected_trace
    }
    .instrument(request_span)
    .await;
    let submitted = rx_sub.recv().await.expect("submission");
    assert_eq!(submitted.trace, Some(expected_trace));
}

/// A default turn created inside an instrumented span must record that span's
/// trace id in its turn-context item.
#[tokio::test]
async fn new_default_turn_captures_current_span_trace_id() {
    let (session, _turn_context) = make_session_and_context().await;
    let _trace_test_context = install_test_tracing("codex-core-tests");
    let request_parent = W3cTraceContext {
        traceparent: Some("00-00000000000000000000000000000011-0000000000000022-01".into()),
        tracestate: Some("vendor=value".into()),
    };
    let request_span = info_span!("app_server.request");
    assert!(set_parent_from_w3c_trace_context(
        &request_span,
        &request_parent
    ));
    let turn_context_item = async {
        let expected_trace_id = Span::current()
            .context()
            .span()
.span_context()
            .trace_id()
            .to_string();
        let turn_context = session.new_default_turn().await;
        let turn_context_item = turn_context.to_turn_context_item();
        assert_eq!(turn_context_item.trace_id, Some(expected_trace_id));
        turn_context_item
    }
    .instrument(request_span)
    .await;
    // The trace id is the second field of the `traceparent` set on the parent.
    assert_eq!(
        turn_context_item.trace_id.as_deref(),
        Some("00000000000000000000000000000011")
    );
}

/// When a submission carries its own trace context, the dispatch span must use
/// that trace id rather than the ambient span's.
#[test]
fn submission_dispatch_span_prefers_submission_trace_context() {
    let _trace_test_context = install_test_tracing("codex-core-tests");
    // Ambient span: trace id ...33.
    let ambient_parent = W3cTraceContext {
        traceparent: Some("00-00000000000000000000000000000033-0000000000000044-01".into()),
        tracestate: None,
    };
    let ambient_span = info_span!("ambient");
    assert!(set_parent_from_w3c_trace_context(
        &ambient_span,
        &ambient_parent
    ));
    // Submission: trace id ...55.
    let submission_trace = W3cTraceContext {
        traceparent: Some("00-00000000000000000000000000000055-0000000000000066-01".into()),
        tracestate: Some("vendor=value".into()),
    };
    let dispatch_span = ambient_span.in_scope(|| {
        submission_dispatch_span(&Submission {
            id: "sub-1".into(),
            op: Op::Interrupt,
            trace: Some(submission_trace),
        })
    });
    let trace_id = dispatch_span.context().span().span_context().trace_id();
    assert_eq!(
        trace_id,
        TraceId::from_hex("00000000000000000000000000000055").expect("trace id")
    );
}

/// The dispatch span for a realtime audio op must be created at `DEBUG` level.
#[test]
fn submission_dispatch_span_uses_debug_for_realtime_audio() {
    let _trace_test_context = install_test_tracing("codex-core-tests");
    let dispatch_span = submission_dispatch_span(&Submission {
        id: "sub-1".into(),
        op: Op::RealtimeConversationAudio(ConversationAudioParams {
            frame: RealtimeAudioFrame {
                data: "ZmFrZQ==".into(),
                sample_rate: 16_000,
                num_channels: 1,
                samples_per_channel: Some(160),
                item_id: None,
            },
        }),
        trace: None,
    });
    assert_eq!(
        dispatch_span.metadata().expect("span metadata").level(),
        &tracing::Level::DEBUG
    );
}

/// `Op::kind()` must return distinct names for `OverrideTurnContext` and
/// `UserInput`.
#[test]
fn op_kind_distinguishes_turn_ops() {
    assert_eq!(
        Op::OverrideTurnContext {
            cwd: None,
            approval_policy: None,
            approvals_reviewer: None,
            sandbox_policy: None,
windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
            service_tier: None,
            collaboration_mode: None,
            personality: None,
        }
        .kind(),
        "override_turn_context"
    );
    assert_eq!(
        Op::UserInput {
            items: vec![],
            final_output_json_schema: None,
        }
        .kind(),
        "user_input"
    );
}

/// A `UserTurn` op carrying an `approvals_reviewer` must update the session
/// configuration's reviewer.
#[tokio::test]
async fn user_turn_updates_approvals_reviewer() {
    let (session, turn_context, _rx) = make_session_and_context_with_rx().await;
    let config = session.get_config().await;
    handlers::user_input_or_turn(
        &session,
        "sub-1".to_string(),
        Op::UserTurn {
            items: vec![UserInput::Text {
                text: "hello".to_string(),
                text_elements: Vec::new(),
            }],
            cwd: config.cwd.to_path_buf(),
            approval_policy: config.permissions.approval_policy.value(),
            // The only field that differs from the current configuration.
            approvals_reviewer: Some(codex_config::types::ApprovalsReviewer::GuardianSubagent),
            sandbox_policy: config.permissions.sandbox_policy.get().clone(),
            model: turn_context.model_info.slug.clone(),
            effort: config.model_reasoning_effort,
            summary: config.model_reasoning_summary,
            service_tier: None,
            final_output_json_schema: None,
            collaboration_mode: None,
            personality: config.personality,
        },
    )
    .await;
    let state = session.state.lock().await;
    assert_eq!(
        state.session_configuration.approvals_reviewer,
        codex_config::types::ApprovalsReviewer::GuardianSubagent
    );
}

/// A task spawned inside a dispatch span must run in a span that shares the
/// dispatch trace id but has its own span id.
#[tokio::test]
async fn spawn_task_turn_span_inherits_dispatch_trace_context() {
    /// Task that records the W3C trace context of the span it runs in.
    struct TraceCaptureTask {
        // Shared with the test body, which reads it after the turn completes.
        captured_trace: Arc<std::sync::Mutex<Option<W3cTraceContext>>>,
    }
    impl SessionTask for TraceCaptureTask {
        fn kind(&self) -> TaskKind {
            TaskKind::Regular
        }
        fn span_name(&self) -> &'static str {
            "session_task.trace_capture"
        }
        // NOTE(review): the generic arguments were missing from this signature
        // and are restored from usage in this file; `Option<String>` in
        // particular is inferred — confirm against the `SessionTask` trait.
        async fn run(
            self: Arc<Self>,
            _session: Arc<SessionTaskContext>,
            _ctx: Arc<TurnContext>,
            _input: Vec<UserInput>,
            _cancellation_token: CancellationToken,
        ) -> Option<String> {
            // Recover the guard even if the mutex was poisoned.
            let mut trace = self
                .captured_trace
                .lock()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            *trace = current_span_w3c_trace_context();
            None
        }
    }
    let _trace_test_context = install_test_tracing("codex-core-tests");
    let request_parent = W3cTraceContext {
        traceparent:
Some("00-00000000000000000000000000000011-0000000000000022-01".into()),
        tracestate: Some("vendor=value".into()),
    };
    let request_span = tracing::info_span!("app_server.request");
    assert!(set_parent_from_w3c_trace_context(
        &request_span,
        &request_parent
    ));
    // Read the trace context as seen from inside the request span.
    let submission_trace = async {
        current_span_w3c_trace_context().expect("request span should have trace context")
    }
    .instrument(request_span)
    .await;
    let dispatch_span = submission_dispatch_span(&Submission {
        id: "sub-1".into(),
        op: Op::Interrupt,
        trace: Some(submission_trace.clone()),
    });
    // Recorded now because `dispatch_span` is moved into `instrument` below.
    let dispatch_span_id = dispatch_span.context().span().span_context().span_id();
    let (sess, tc, rx) = make_session_and_context_with_rx().await;
    let captured_trace = Arc::new(std::sync::Mutex::new(None));
    async {
        sess.spawn_task(
            Arc::clone(&tc),
            vec![UserInput::Text {
                text: "hello".to_string(),
                text_elements: Vec::new(),
            }],
            TraceCaptureTask {
                captured_trace: Arc::clone(&captured_trace),
            },
        )
        .await;
    }
    .instrument(dispatch_span)
    .await;
    // The first event received is expected to be the turn completion.
    let evt = tokio::time::timeout(StdDuration::from_secs(2), rx.recv())
        .await
        .expect("timeout waiting for turn completion")
        .expect("event");
    assert!(matches!(evt.msg, EventMsg::TurnComplete(_)));
    let task_trace = captured_trace
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .clone()
        .expect("turn task should capture the current span trace context");
    let submission_context =
        codex_otel::context_from_w3c_trace_context(&submission_trace).expect("submission");
    let task_context =
        codex_otel::context_from_w3c_trace_context(&task_trace).expect("task trace");
    // Same trace as the submission...
    assert_eq!(
        task_context.span().span_context().trace_id(),
        submission_context.span().span_context().trace_id()
    );
    // ...but the task ran in a span other than the dispatch span itself.
    assert_ne!(
        task_context.span().span_context().span_id(),
        dispatch_span_id
    );
}

/// Two concurrent `shutdown_and_wait` callers must both complete successfully.
#[tokio::test]
async fn shutdown_and_wait_allows_multiple_waiters() {
    let (session, _turn_context) = make_session_and_context().await;
    let (tx_sub, rx_sub) = async_channel::bounded(4);
    let (_tx_event, rx_event) = async_channel::unbounded();
    let
(_agent_status_tx, agent_status) = watch::channel(AgentStatus::PendingInit);
// Stand-in session loop: receive one submission, require it to be `Op::Shutdown`,
// then take 50ms to exit.
let session_loop_handle = tokio::spawn(async move { let shutdown: Submission = rx_sub.recv().await.expect("shutdown submission"); assert_eq!(shutdown.op, Op::Shutdown); tokio::time::sleep(StdDuration::from_millis(50)).await; }); let codex = Arc::new(Codex { tx_sub, rx_event, agent_status, session: Arc::new(session), session_loop_termination: session_loop_termination_from_handle(session_loop_handle), });
// Both waiters share the same `Codex`; each task must join and each result must unwrap.
let waiter_1 = { let codex = Arc::clone(&codex); tokio::spawn(async move { codex.shutdown_and_wait().await }) }; let waiter_2 = { let codex = Arc::clone(&codex); tokio::spawn(async move { codex.shutdown_and_wait().await }) }; waiter_1 .await .expect("first shutdown waiter join") .expect("first shutdown waiter"); waiter_2 .await .expect("second shutdown waiter join") .expect("second shutdown waiter"); }
// The submission receiver is dropped up front, so no shutdown op can be received by a
// loop. The waiter must still be pending after 10ms, and must complete successfully
// once the stand-in loop task is released through the oneshot channel.
#[tokio::test] async fn shutdown_and_wait_waits_when_shutdown_is_already_in_progress() { let (session, _turn_context) = make_session_and_context().await; let (tx_sub, rx_sub) = async_channel::bounded(4); drop(rx_sub); let (_tx_event, rx_event) = async_channel::unbounded(); let (_agent_status_tx, agent_status) = watch::channel(AgentStatus::PendingInit); let (shutdown_complete_tx, shutdown_complete_rx) = tokio::sync::oneshot::channel();
// Stand-in loop that only finishes when the test sends on `shutdown_complete_tx`.
let session_loop_handle = tokio::spawn(async move { let _ = shutdown_complete_rx.await; }); let codex = Arc::new(Codex { tx_sub, rx_event, agent_status, session: Arc::new(session), session_loop_termination: session_loop_termination_from_handle(session_loop_handle), }); let waiter = { let codex = Arc::clone(&codex); tokio::spawn(async move { codex.shutdown_and_wait().await }) }; tokio::time::sleep(StdDuration::from_millis(10)).await; assert!(!waiter.is_finished()); shutdown_complete_tx .send(()) .expect("session loop should still be waiting to terminate"); waiter .await .expect("shutdown waiter join") .expect("shutdown waiter"); }
// Next test: shutting down a parent `Codex` must deliver `Op::Shutdown` to a guardian
// subagent that was cached on the parent session.
#[tokio::test] async fn 
shutdown_and_wait_shuts_down_cached_guardian_subagent() {
// Parent side: a real `submission_loop` drives the parent session.
let (parent_session, parent_turn_context) = make_session_and_context().await; let parent_session = Arc::new(parent_session); let parent_config = Arc::clone(&parent_turn_context.config); let (parent_tx_sub, parent_rx_sub) = async_channel::bounded(4); let (_parent_tx_event, parent_rx_event) = async_channel::unbounded(); let (_parent_status_tx, parent_agent_status) = watch::channel(AgentStatus::PendingInit); let parent_session_for_loop = Arc::clone(&parent_session); let parent_session_loop_handle = tokio::spawn(async move { submission_loop(parent_session_for_loop, parent_config, parent_rx_sub).await; }); let parent_codex = Codex { tx_sub: parent_tx_sub, rx_event: parent_rx_event, agent_status: parent_agent_status, session: Arc::clone(&parent_session), session_loop_termination: session_loop_termination_from_handle(parent_session_loop_handle), };
// Child side: a stand-in loop that receives one submission, requires it to be
// `Op::Shutdown`, and then signals the test through the oneshot channel.
let (child_session, _child_turn_context) = make_session_and_context().await; let (child_tx_sub, child_rx_sub) = async_channel::bounded(4); let (_child_tx_event, child_rx_event) = async_channel::unbounded(); let (_child_status_tx, child_agent_status) = watch::channel(AgentStatus::PendingInit); let (child_shutdown_tx, child_shutdown_rx) = tokio::sync::oneshot::channel(); let child_session_loop_handle = tokio::spawn(async move { let shutdown: Submission = child_rx_sub .recv() .await .expect("child shutdown submission"); assert_eq!(shutdown.op, Op::Shutdown); child_shutdown_tx .send(()) .expect("child shutdown signal should be delivered"); }); let child_codex = Codex { tx_sub: child_tx_sub, rx_event: child_rx_event, agent_status: child_agent_status, session: Arc::new(child_session), session_loop_termination: session_loop_termination_from_handle(child_session_loop_handle), };
// Cache the child on the parent's guardian review session, shut the parent down, and
// wait for the child's signal.
parent_session .guardian_review_session .cache_for_test(child_codex) .await; parent_codex .shutdown_and_wait() .await .expect("parent shutdown should succeed"); child_shutdown_rx .await .expect("guardian 
subagent should receive a shutdown op"); }
// Same scenario as the cached-guardian test, except the child `Codex` is attached to the
// parent via `register_ephemeral_for_test`: shutting the parent down must still deliver
// `Op::Shutdown` to the child.
#[tokio::test] async fn shutdown_and_wait_shuts_down_tracked_ephemeral_guardian_review() {
// Parent side: a real `submission_loop` drives the parent session.
let (parent_session, parent_turn_context) = make_session_and_context().await; let parent_session = Arc::new(parent_session); let parent_config = Arc::clone(&parent_turn_context.config); let (parent_tx_sub, parent_rx_sub) = async_channel::bounded(4); let (_parent_tx_event, parent_rx_event) = async_channel::unbounded(); let (_parent_status_tx, parent_agent_status) = watch::channel(AgentStatus::PendingInit); let parent_session_for_loop = Arc::clone(&parent_session); let parent_session_loop_handle = tokio::spawn(async move { submission_loop(parent_session_for_loop, parent_config, parent_rx_sub).await; }); let parent_codex = Codex { tx_sub: parent_tx_sub, rx_event: parent_rx_event, agent_status: parent_agent_status, session: Arc::clone(&parent_session), session_loop_termination: session_loop_termination_from_handle(parent_session_loop_handle), };
// Child side: stand-in loop that requires a single `Op::Shutdown` and then signals the test.
let (child_session, _child_turn_context) = make_session_and_context().await; let (child_tx_sub, child_rx_sub) = async_channel::bounded(4); let (_child_tx_event, child_rx_event) = async_channel::unbounded(); let (_child_status_tx, child_agent_status) = watch::channel(AgentStatus::PendingInit); let (child_shutdown_tx, child_shutdown_rx) = tokio::sync::oneshot::channel(); let child_session_loop_handle = tokio::spawn(async move { let shutdown: Submission = child_rx_sub .recv() .await .expect("child shutdown submission"); assert_eq!(shutdown.op, Op::Shutdown); child_shutdown_tx .send(()) .expect("child shutdown signal should be delivered"); }); let child_codex = Codex { tx_sub: child_tx_sub, rx_event: child_rx_event, agent_status: child_agent_status, session: Arc::new(child_session), session_loop_termination: session_loop_termination_from_handle(child_session_loop_handle), };
// Register the child as an ephemeral guardian review, then shut the parent down.
parent_session .guardian_review_session .register_ephemeral_for_test(child_codex) .await; parent_codex .shutdown_and_wait() 
.await .expect("parent shutdown should succeed"); child_shutdown_rx .await .expect("ephemeral guardian review should receive a shutdown op"); }
// Test fixture: builds a `Session` and a matching turn context from test doubles and
// returns them together with the receiving end of the session's event channel.
//
// `dynamic_tools` is stored as-is in the `SessionConfiguration`. The rest comes from
// `build_test_config` rooted at a fresh temp dir, API-key test auth, and the
// `*_offline_for_tests` model helpers. `rollout`, `network_proxy` and `state_db` are
// left unset (`None`), and the turn id is the fixed string "turn_id".
//
// NOTE(review): `codex_home` (the `TempDir`) is a local that is neither returned nor
// stored, so it is dropped when this function returns — presumably deleting the
// directory while `config.codex_home` still points at it. Confirm that tests writing
// under codex_home recreate the directory.
// NOTE(review): generic arguments in the signature (`Vec,`, `Arc,`, `Receiver,`) look
// stripped from this copy of the file; restore them from upstream.
pub(crate) async fn make_session_and_context_with_dynamic_tools_and_rx( dynamic_tools: Vec, ) -> ( Arc, Arc, async_channel::Receiver, ) { let (tx_event, rx_event) = async_channel::unbounded(); let codex_home = tempfile::tempdir().expect("create temp dir"); let config = build_test_config(codex_home.path()).await; let config = Arc::new(config); let conversation_id = ThreadId::default(); let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); let models_manager = Arc::new(ModelsManager::new( config.codex_home.clone(), auth_manager.clone(), /*model_catalog*/ None, CollaborationModesConfig::default(), )); let agent_control = AgentControl::default(); let exec_policy = Arc::new(ExecPolicyManager::default()); let (agent_status_tx, _agent_status_rx) = watch::channel(AgentStatus::PendingInit);
// First `model_info`: only used below as the fallback source of base instructions.
let model = ModelsManager::get_model_offline_for_tests(config.model.as_deref()); let model_info = ModelsManager::construct_model_info_offline_for_tests( model.as_str(), &config.to_models_manager_config(), ); let reasoning_effort = config.model_reasoning_effort; let collaboration_mode = CollaborationMode { mode: ModeKind::Default, settings: Settings { model, reasoning_effort, developer_instructions: None, }, }; let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), collaboration_mode, model_reasoning_summary: config.model_reasoning_summary, developer_instructions: config.developer_instructions.clone(), user_instructions: config.user_instructions.clone(), service_tier: None, personality: config.personality, base_instructions: config .base_instructions .clone() .unwrap_or_else(|| model_info.get_model_instructions(config.personality)), compact_prompt: config.compact_prompt.clone(), surface_policy: SessionSurfacePolicy::full(), approval_policy: 
config.permissions.approval_policy.clone(), approvals_reviewer: config.approvals_reviewer, sandbox_policy: config.permissions.sandbox_policy.clone(), file_system_sandbox_policy: config.permissions.file_system_sandbox_policy.clone(), network_sandbox_policy: config.permissions.network_sandbox_policy, windows_sandbox_level: WindowsSandboxLevel::from_config(&config), cwd: config.cwd.clone(), codex_home: config.codex_home.clone(), thread_name: None, original_config_do_not_use: Arc::clone(&config), metrics_service_name: None, app_server_client_name: None, app_server_client_version: None, session_source: SessionSource::Exec, dynamic_tools, persist_extended_history: false, inherited_shell_snapshot: None, user_shell_override: None, };
// Recompute model info from the per-turn config; this shadows the earlier `model_info`.
let per_turn_config = Session::build_per_turn_config(&session_configuration); let model_info = ModelsManager::construct_model_info_offline_for_tests( session_configuration.collaboration_mode.model(), &per_turn_config.to_models_manager_config(), ); let session_telemetry = session_telemetry( conversation_id, config.as_ref(), &model_info, session_configuration.session_source.clone(), ); let state = SessionState::new(session_configuration.clone()); let plugins_manager = Arc::new(PluginsManager::new(config.codex_home.clone())); let mcp_manager = Arc::new(McpManager::new(Arc::clone(&plugins_manager))); let skills_manager = Arc::new(SkillsManager::new( config.codex_home.clone(), /*bundled_skills_enabled*/ true, )); let network_approval = Arc::new(NetworkApprovalService::default()); let environment = Arc::new( codex_exec_server::Environment::create(/*exec_server_url*/ None) .await .expect("create environment"), ); let skills_watcher = Arc::new(SkillsWatcher::noop());
// Service bundle for the session: uninitialized MCP connection manager, no-op skills
// watcher, default approval stores, and a model client with a fixed installation id.
let services = SessionServices { mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::new_uninitialized( &config.permissions.approval_policy, &config.permissions.sandbox_policy, ))), mcp_startup_cancellation_token: Mutex::new(CancellationToken::new()), unified_exec_manager: 
UnifiedExecProcessManager::new( config.background_terminal_max_timeout, ), shell_zsh_path: None, main_execve_wrapper_exe: config.main_execve_wrapper_exe.clone(), analytics_events_client: AnalyticsEventsClient::new( Arc::clone(&auth_manager), config.chatgpt_base_url.trim_end_matches('/').to_string(), config.analytics_enabled, ), hooks: Hooks::new(HooksConfig { legacy_notify_argv: config.notify.clone(), ..HooksConfig::default() }), rollout: Mutex::new(None), user_shell: Arc::new(default_user_shell()), shell_snapshot_tx: watch::channel(None).0, show_raw_agent_reasoning: config.show_raw_agent_reasoning, exec_policy, auth_manager: Arc::clone(&auth_manager), session_telemetry: session_telemetry.clone(), models_manager: Arc::clone(&models_manager), tool_approvals: Mutex::new(ApprovalStore::default()), guardian_rejection_rationales: Mutex::new(std::collections::HashMap::new()), skills_manager, plugins_manager, mcp_manager, skills_watcher, agent_control, network_proxy: None, network_approval: Arc::clone(&network_approval), state_db: None, model_client: ModelClient::new( Some(Arc::clone(&auth_manager)), conversation_id, /*installation_id*/ "11111111-1111-4111-8111-111111111111".to_string(), session_configuration.provider.clone(), session_configuration.session_source.clone(), config.model_verbosity, config.features.enabled(Feature::EnableRequestCompression), config.features.enabled(Feature::RuntimeMetrics), Session::build_model_client_beta_features_header(config.as_ref()), ), code_mode_service: crate::tools::code_mode::CodeModeService::new( config.js_repl_node_path.clone(), ), environment: Some(Arc::clone(&environment)), }; let js_repl = Arc::new(JsReplHandle::with_node_path( config.js_repl_node_path.clone(), config.js_repl_node_module_dirs.clone(), ));
// Resolve skills for the per-turn config, using the skill roots reported by the plugins outcome.
let plugin_outcome = services .plugins_manager .plugins_for_config(&per_turn_config); let effective_skill_roots = plugin_outcome.effective_skill_roots(); let skills_input = 
crate::skills_load_input_from_config(&per_turn_config, effective_skill_roots); let skills_outcome = Arc::new(services.skills_manager.skills_for_config(&skills_input));
// The turn context shares the session's auth manager, JS REPL handle and environment.
let turn_context = Arc::new(Session::make_turn_context( conversation_id, Some(Arc::clone(&auth_manager)), &session_telemetry, session_configuration.provider.clone(), &session_configuration, services.user_shell.as_ref(), services.shell_zsh_path.as_ref(), services.main_execve_wrapper_exe.as_ref(), per_turn_config, model_info, &models_manager, /*network*/ None, Some(environment), "turn_id".to_string(), Arc::clone(&js_repl), skills_outcome, )); let (mailbox, mailbox_rx) = crate::agent::Mailbox::new(); let session = Arc::new(Session { conversation_id, tx_event, agent_status: agent_status_tx, out_of_band_elicitation_paused: watch::channel(false).0, state: Mutex::new(state), managed_network_proxy_refresh_lock: Mutex::new(()), features: config.features.clone(), pending_mcp_server_refresh_config: Mutex::new(None), conversation: Arc::new(RealtimeConversationManager::new()), active_turn: Mutex::new(None), mailbox, mailbox_rx: Mutex::new(mailbox_rx), idle_pending_input: Mutex::new(Vec::new()), guardian_review_session: crate::guardian::GuardianReviewSessionManager::default(), services, js_repl, next_internal_sub_id: AtomicU64::new(0), }); (session, turn_context, rx_event) }
// Like make_session_and_context, but returns `Arc`-wrapped values and the event receiver
// so tests can assert on emitted events.
// Delegates to `make_session_and_context_with_dynamic_tools_and_rx` with an empty
// dynamic-tool list.
// NOTE(review): the generic arguments of the return tuple look stripped from this copy.
pub(crate) async fn make_session_and_context_with_rx() -> ( Arc, Arc, async_channel::Receiver, ) { make_session_and_context_with_dynamic_tools_and_rx(Vec::new()).await }
// Queuing a refresh config must not by itself cancel the current MCP startup token.
// Only `refresh_mcp_servers_if_requested` consumes the pending config; afterwards the
// old token is cancelled, the pending slot is empty, and a new uncancelled token is in place.
#[tokio::test] async fn refresh_mcp_servers_is_deferred_until_next_turn() { let (session, turn_context) = make_session_and_context().await; let old_token = session.mcp_startup_cancellation_token().await; assert!(!old_token.is_cancelled()); let mcp_oauth_credentials_store_mode = serde_json::to_value(OAuthCredentialsStoreMode::Auto).expect("serialize store mode"); let refresh_config = McpServerRefreshConfig { mcp_servers: json!({}), mcp_oauth_credentials_store_mode, };
// Scoped so the lock guard is released before the session is used again.
{ let mut guard = session.pending_mcp_server_refresh_config.lock().await; *guard = Some(refresh_config); } assert!(!old_token.is_cancelled()); assert!( session .pending_mcp_server_refresh_config .lock() .await .is_some() ); session .refresh_mcp_servers_if_requested(&turn_context) .await; assert!(old_token.is_cancelled()); assert!( session .pending_mcp_server_refresh_config .lock() .await .is_none() ); let new_token = session.mcp_startup_cancellation_token().await; assert!(!new_token.is_cancelled()); }
// `record_model_warning` must leave, as the last history item, a user-role message whose
// single text item is the warning prefixed with "Warning: ".
#[tokio::test] async fn record_model_warning_appends_user_message() { let (mut session, turn_context) = make_session_and_context().await; let features = Features::with_defaults().into(); session.features = features; session .record_model_warning("too many unified exec processes", &turn_context) .await; let history = session.clone_history().await; let history_items = history.raw_items(); let last = history_items.last().expect("warning recorded"); match last { ResponseItem::Message { role, content, .. 
} => { assert_eq!(role, "user"); assert_eq!( content, &vec![ContentItem::InputText { text: "Warning: too many unified exec processes".to_string(), }] ); } other => panic!("expected user message, got {other:?}"), } }
// Spawns a `NeverEndingTask` (kind `Regular`, listening for cancellation), aborts all
// tasks with `TurnAbortReason::Interrupted`, and checks that `previous_turn_settings`
// is still `None` afterwards.
#[tokio::test] async fn spawn_task_does_not_update_previous_turn_settings_for_non_run_turn_tasks() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; sess.set_previous_turn_settings(/*previous_turn_settings*/ None) .await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; sess.spawn_task( Arc::clone(&tc), input, NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: true, }, ) .await; sess.abort_all_tasks(TurnAbortReason::Interrupted).await; assert_eq!(sess.previous_turn_settings().await, None); }
// Rebuilds the current context's config layer stack with network requirements
// (allow api.example.com, deny blocked.example.com; source `CloudRequirements`) and
// expects the settings diff against the previous context to contain a user text that
// mentions both domains.
#[tokio::test] async fn build_settings_update_items_emits_environment_item_for_network_changes() { let (session, previous_context) = make_session_and_context().await; let previous_context = Arc::new(previous_context);
// `with_model` is called with the unchanged slug to obtain a separate, mutable context.
let mut current_context = previous_context .with_model( previous_context.model_info.slug.clone(), &session.services.models_manager, ) .await; let mut config = (*current_context.config).clone(); let mut requirements = config.config_layer_stack.requirements().clone(); requirements.network = Some(Sourced::new( NetworkConstraints { domains: Some(NetworkDomainPermissionsToml { entries: std::collections::BTreeMap::from([ ( "api.example.com".to_string(), NetworkDomainPermissionToml::Allow, ), ( "blocked.example.com".to_string(), NetworkDomainPermissionToml::Deny, ), ]), }), ..Default::default() }, RequirementSource::CloudRequirements, ));
// Keep every existing layer (disabled ones included) and swap in the new requirements.
let layers = config .config_layer_stack .get_layers( ConfigLayerStackOrdering::LowestPrecedenceFirst, /*include_disabled*/ true, ) .into_iter() .cloned() .collect(); config.config_layer_stack = ConfigLayerStack::new( layers, requirements, config.config_layer_stack.requirements_toml().clone(), ) .expect("rebuild config layer stack with network 
requirements"); current_context.config = Arc::new(config); let reference_context_item = previous_context.to_turn_context_item(); let update_items = session .build_settings_update_items(Some(&reference_context_item), ¤t_context) .await; let environment_update = user_input_texts(&update_items) .into_iter() .find(|text| text.contains("")) .expect("environment update item should be emitted"); assert!(environment_update.contains("")); assert!(environment_update.contains("api.example.com")); assert!(environment_update.contains("blocked.example.com")); } #[tokio::test] async fn build_settings_update_items_emits_environment_item_for_time_changes() { let (session, previous_context) = make_session_and_context().await; let previous_context = Arc::new(previous_context); let mut current_context = previous_context .with_model( previous_context.model_info.slug.clone(), &session.services.models_manager, ) .await; current_context.current_date = Some("2026-02-27".to_string()); current_context.timezone = Some("Europe/Berlin".to_string()); let reference_context_item = previous_context.to_turn_context_item(); let update_items = session .build_settings_update_items(Some(&reference_context_item), ¤t_context) .await; let environment_update = user_input_texts(&update_items) .into_iter() .find(|text| text.contains("")) .expect("environment update item should be emitted"); assert!(environment_update.contains("2026-02-27")); assert!(environment_update.contains("Europe/Berlin")); } #[tokio::test] async fn build_settings_update_items_omits_environment_item_when_disabled() { let (session, previous_context) = make_session_and_context().await; let previous_context = Arc::new(previous_context); let mut current_context = previous_context .with_model( previous_context.model_info.slug.clone(), &session.services.models_manager, ) .await; let mut config = (*current_context.config).clone(); config.include_environment_context = false; current_context.config = Arc::new(config); current_context.current_date 
= Some("2026-02-27".to_string()); let reference_context_item = previous_context.to_turn_context_item(); let update_items = session .build_settings_update_items(Some(&reference_context_item), ¤t_context) .await; let user_texts = user_input_texts(&update_items); assert!( !user_texts .iter() .any(|text| text.contains("")), "did not expect environment context updates when disabled, got {user_texts:?}" ); } #[tokio::test] async fn build_settings_update_items_emits_realtime_start_when_session_becomes_live() { let (session, previous_context) = make_session_and_context().await; let previous_context = Arc::new(previous_context); let mut current_context = previous_context .with_model( previous_context.model_info.slug.clone(), &session.services.models_manager, ) .await; current_context.realtime_active = true; let update_items = session .build_settings_update_items( Some(&previous_context.to_turn_context_item()), ¤t_context, ) .await; let developer_texts = developer_input_texts(&update_items); assert!( developer_texts .iter() .any(|text| text.contains("")), "expected a realtime start update, got {developer_texts:?}" ); } #[tokio::test] async fn build_settings_update_items_emits_realtime_end_when_session_stops_being_live() { let (session, mut previous_context) = make_session_and_context().await; previous_context.realtime_active = true; let mut current_context = previous_context .with_model( previous_context.model_info.slug.clone(), &session.services.models_manager, ) .await; current_context.realtime_active = false; let update_items = session .build_settings_update_items( Some(&previous_context.to_turn_context_item()), ¤t_context, ) .await; let developer_texts = developer_input_texts(&update_items); assert!( developer_texts .iter() .any(|text| text.contains("Reason: inactive")), "expected a realtime end update, got {developer_texts:?}" ); } #[tokio::test] async fn build_settings_update_items_uses_previous_turn_settings_for_realtime_end() { let (session, previous_context) = 
make_session_and_context().await; let mut previous_context_item = previous_context.to_turn_context_item(); previous_context_item.realtime_active = None; let previous_turn_settings = PreviousTurnSettings { model: previous_context.model_info.slug.clone(), realtime_active: Some(true), }; let mut current_context = previous_context .with_model( previous_context.model_info.slug.clone(), &session.services.models_manager, ) .await; current_context.realtime_active = false; session .set_previous_turn_settings(Some(previous_turn_settings)) .await; let update_items = session .build_settings_update_items(Some(&previous_context_item), ¤t_context) .await; let developer_texts = developer_input_texts(&update_items); assert!( developer_texts .iter() .any(|text| text.contains("Reason: inactive")), "expected a realtime end update from previous turn settings, got {developer_texts:?}" ); } #[tokio::test] async fn build_initial_context_uses_previous_realtime_state() { let (session, mut turn_context) = make_session_and_context().await; turn_context.realtime_active = true; let initial_context = session.build_initial_context(&turn_context).await; let developer_texts = developer_input_texts(&initial_context); assert!( developer_texts .iter() .any(|text| text.contains("")), "expected initial context to describe active realtime state, got {developer_texts:?}" ); let previous_context_item = turn_context.to_turn_context_item(); { let mut state = session.state.lock().await; state.set_reference_context_item(Some(previous_context_item)); } let resumed_context = session.build_initial_context(&turn_context).await; let resumed_developer_texts = developer_input_texts(&resumed_context); assert!( !resumed_developer_texts .iter() .any(|text| text.contains("")), "did not expect a duplicate realtime update, got {resumed_developer_texts:?}" ); } #[tokio::test] async fn build_initial_context_omits_default_image_save_location_with_image_history() { let (session, turn_context) = 
make_session_and_context().await;
// Seed history with a single completed image-generation call and no reference context item.
session .replace_history( vec![ResponseItem::ImageGenerationCall { id: "ig-test".to_string(), status: "completed".to_string(), revised_prompt: Some("a tiny blue square".to_string()), result: "Zm9v".to_string(), }], /*reference_context_item*/ None, ) .await; let initial_context = session.build_initial_context(&turn_context).await; let developer_texts = developer_input_texts(&initial_context); assert!( !developer_texts .iter() .any(|text| text.contains("Generated images are saved to")), "expected initial context to omit image save instructions even with image history, got {developer_texts:?}" ); }
// Baseline for the test above: with untouched history the initial context likewise
// omits the "Generated images are saved to" instructions.
#[tokio::test] async fn build_initial_context_omits_default_image_save_location_without_image_history() { let (session, turn_context) = make_session_and_context().await; let initial_context = session.build_initial_context(&turn_context).await; let developer_texts = developer_input_texts(&initial_context); assert!( !developer_texts .iter() .any(|text| text.contains("Generated images are saved to")), "expected initial context to omit image save instructions without image history, got {developer_texts:?}" ); }
// A completed image-generation item must (a) be saved to the artifact path derived from
// codex_home, the conversation id and the call id, and (b) be recorded in history
// preceded by a developer message describing where generated images are saved.
#[tokio::test] async fn handle_output_item_done_records_image_save_history_message() { let (session, turn_context) = make_session_and_context().await; let session = Arc::new(session); let turn_context = Arc::new(turn_context); let call_id = "ig_history_records_message"; let expected_saved_path = crate::stream_events_utils::image_generation_artifact_path( turn_context.config.codex_home.as_path(), &session.conversation_id.to_string(), call_id, );
// Best-effort removal of a stale artifact; the result is deliberately ignored.
let _ = std::fs::remove_file(&expected_saved_path);
// "Zm9v" is the base64 encoding of "foo", which the test later reads back as `b"foo"`.
let item = ResponseItem::ImageGenerationCall { id: call_id.to_string(), status: "completed".to_string(), revised_prompt: Some("a tiny blue square".to_string()), result: "Zm9v".to_string(), }; let mut ctx = HandleOutputCtx { sess: Arc::clone(&session), turn_context: Arc::clone(&turn_context), tool_runtime: test_tool_runtime(Arc::clone(&session), 
Arc::clone(&turn_context)), cancellation_token: CancellationToken::new(), }; handle_output_item_done(&mut ctx, item.clone(), /*previously_active_item*/ None) .await .expect("image generation item should succeed"); let history = session.clone_history().await;
// Rebuild the expected developer message from the artifact path helper.
// NOTE(review): the call-id argument here is an empty string; upstream may have passed a
// placeholder that was stripped from this copy — confirm before relying on it.
let image_output_path = crate::stream_events_utils::image_generation_artifact_path( turn_context.config.codex_home.as_path(), &session.conversation_id.to_string(), "", ); let image_output_dir = image_output_path .parent() .expect("generated image path should have a parent"); let image_message: ResponseItem = DeveloperInstructions::new(format!( "Generated images are saved to {} as {} by default.\nIf you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.", image_output_dir.display(), image_output_path.display(), )) .into();
// History must hold the save-location message followed by the item, and the file on
// disk must contain the decoded payload.
assert_eq!(history.raw_items(), &[image_message, item]); assert_eq!( std::fs::read(&expected_saved_path).expect("saved file"), b"foo" ); let _ = std::fs::remove_file(&expected_saved_path); }
// When the image payload cannot be saved, the item must still be recorded in history,
// but without the save-location developer message and without a file on disk.
#[tokio::test] async fn handle_output_item_done_skips_image_save_message_when_save_fails() { let (session, turn_context) = make_session_and_context().await; let session = Arc::new(session); let turn_context = Arc::new(turn_context); let call_id = "ig_history_no_message"; let expected_saved_path = crate::stream_events_utils::image_generation_artifact_path( turn_context.config.codex_home.as_path(), &session.conversation_id.to_string(), call_id, ); let _ = std::fs::remove_file(&expected_saved_path);
// NOTE(review): "_-8" is presumably rejected by the decoder used for `result` (`_` and `-`
// are outside the standard base64 alphabet) — confirm against the save path.
let item = ResponseItem::ImageGenerationCall { id: call_id.to_string(), status: "completed".to_string(), revised_prompt: Some("broken payload".to_string()), result: "_-8".to_string(), }; let mut ctx = HandleOutputCtx { sess: Arc::clone(&session), turn_context: Arc::clone(&turn_context), tool_runtime: test_tool_runtime(Arc::clone(&session), Arc::clone(&turn_context)), cancellation_token: CancellationToken::new(), }; 
handle_output_item_done(&mut ctx, item.clone(), /*previously_active_item*/ None) .await .expect("image generation item should still complete"); let history = session.clone_history().await;
// Only the item itself is recorded, and nothing was written to the artifact path.
assert_eq!(history.raw_items(), &[item]); assert!(!expected_saved_path.exists()); }
// Previous turn settings say realtime was active while the turn context (as returned by
// the fixture, unmodified) is used as-is: the initial context must contain a developer
// text with "Reason: inactive".
#[tokio::test] async fn build_initial_context_uses_previous_turn_settings_for_realtime_end() { let (session, turn_context) = make_session_and_context().await; let previous_turn_settings = PreviousTurnSettings { model: turn_context.model_info.slug.clone(), realtime_active: Some(true), }; session .set_previous_turn_settings(Some(previous_turn_settings)) .await; let initial_context = session.build_initial_context(&turn_context).await; let developer_texts = developer_input_texts(&initial_context); assert!( developer_texts .iter() .any(|text| text.contains("Reason: inactive")), "expected initial context to describe an ended realtime session, got {developer_texts:?}" ); }
// Realtime is active both in the previous turn settings and in the current turn context,
// and no reference context item is set by this test: the initial context must still
// restate the active realtime state.
// NOTE(review): the `contains("")` needle is an empty string (matches any text); the marker
// it was meant to look for appears lost from this copy — restore from upstream.
#[tokio::test] async fn build_initial_context_restates_realtime_start_when_reference_context_is_missing() { let (session, mut turn_context) = make_session_and_context().await; turn_context.realtime_active = true; let previous_turn_settings = PreviousTurnSettings { model: turn_context.model_info.slug.clone(), realtime_active: Some(true), }; session .set_previous_turn_settings(Some(previous_turn_settings)) .await; let initial_context = session.build_initial_context(&turn_context).await; let developer_texts = developer_input_texts(&initial_context); assert!( developer_texts .iter() .any(|text| text.contains("")), "expected initial context to restate active realtime when the reference context is missing, got {developer_texts:?}" ); }
// On a fresh fixture session, recording context updates must write exactly what
// `build_initial_context` produces into history and store the turn context as the
// reference context item.
#[tokio::test] async fn record_context_updates_and_set_reference_context_item_injects_full_context_when_baseline_missing() { let (session, turn_context) = make_session_and_context().await; session .record_context_updates_and_set_reference_context_item(&turn_context) .await; let history = 
session.clone_history().await; let initial_context = session.build_initial_context(&turn_context).await; assert_eq!(history.raw_items().to_vec(), initial_context); let current_context = session.reference_context_item().await;
// Compared via their JSON serializations.
assert_eq!( serde_json::to_value(current_context).expect("serialize current context item"), serde_json::to_value(Some(turn_context.to_turn_context_item())) .expect("serialize expected context item") ); }
// After history is replaced by just a compacted summary and the reference context item
// is cleared, recording context updates again must append the full initial context
// after the summary.
#[tokio::test] async fn record_context_updates_and_set_reference_context_item_reinjects_full_context_after_clear() { let (session, turn_context) = make_session_and_context().await; let compacted_summary = ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: format!("{}\nsummary", crate::compact::SUMMARY_PREFIX), }], end_turn: None, phase: None, }; session .record_into_history(std::slice::from_ref(&compacted_summary), &turn_context) .await; session .record_context_updates_and_set_reference_context_item(&turn_context) .await;
// Clear the reference item and reset history to the summary alone.
{ let mut state = session.state.lock().await; state.set_reference_context_item(/*item*/ None); } session .replace_history( vec![compacted_summary.clone()], /*reference_context_item*/ None, ) .await; session .record_context_updates_and_set_reference_context_item(&turn_context) .await; let history = session.clone_history().await; let mut expected_history = vec![compacted_summary]; expected_history.extend(session.build_initial_context(&turn_context).await); assert_eq!(history.raw_items().to_vec(), expected_history); }
// With a reference context item already set and a turn context built for a different
// model slug, the test expects no settings-update items and no history entries, while
// the new turn context is still stored as the reference item and persisted to the rollout.
#[tokio::test] async fn record_context_updates_and_set_reference_context_item_persists_baseline_without_emitting_diffs() { let (session, previous_context) = make_session_and_context().await;
// Pick whichever of the two slugs differs from the fixture's current model.
let next_model = if previous_context.model_info.slug == "gpt-5.1" { "gpt-5" } else { "gpt-5.1" }; let turn_context = previous_context .with_model(next_model.to_string(), &session.services.models_manager) .await; let previous_context_item = 
previous_context.to_turn_context_item(); { let mut state = session.state.lock().await; state.set_reference_context_item(Some(previous_context_item.clone())); }
// Install a real rollout recorder so the persisted items can be read back from disk.
let config = session.get_config().await; let recorder = RolloutRecorder::new( config.as_ref(), RolloutRecorderParams::new( ThreadId::default(), /*forked_from_id*/ None, SessionSource::Exec, BaseInstructions::default(), Vec::new(), EventPersistenceMode::Limited, ), /*state_db_ctx*/ None, /*state_builder*/ None, ) .await .expect("create rollout recorder"); let rollout_path = recorder.rollout_path().to_path_buf(); { let mut rollout = session.services.rollout.lock().await; *rollout = Some(recorder); }
// No diff items are expected, and recording must leave history empty.
let update_items = session .build_settings_update_items(Some(&previous_context_item), &turn_context) .await; assert_eq!(update_items, Vec::new()); session .record_context_updates_and_set_reference_context_item(&turn_context) .await; assert_eq!( session.clone_history().await.raw_items().to_vec(), Vec::new() );
// The in-memory reference item must now equal the new turn context (compared as JSON).
assert_eq!( serde_json::to_value(session.reference_context_item().await) .expect("serialize current context item"), serde_json::to_value(Some(turn_context.to_turn_context_item())) .expect("serialize expected context item") );
// Materialize and flush the rollout, then read it back: the first persisted
// `RolloutItem::TurnContext` must equal the new turn context.
session.ensure_rollout_materialized().await; session.flush_rollout().await; let InitialHistory::Resumed(resumed) = RolloutRecorder::get_rollout_history(&rollout_path) .await .expect("read rollout history") else { panic!("expected resumed rollout history"); }; let persisted_turn_context = resumed.history.iter().find_map(|item| match item { RolloutItem::TurnContext(ctx) => Some(ctx.clone()), _ => None, }); assert_eq!( serde_json::to_value(persisted_turn_context) .expect("serialize persisted turn context item"), serde_json::to_value(Some(turn_context.to_turn_context_item())) .expect("serialize expected turn context item") ); }
// When the previous turn used a different model slug, the first item of the initial
// context must be a developer message whose first content item is text.
#[tokio::test] async fn build_initial_context_prepends_model_switch_message() { let (session, turn_context) = make_session_and_context().await; let 
previous_turn_settings = PreviousTurnSettings { model: "previous-regular-model".to_string(), realtime_active: None, }; session .set_previous_turn_settings(Some(previous_turn_settings)) .await; let initial_context = session.build_initial_context(&turn_context).await; let ResponseItem::Message { role, content, .. } = &initial_context[0] else { panic!("expected developer message"); }; assert_eq!(role, "developer"); let [ContentItem::InputText { text }, ..] = content.as_slice() else { panic!("expected developer text"); }; assert!(text.contains("")); } #[tokio::test] async fn record_context_updates_and_set_reference_context_item_persists_full_reinjection_to_rollout() { let (session, previous_context) = make_session_and_context().await; let next_model = if previous_context.model_info.slug == "gpt-5.1" { "gpt-5" } else { "gpt-5.1" }; let turn_context = previous_context .with_model(next_model.to_string(), &session.services.models_manager) .await; let config = session.get_config().await; let recorder = RolloutRecorder::new( config.as_ref(), RolloutRecorderParams::new( ThreadId::default(), /*forked_from_id*/ None, SessionSource::Exec, BaseInstructions::default(), Vec::new(), EventPersistenceMode::Limited, ), /*state_db_ctx*/ None, /*state_builder*/ None, ) .await .expect("create rollout recorder"); let rollout_path = recorder.rollout_path().to_path_buf(); { let mut rollout = session.services.rollout.lock().await; *rollout = Some(recorder); } session .persist_rollout_items(&[RolloutItem::EventMsg(EventMsg::UserMessage( UserMessageEvent { message: "seed rollout".to_string(), images: None, local_images: Vec::new(), text_elements: Vec::new(), }, ))]) .await; { let mut state = session.state.lock().await; state.set_reference_context_item(/*item*/ None); } session .set_previous_turn_settings(Some(PreviousTurnSettings { model: previous_context.model_info.slug.clone(), realtime_active: Some(previous_context.realtime_active), })) .await; session 
.record_context_updates_and_set_reference_context_item(&turn_context) .await; session.ensure_rollout_materialized().await; session.flush_rollout().await; let InitialHistory::Resumed(resumed) = RolloutRecorder::get_rollout_history(&rollout_path) .await .expect("read rollout history") else { panic!("expected resumed rollout history"); }; let persisted_turn_context = resumed.history.iter().find_map(|item| match item { RolloutItem::TurnContext(ctx) => Some(ctx.clone()), _ => None, }); assert_eq!( serde_json::to_value(persisted_turn_context) .expect("serialize persisted turn context item"), serde_json::to_value(Some(turn_context.to_turn_context_item())) .expect("serialize expected turn context item") ); } #[tokio::test] async fn run_user_shell_command_does_not_set_reference_context_item() { let (session, _turn_context, rx) = make_session_and_context_with_rx().await; { let mut state = session.state.lock().await; state.set_reference_context_item(/*item*/ None); } handlers::run_user_shell_command(&session, "sub-id".to_string(), "echo shell".to_string()) .await; let deadline = StdDuration::from_secs(15); let start = std::time::Instant::now(); loop { let remaining = deadline.saturating_sub(start.elapsed()); let evt = tokio::time::timeout(remaining, rx.recv()) .await .expect("timeout waiting for event") .expect("event"); if matches!(evt.msg, EventMsg::TurnComplete(_)) { break; } } assert!( session.reference_context_item().await.is_none(), "standalone shell tasks should not mutate previous context" ); } #[derive(Clone, Copy)] struct NeverEndingTask { kind: TaskKind, listen_to_cancellation_token: bool, } impl SessionTask for NeverEndingTask { fn kind(&self) -> TaskKind { self.kind } fn span_name(&self) -> &'static str { "session_task.never_ending" } async fn run( self: Arc, _session: Arc, _ctx: Arc, _input: Vec, cancellation_token: CancellationToken, ) -> Option { if self.listen_to_cancellation_token { cancellation_token.cancelled().await; return None; } loop { 
sleep(Duration::from_secs(60)).await; } } } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] #[test_log::test] async fn abort_regular_task_emits_turn_aborted_only() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; sess.spawn_task( Arc::clone(&tc), input, NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: false, }, ) .await; sess.abort_all_tasks(TurnAbortReason::Interrupted).await; // Interrupts persist a model-visible `` marker into history, but there is no // separate client-visible event for that marker (only `EventMsg::TurnAborted`). let evt = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) .await .expect("timeout waiting for event") .expect("event"); match evt.msg { EventMsg::TurnAborted(e) => assert_eq!(TurnAbortReason::Interrupted, e.reason), other => panic!("unexpected event: {other:?}"), } // No extra events should be emitted after an abort. assert!(rx.try_recv().is_err()); } #[tokio::test] async fn abort_gracefully_emits_turn_aborted_only() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; sess.spawn_task( Arc::clone(&tc), input, NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: true, }, ) .await; sess.abort_all_tasks(TurnAbortReason::Interrupted).await; // Even if tasks handle cancellation gracefully, interrupts still result in `TurnAborted` // being the only client-visible signal. let evt = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) .await .expect("timeout waiting for event") .expect("event"); match evt.msg { EventMsg::TurnAborted(e) => assert_eq!(TurnAbortReason::Interrupted, e.reason), other => panic!("unexpected event: {other:?}"), } // No extra events should be emitted after an abort. 
assert!(rx.try_recv().is_err()); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn task_finish_emits_turn_item_lifecycle_for_leftover_pending_user_input() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; sess.spawn_task( Arc::clone(&tc), input, NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: false, }, ) .await; while rx.try_recv().is_ok() {} sess.inject_response_items(vec![ResponseInputItem::Message { role: "user".to_string(), content: vec![ContentItem::InputText { text: "late pending input".to_string(), }], }]) .await .expect("inject pending input into active turn"); sess.on_task_finished(Arc::clone(&tc), /*last_agent_message*/ None) .await; let history = sess.clone_history().await; let expected = ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: "late pending input".to_string(), }], end_turn: None, phase: None, }; assert!( history.raw_items().iter().any(|item| item == &expected), "expected pending input to be persisted into history on turn completion" ); let first = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) .await .expect("expected raw response item event") .expect("channel open"); assert!(matches!(first.msg, EventMsg::RawResponseItem(_))); let second = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) .await .expect("expected item started event") .expect("channel open"); assert!(matches!( second.msg, EventMsg::ItemStarted(ItemStartedEvent { item: TurnItem::UserMessage(UserMessageItem { content, .. }), .. 
}) if content == vec![UserInput::Text { text: "late pending input".to_string(), text_elements: Vec::new(), }] )); let third = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) .await .expect("expected item completed event") .expect("channel open"); assert!(matches!( third.msg, EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::UserMessage(UserMessageItem { content, .. }), .. }) if content == vec![UserInput::Text { text: "late pending input".to_string(), text_elements: Vec::new(), }] )); let fourth = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) .await .expect("expected legacy user message event") .expect("channel open"); assert!(matches!( fourth.msg, EventMsg::UserMessage(UserMessageEvent { message, images, text_elements, local_images, }) if message == "late pending input" && images == Some(Vec::new()) && text_elements.is_empty() && local_images.is_empty() )); let fifth = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) .await .expect("expected turn complete event") .expect("channel open"); assert!(matches!( fifth.msg, EventMsg::TurnComplete(TurnCompleteEvent { turn_id, last_agent_message: None, .. 
}) if turn_id == tc.sub_id )); } #[tokio::test] async fn steer_input_requires_active_turn() { let (sess, _tc, _rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "steer".to_string(), text_elements: Vec::new(), }]; let err = sess .steer_input(input, /*expected_turn_id*/ None) .await .expect_err("steering without active turn should fail"); assert!(matches!(err, SteerInputError::NoActiveTurn(_))); } #[tokio::test] async fn steer_input_enforces_expected_turn_id() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; sess.spawn_task( Arc::clone(&tc), input, NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: false, }, ) .await; let steer_input = vec![UserInput::Text { text: "steer".to_string(), text_elements: Vec::new(), }]; let err = sess .steer_input(steer_input, Some("different-turn-id")) .await .expect_err("mismatched expected turn id should fail"); match err { SteerInputError::ExpectedTurnMismatch { expected, actual } => { assert_eq!( (expected, actual), ("different-turn-id".to_string(), tc.sub_id.clone()) ); } other => panic!("unexpected error: {other:?}"), } } #[tokio::test] async fn steer_input_rejects_non_regular_turns() { for (task_kind, turn_kind) in [ (TaskKind::Review, NonSteerableTurnKind::Review), (TaskKind::Compact, NonSteerableTurnKind::Compact), ] { let (sess, _tc, _rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; let turn_context = sess.new_default_turn_with_sub_id("turn".to_string()).await; sess.spawn_task( turn_context, input, NeverEndingTask { kind: task_kind, listen_to_cancellation_token: true, }, ) .await; let steer_input = vec![UserInput::Text { text: "steer".to_string(), text_elements: Vec::new(), }]; let err = sess .steer_input(steer_input, /*expected_turn_id*/ None) .await 
.expect_err("steering a non-regular turn should fail"); assert_eq!(err, SteerInputError::ActiveTurnNotSteerable { turn_kind }); sess.abort_all_tasks(TurnAbortReason::Interrupted).await; } } #[tokio::test] async fn steer_input_returns_active_turn_id() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; sess.spawn_task( Arc::clone(&tc), input, NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: false, }, ) .await; let steer_input = vec![UserInput::Text { text: "steer".to_string(), text_elements: Vec::new(), }]; let turn_id = sess .steer_input(steer_input, Some(&tc.sub_id)) .await .expect("steering with matching expected turn id should succeed"); assert_eq!(turn_id, tc.sub_id); assert!(sess.has_pending_input().await); } #[tokio::test] async fn prepend_pending_input_keeps_older_tail_ahead_of_newer_input() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "hello".to_string(), text_elements: Vec::new(), }]; sess.spawn_task( Arc::clone(&tc), input, NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: false, }, ) .await; let blocked = ResponseInputItem::Message { role: "user".to_string(), content: vec![ContentItem::InputText { text: "blocked queued prompt".to_string(), }], }; let later = ResponseInputItem::Message { role: "user".to_string(), content: vec![ContentItem::InputText { text: "later queued prompt".to_string(), }], }; let newer = ResponseInputItem::Message { role: "user".to_string(), content: vec![ContentItem::InputText { text: "newer queued prompt".to_string(), }], }; sess.inject_response_items(vec![blocked.clone(), later.clone()]) .await .expect("inject initial pending input into active turn"); let drained = sess.get_pending_input().await; assert_eq!(drained, vec![blocked, later.clone()]); sess.inject_response_items(vec![newer.clone()]) .await .expect("inject 
newer pending input into active turn"); let mut drained_iter = drained.into_iter(); let _blocked = drained_iter.next().expect("blocked prompt should exist"); sess.prepend_pending_input(drained_iter.collect()) .await .expect("requeue later pending input at the front of the queue"); assert_eq!(sess.get_pending_input().await, vec![later, newer]); } #[tokio::test] async fn queued_response_items_for_next_turn_move_into_next_active_turn() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; let queued_item = ResponseInputItem::Message { role: "assistant".to_string(), content: vec![ContentItem::InputText { text: "queued before wake".to_string(), }], }; sess.queue_response_items_for_next_turn(vec![queued_item.clone()]) .await; sess.spawn_task( Arc::clone(&tc), Vec::new(), NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: false, }, ) .await; assert_eq!(sess.get_pending_input().await, vec![queued_item]); } #[tokio::test] async fn queue_only_mailbox_mail_waits_for_next_turn_after_answer_boundary() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; let communication = InterAgentCommunication::new( AgentPath::try_from("/root/worker").expect("worker path should parse"), AgentPath::root(), Vec::new(), "late queue-only update".to_string(), /*trigger_turn*/ false, ); sess.spawn_task( Arc::clone(&tc), Vec::new(), NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: true, }, ) .await; sess.defer_mailbox_delivery_to_next_turn(&tc.sub_id).await; sess.enqueue_mailbox_communication(communication.clone()); assert!( !sess.has_pending_input().await, "queue-only mailbox mail should stay buffered once the current turn emitted its answer" ); assert_eq!(sess.get_pending_input().await, Vec::new()); sess.abort_all_tasks(TurnAbortReason::Replaced).await; assert_eq!( sess.get_pending_input().await, vec![communication.to_response_input_item()], ); } #[tokio::test] async fn 
trigger_turn_mailbox_mail_waits_for_next_turn_after_answer_boundary() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; sess.spawn_task( Arc::clone(&tc), Vec::new(), NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: true, }, ) .await; sess.defer_mailbox_delivery_to_next_turn(&tc.sub_id).await; sess.enqueue_mailbox_communication(InterAgentCommunication::new( AgentPath::try_from("/root/worker").expect("worker path should parse"), AgentPath::root(), Vec::new(), "late trigger update".to_string(), /*trigger_turn*/ true, )); assert!( !sess.has_pending_input().await, "trigger-turn mailbox mail should not extend the current turn after its answer boundary" ); sess.abort_all_tasks(TurnAbortReason::Replaced).await; assert!(sess.has_trigger_turn_mailbox_items().await); } #[tokio::test] async fn steered_input_reopens_mailbox_delivery_for_current_turn() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; let communication = InterAgentCommunication::new( AgentPath::try_from("/root/worker").expect("worker path should parse"), AgentPath::root(), Vec::new(), "queued child update".to_string(), /*trigger_turn*/ false, ); sess.spawn_task( Arc::clone(&tc), Vec::new(), NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: true, }, ) .await; sess.defer_mailbox_delivery_to_next_turn(&tc.sub_id).await; sess.enqueue_mailbox_communication(communication.clone()); sess.steer_input( vec![UserInput::Text { text: "follow up".to_string(), text_elements: Vec::new(), }], Some(&tc.sub_id), ) .await .expect("steered input should be accepted"); assert_eq!( sess.get_pending_input().await, vec![ ResponseInputItem::from(vec![UserInput::Text { text: "follow up".to_string(), text_elements: Vec::new(), }]), communication.to_response_input_item(), ], ); } #[tokio::test] async fn stale_defer_mailbox_delivery_does_not_override_steered_input() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; let communication = 
InterAgentCommunication::new( AgentPath::try_from("/root/worker").expect("worker path should parse"), AgentPath::root(), Vec::new(), "queued child update".to_string(), /*trigger_turn*/ false, ); sess.spawn_task( Arc::clone(&tc), Vec::new(), NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: true, }, ) .await; sess.defer_mailbox_delivery_to_next_turn(&tc.sub_id).await; sess.enqueue_mailbox_communication(communication.clone()); sess.steer_input( vec![UserInput::Text { text: "follow up".to_string(), text_elements: Vec::new(), }], Some(&tc.sub_id), ) .await .expect("steered input should be accepted"); sess.defer_mailbox_delivery_to_next_turn(&tc.sub_id).await; assert_eq!( sess.get_pending_input().await, vec![ ResponseInputItem::from(vec![UserInput::Text { text: "follow up".to_string(), text_elements: Vec::new(), }]), communication.to_response_input_item(), ], ); } #[tokio::test] async fn tool_calls_reopen_mailbox_delivery_for_current_turn() { let (sess, tc, _rx) = make_session_and_context_with_rx().await; let communication = InterAgentCommunication::new( AgentPath::try_from("/root/worker").expect("worker path should parse"), AgentPath::root(), Vec::new(), "queued child update".to_string(), /*trigger_turn*/ false, ); sess.spawn_task( Arc::clone(&tc), Vec::new(), NeverEndingTask { kind: TaskKind::Regular, listen_to_cancellation_token: true, }, ) .await; sess.defer_mailbox_delivery_to_next_turn(&tc.sub_id).await; sess.enqueue_mailbox_communication(communication.clone()); let item = ResponseItem::FunctionCall { id: None, name: "test_tool".to_string(), namespace: None, arguments: "{}".to_string(), call_id: "call-1".to_string(), }; let mut ctx = HandleOutputCtx { sess: Arc::clone(&sess), turn_context: Arc::clone(&tc), tool_runtime: test_tool_runtime(Arc::clone(&sess), Arc::clone(&tc)), cancellation_token: CancellationToken::new(), }; let output = handle_output_item_done(&mut ctx, item, /*previously_active_item*/ None) .await .expect("tool call should be 
handled"); assert!(output.needs_follow_up); assert!(output.tool_future.is_some()); assert_eq!( sess.get_pending_input().await, vec![communication.to_response_input_item()], ); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn abort_review_task_emits_exited_then_aborted_and_records_history() { let (sess, tc, rx) = make_session_and_context_with_rx().await; let input = vec![UserInput::Text { text: "start review".to_string(), text_elements: Vec::new(), }]; sess.spawn_task(Arc::clone(&tc), input, ReviewTask::new()) .await; sess.abort_all_tasks(TurnAbortReason::Interrupted).await; // Aborting a review task should exit review mode before surfacing the abort to the client. // We scan for these events (rather than relying on fixed ordering) since unrelated events // may interleave. let mut exited_review_mode_idx = None; let mut turn_aborted_idx = None; let mut idx = 0usize; let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(3); while tokio::time::Instant::now() < deadline { let remaining = deadline.saturating_duration_since(tokio::time::Instant::now()); let evt = tokio::time::timeout(remaining, rx.recv()) .await .expect("timeout waiting for event") .expect("event"); let event_idx = idx; idx = idx.saturating_add(1); match evt.msg { EventMsg::ExitedReviewMode(ev) => { assert!(ev.review_output.is_none()); exited_review_mode_idx = Some(event_idx); } EventMsg::TurnAborted(ev) => { assert_eq!(TurnAbortReason::Interrupted, ev.reason); turn_aborted_idx = Some(event_idx); break; } _ => {} } } assert!( exited_review_mode_idx.is_some(), "expected ExitedReviewMode after abort" ); assert!( turn_aborted_idx.is_some(), "expected TurnAborted after abort" ); assert!( exited_review_mode_idx.unwrap() < turn_aborted_idx.unwrap(), "expected ExitedReviewMode before TurnAborted" ); let history = sess.clone_history().await; // The `` marker is silent in the event stream, so verify it is still // recorded in history for the model. 
assert!( history.raw_items().iter().any(|item| { let ResponseItem::Message { role, content, .. } = item else { return false; }; if role != "user" { return false; } content.iter().any(|content_item| { let ContentItem::InputText { text } = content_item else { return false; }; text.contains(crate::contextual_user_message::TURN_ABORTED_OPEN_TAG) }) }), "expected a model-visible turn aborted marker in history after interrupt" ); } #[tokio::test] async fn fatal_tool_error_stops_turn_and_reports_error() { let (session, turn_context, _rx) = make_session_and_context_with_rx().await; let tools = { session .services .mcp_connection_manager .read() .await .list_all_tools() .await }; let app_tools = Some(tools.clone()); let mcp_tool_router_inputs = crate::tools::router::map_mcp_tool_infos(&tools); let router = ToolRouter::from_config( &turn_context.tools_config, crate::tools::router::ToolRouterParams { mcp_tools: Some(mcp_tool_router_inputs.mcp_tools), tool_namespaces: Some(mcp_tool_router_inputs.tool_namespaces), app_tools, discoverable_tools: None, dynamic_tools: turn_context.dynamic_tools.as_slice(), }, ); let item = ResponseItem::CustomToolCall { id: None, status: None, call_id: "call-1".to_string(), name: "shell".to_string(), input: "{}".to_string(), }; let call = ToolRouter::build_tool_call(session.as_ref(), item.clone()) .await .expect("build tool call") .expect("tool call present"); let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())); let err = router .dispatch_tool_call_with_code_mode_result( Arc::clone(&session), Arc::clone(&turn_context), tracker, call, ToolCallSource::Direct, ) .await .err() .expect("expected fatal error"); match err { FunctionCallError::Fatal(message) => { assert_eq!(message, "tool shell invoked with incompatible payload"); } other => panic!("expected FunctionCallError::Fatal, got {other:?}"), } } async fn sample_rollout( session: &Session, _turn_context: &TurnContext, ) -> (Vec, Vec) { let mut rollout_items = Vec::new(); let 
mut live_history = ContextManager::new(); // Use the same turn_context source as record_initial_history so model_info (and thus // personality_spec) matches reconstruction. let reconstruction_turn = session.new_default_turn().await; let mut initial_context = session .build_initial_context(reconstruction_turn.as_ref()) .await; // Ensure personality_spec is present when Personality is enabled, so expected matches // what reconstruction produces (build_initial_context may omit it when baked into model). if !initial_context.iter().any(|m| { matches!(m, ResponseItem::Message { role, content, .. } if role == "developer" && content.iter().any(|c| { matches!(c, ContentItem::InputText { text } if text.contains("")) })) }) && let Some(p) = reconstruction_turn.personality && session.features.enabled(Feature::Personality) && let Some(personality_message) = reconstruction_turn .model_info .model_messages .as_ref() .and_then(|m| m.get_personality_message(Some(p)).filter(|s| !s.is_empty())) { let msg = DeveloperInstructions::personality_spec_message(personality_message).into(); let insert_at = initial_context .iter() .position(|m| matches!(m, ResponseItem::Message { role, .. 
} if role == "developer")) .map(|i| i + 1) .unwrap_or(0); initial_context.insert(insert_at, msg); } for item in &initial_context { rollout_items.push(RolloutItem::ResponseItem(item.clone())); } live_history.record_items( initial_context.iter(), reconstruction_turn.truncation_policy, ); let user1 = ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: "first user".to_string(), }], end_turn: None, phase: None, }; live_history.record_items( std::iter::once(&user1), reconstruction_turn.truncation_policy, ); rollout_items.push(RolloutItem::ResponseItem(user1.clone())); let assistant1 = ResponseItem::Message { id: None, role: "assistant".to_string(), content: vec![ContentItem::OutputText { text: "assistant reply one".to_string(), }], end_turn: None, phase: None, }; live_history.record_items( std::iter::once(&assistant1), reconstruction_turn.truncation_policy, ); rollout_items.push(RolloutItem::ResponseItem(assistant1.clone())); let summary1 = "summary one"; let snapshot1 = live_history .clone() .for_prompt(&reconstruction_turn.model_info.input_modalities); let user_messages1 = collect_user_messages(&snapshot1); let rebuilt1 = compact::build_compacted_history(Vec::new(), &user_messages1, summary1); live_history.replace(rebuilt1); rollout_items.push(RolloutItem::Compacted(CompactedItem { message: summary1.to_string(), replacement_history: None, })); let user2 = ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: "second user".to_string(), }], end_turn: None, phase: None, }; live_history.record_items( std::iter::once(&user2), reconstruction_turn.truncation_policy, ); rollout_items.push(RolloutItem::ResponseItem(user2.clone())); let assistant2 = ResponseItem::Message { id: None, role: "assistant".to_string(), content: vec![ContentItem::OutputText { text: "assistant reply two".to_string(), }], end_turn: None, phase: None, }; live_history.record_items( 
std::iter::once(&assistant2), reconstruction_turn.truncation_policy, ); rollout_items.push(RolloutItem::ResponseItem(assistant2.clone())); let summary2 = "summary two"; let snapshot2 = live_history .clone() .for_prompt(&reconstruction_turn.model_info.input_modalities); let user_messages2 = collect_user_messages(&snapshot2); let rebuilt2 = compact::build_compacted_history(Vec::new(), &user_messages2, summary2); live_history.replace(rebuilt2); rollout_items.push(RolloutItem::Compacted(CompactedItem { message: summary2.to_string(), replacement_history: None, })); let user3 = ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { text: "third user".to_string(), }], end_turn: None, phase: None, }; live_history.record_items( std::iter::once(&user3), reconstruction_turn.truncation_policy, ); rollout_items.push(RolloutItem::ResponseItem(user3)); let assistant3 = ResponseItem::Message { id: None, role: "assistant".to_string(), content: vec![ContentItem::OutputText { text: "assistant reply three".to_string(), }], end_turn: None, phase: None, }; live_history.record_items( std::iter::once(&assistant3), reconstruction_turn.truncation_policy, ); rollout_items.push(RolloutItem::ResponseItem(assistant3)); ( rollout_items, live_history.for_prompt(&reconstruction_turn.model_info.input_modalities), ) } #[tokio::test] async fn rejects_escalated_permissions_when_policy_not_on_request() { use crate::exec::ExecParams; use crate::exec_policy::ExecApprovalRequest; use crate::sandboxing::SandboxPermissions; use crate::tools::sandboxing::ExecApprovalRequirement; use crate::turn_diff_tracker::TurnDiffTracker; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::SandboxPolicy; use std::collections::HashMap; let (session, mut turn_context_raw) = make_session_and_context().await; // Ensure policy is NOT OnRequest so the early rejection path triggers turn_context_raw .approval_policy .set(AskForApproval::OnFailure) .expect("test 
setup should allow updating approval policy"); let session = Arc::new(session); let mut turn_context = Arc::new(turn_context_raw); let timeout_ms = 1000; let sandbox_permissions = SandboxPermissions::RequireEscalated; let params = ExecParams { command: if cfg!(windows) { vec![ "cmd.exe".to_string(), "/C".to_string(), "echo hi".to_string(), ] } else { vec![ "/bin/sh".to_string(), "-c".to_string(), "echo hi".to_string(), ] }, cwd: turn_context.cwd.clone(), expiration: timeout_ms.into(), capture_policy: ExecCapturePolicy::ShellTool, env: HashMap::new(), network: None, sandbox_permissions, windows_sandbox_level: turn_context.windows_sandbox_level, windows_sandbox_private_desktop: turn_context .config .permissions .windows_sandbox_private_desktop, justification: Some("test".to_string()), arg0: None, }; let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())); let tool_name = "shell"; let call_id = "test-call".to_string(); let handler = ShellHandler; let resp = handler .handle(ToolInvocation { session: Arc::clone(&session), turn: Arc::clone(&turn_context), tracker: Arc::clone(&turn_diff_tracker), call_id, tool_name: tool_name.to_string(), tool_namespace: None, payload: ToolPayload::Function { arguments: serde_json::json!({ "command": params.command.clone(), "workdir": Some(turn_context.cwd.to_string_lossy().to_string()), "timeout_ms": params.expiration.timeout_ms(), "sandbox_permissions": params.sandbox_permissions, "justification": params.justification.clone(), }) .to_string(), }, }) .await; let Err(FunctionCallError::RespondToModel(output)) = resp else { panic!("expected error result"); }; let expected = format!( "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}", policy = turn_context.approval_policy.value() ); pretty_assertions::assert_eq!(output, expected); pretty_assertions::assert_eq!(session.granted_turn_permissions().await, None); // The rejection should not 
poison the non-escalated path for the same // command. Force DangerFullAccess so this check stays focused on approval // policy rather than platform-specific sandbox behavior. let turn_context_mut = Arc::get_mut(&mut turn_context).expect("unique turn context Arc"); turn_context_mut .sandbox_policy .set(SandboxPolicy::DangerFullAccess) .expect("test setup should allow updating sandbox policy"); turn_context_mut.file_system_sandbox_policy = FileSystemSandboxPolicy::from(turn_context_mut.sandbox_policy.get()); turn_context_mut.network_sandbox_policy = NetworkSandboxPolicy::from(turn_context_mut.sandbox_policy.get()); let exec_approval_requirement = session .services .exec_policy .create_exec_approval_requirement_for_command(ExecApprovalRequest { command: ¶ms.command, approval_policy: turn_context.approval_policy.value(), sandbox_policy: turn_context.sandbox_policy.get(), file_system_sandbox_policy: &turn_context.file_system_sandbox_policy, sandbox_permissions: SandboxPermissions::UseDefault, prefix_rule: None, }) .await; assert!(matches!( exec_approval_requirement, ExecApprovalRequirement::Skip { .. 
} )); } #[tokio::test] async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() { use crate::sandboxing::SandboxPermissions; use crate::turn_diff_tracker::TurnDiffTracker; use codex_protocol::protocol::AskForApproval; let (session, mut turn_context_raw) = make_session_and_context().await; turn_context_raw .approval_policy .set(AskForApproval::OnFailure) .expect("test setup should allow updating approval policy"); let session = Arc::new(session); let turn_context = Arc::new(turn_context_raw); let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())); let handler = UnifiedExecHandler; let resp = handler .handle(ToolInvocation { session: Arc::clone(&session), turn: Arc::clone(&turn_context), tracker: Arc::clone(&tracker), call_id: "exec-call".to_string(), tool_name: "exec_command".to_string(), tool_namespace: None, payload: ToolPayload::Function { arguments: serde_json::json!({ "cmd": "echo hi", "sandbox_permissions": SandboxPermissions::RequireEscalated, "justification": "need unsandboxed execution", }) .to_string(), }, }) .await; let Err(FunctionCallError::RespondToModel(output)) = resp else { panic!("expected error result"); }; let expected = format!( "approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}", policy = turn_context.approval_policy.value() ); pretty_assertions::assert_eq!(output, expected); }