diff --git a/codex-rs/config/src/config_toml.rs b/codex-rs/config/src/config_toml.rs index e2c9bfeb0f..851ec52bfd 100644 --- a/codex-rs/config/src/config_toml.rs +++ b/codex-rs/config/src/config_toml.rs @@ -485,10 +485,6 @@ pub struct ConfigToml { /// See [`crate::types::Notice`] for more details pub notice: Option, - /// Legacy, now use features - /// Deprecated: ignored. Use `model_instructions_file`. - #[schemars(skip)] - pub experimental_instructions_file: Option, pub experimental_compact_prompt_file: Option, pub experimental_use_unified_exec_tool: Option, /// Preferred OSS provider for local models, e.g. "lmstudio" or "ollama". diff --git a/codex-rs/config/src/profile_toml.rs b/codex-rs/config/src/profile_toml.rs index e8e6320490..6cf35be68e 100644 --- a/codex-rs/config/src/profile_toml.rs +++ b/codex-rs/config/src/profile_toml.rs @@ -50,9 +50,6 @@ pub struct ConfigProfile { pub js_repl_node_module_dirs: Option>, /// Optional absolute path to patched zsh used by zsh-exec-bridge-backed shell execution. pub zsh_path: Option, - /// Deprecated: ignored. Use `model_instructions_file`. - #[schemars(skip)] - pub experimental_instructions_file: Option, pub experimental_compact_prompt_file: Option, pub include_permissions_instructions: Option, pub include_apps_instructions: Option, diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 8fd385259b..7ae8434800 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -3608,13 +3608,6 @@ impl Config { } } -pub(crate) fn uses_deprecated_instructions_file(config_layer_stack: &ConfigLayerStack) -> bool { - config_layer_stack - .layers_high_to_low() - .into_iter() - .any(|layer| toml_uses_deprecated_instructions_file(&layer.config)) -} - fn guardian_policy_config_from_requirements( requirements_toml: &ConfigRequirementsToml, ) -> Option { @@ -3628,23 +3621,6 @@ fn normalize_guardian_policy_config(value: Option<&str>) -> Option { }) } -fn toml_uses_deprecated_instructions_file(value: &TomlValue) -> bool { - let Some(table) = value.as_table() else { - return false; - }; - if table.contains_key("experimental_instructions_file") { - return true; - } - let Some(profiles) = table.get("profiles").and_then(TomlValue::as_table) else { - return false; - }; - profiles.values().any(|profile| { - profile.as_table().is_some_and(|profile_table| { - profile_table.contains_key("experimental_instructions_file") - }) - }) -} - /// Returns the path to the Codex configuration directory, which can be /// specified by the `CODEX_HOME` environment variable. If not set, defaults to /// `~/.codex`. diff --git a/codex-rs/core/src/mcp_tool_call.rs b/codex-rs/core/src/mcp_tool_call.rs index fc8ce4d8ca..8f6e3afaf5 100644 --- a/codex-rs/core/src/mcp_tool_call.rs +++ b/codex-rs/core/src/mcp_tool_call.rs @@ -192,12 +192,6 @@ pub(crate) async fn handle_mcp_tool_call( .unwrap_or_else(|| JsonValue::Object(serde_json::Map::new())), }; } - let request_meta = build_mcp_tool_call_request_meta( - turn_context.as_ref(), - &server, - &call_id, - metadata.as_ref(), - ); let connector_id = metadata .as_ref() .and_then(|metadata| metadata.connector_id.clone()); @@ -235,7 +229,6 @@ pub(crate) async fn handle_mcp_tool_call( &call_id, invocation, metadata.as_ref(), - request_meta, mcp_app_resource_uri, ) .await; @@ -303,7 +296,6 @@ pub(crate) async fn handle_mcp_tool_call( &call_id, invocation, metadata.as_ref(), - request_meta, mcp_app_resource_uri, ) .await @@ -320,7 +312,6 @@ async fn handle_approved_mcp_tool_call( call_id: &str, invocation: McpInvocation, metadata: Option<&McpToolApprovalMetadata>, - request_meta: Option, mcp_app_resource_uri: Option, ) -> HandledMcpToolCall { let server = invocation.server.clone(); @@ -353,6 +344,8 @@ async fn handle_approved_mcp_tool_call( }; let result = async { let rewritten_arguments = rewrite?; + let request_meta = + build_mcp_tool_call_request_meta(turn_context, &server, call_id, metadata); let result = execute_mcp_tool_call( sess, turn_context, diff --git a/codex-rs/core/src/session/config_lock.rs b/codex-rs/core/src/session/config_lock.rs index 1e632ba0cf..85815f8533 100644 --- a/codex-rs/core/src/session/config_lock.rs +++ b/codex-rs/core/src/session/config_lock.rs @@ -186,7 +186,6 @@ fn drop_lockfile_inputs(lock_config: &mut ConfigToml) { lock_config.profiles.clear(); clear_config_lock_debug_controls(lock_config); lock_config.model_instructions_file = None; - lock_config.experimental_instructions_file = None; lock_config.experimental_compact_prompt_file = None; lock_config.model_catalog_json = None; lock_config.sandbox_mode = None; diff --git a/codex-rs/core/src/session/mcp.rs b/codex-rs/core/src/session/mcp.rs index fcaaa17c57..9f27751f7e 100644 --- a/codex-rs/core/src/session/mcp.rs +++ b/codex-rs/core/src/session/mcp.rs @@ -154,6 +154,9 @@ impl Session { id, request, }); + turn_context + .turn_metadata_state + .mark_user_input_requested_during_turn(); self.send_event(turn_context, event).await; rx_response.await.ok() } diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs index 88c2fd19b0..afca923251 100644 --- a/codex-rs/core/src/session/mod.rs +++ b/codex-rs/core/src/session/mod.rs @@ -2290,6 +2290,9 @@ impl Session { turn_id: turn_context.sub_id.clone(), questions: args.questions, }); + turn_context + .turn_metadata_state + .mark_user_input_requested_during_turn(); self.send_event(turn_context, event).await; rx_response.await.ok() } diff --git a/codex-rs/core/src/session/session.rs b/codex-rs/core/src/session/session.rs index 4a84818cfd..7e38614386 100644 --- a/codex-rs/core/src/session/session.rs +++ b/codex-rs/core/src/session/session.rs @@ -611,19 +611,6 @@ impl Session { }), }); } - if crate::config::uses_deprecated_instructions_file(&config.config_layer_stack) { - post_session_configured_events.push(Event { - id: INITIAL_SUBMIT_ID.to_owned(), - msg: EventMsg::DeprecationNotice(DeprecationNoticeEvent { - summary: "`experimental_instructions_file` is deprecated and ignored. Use `model_instructions_file` instead." - .to_string(), - details: Some( - "Move the setting to `model_instructions_file` in config.toml (or under a profile) to load instructions from a file." - .to_string(), - ), - }), - }); - } for message in &config.startup_warnings { post_session_configured_events.push(Event { id: "".to_owned(), diff --git a/codex-rs/core/src/turn_metadata.rs b/codex-rs/core/src/turn_metadata.rs index 02760582f2..1b2c6b4b12 100644 --- a/codex-rs/core/src/turn_metadata.rs +++ b/codex-rs/core/src/turn_metadata.rs @@ -3,6 +3,8 @@ use std::collections::HashMap; use std::sync::Arc; use std::sync::Mutex; use std::sync::RwLock; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering; use codex_utils_string::to_ascii_json_string; use serde::Serialize; @@ -23,6 +25,7 @@ use codex_utils_absolute_path::AbsolutePathBuf; const MODEL_KEY: &str = "model"; const REASONING_EFFORT_KEY: &str = "reasoning_effort"; const TURN_STARTED_AT_UNIX_MS_KEY: &str = "turn_started_at_unix_ms"; +const USER_INPUT_REQUESTED_DURING_TURN_KEY: &str = "user_input_requested_during_turn"; pub(crate) struct McpTurnMetadataContext<'a> { pub(crate) model: &'a str, @@ -186,6 +189,7 @@ pub(crate) struct TurnMetadataState { enriched_header: Arc>>, turn_started_at_unix_ms: Arc>>, responsesapi_client_metadata: Arc>>>, + user_input_requested_during_turn: Arc, enrichment_task: Arc>>>, } @@ -231,6 +235,7 @@ impl TurnMetadataState { enriched_header: Arc::new(RwLock::new(None)), turn_started_at_unix_ms: Arc::new(RwLock::new(None)), responsesapi_client_metadata: Arc::new(RwLock::new(None)), + user_input_requested_during_turn: Arc::new(AtomicBool::new(false)), enrichment_task: Arc::new(Mutex::new(None)), } } @@ -285,9 +290,25 @@ impl TurnMetadataState { metadata.remove(REASONING_EFFORT_KEY); } } + if self + .user_input_requested_during_turn + .load(Ordering::Relaxed) + { + metadata.insert( + USER_INPUT_REQUESTED_DURING_TURN_KEY.to_string(), + Value::Bool(true), + ); + } else { + metadata.remove(USER_INPUT_REQUESTED_DURING_TURN_KEY); + } Some(Value::Object(metadata)) } + pub(crate) fn mark_user_input_requested_during_turn(&self) { + self.user_input_requested_during_turn + .store(true, Ordering::Relaxed); + } + pub(crate) fn set_responsesapi_client_metadata( &self, responsesapi_client_metadata: HashMap, diff --git a/codex-rs/core/src/turn_metadata_tests.rs b/codex-rs/core/src/turn_metadata_tests.rs index 2a38447f86..a9c7a7b87d 100644 --- a/codex-rs/core/src/turn_metadata_tests.rs +++ b/codex-rs/core/src/turn_metadata_tests.rs @@ -213,6 +213,56 @@ fn turn_metadata_state_includes_model_and_reasoning_effort_only_in_request_meta( ); } +#[test] +fn turn_metadata_state_marks_user_input_requested_during_turn_only_for_mcp_request_meta() { + let temp_dir = TempDir::new().expect("temp dir"); + let cwd = temp_dir.path().abs(); + let permission_profile = PermissionProfile::read_only(); + + let state = TurnMetadataState::new( + "session-a".to_string(), + "thread-a".to_string(), + /*thread_source*/ None, + "turn-a".to_string(), + cwd, + &permission_profile, + WindowsSandboxLevel::Disabled, + /*enforce_managed_network*/ false, + ); + + let header = state.current_header_value().expect("header"); + let header_json: Value = serde_json::from_str(&header).expect("json"); + assert!( + header_json + .get(USER_INPUT_REQUESTED_DURING_TURN_KEY) + .is_none() + ); + + let meta = state + .current_meta_value_for_mcp_request(test_mcp_turn_metadata_context()) + .expect("turn metadata should be present"); + assert!(meta.get(USER_INPUT_REQUESTED_DURING_TURN_KEY).is_none()); + + state.mark_user_input_requested_during_turn(); + + let header = state.current_header_value().expect("header"); + let header_json: Value = serde_json::from_str(&header).expect("json"); + assert!( + header_json + .get(USER_INPUT_REQUESTED_DURING_TURN_KEY) + .is_none() + ); + + let meta = state + .current_meta_value_for_mcp_request(test_mcp_turn_metadata_context()) + .expect("turn metadata should be present"); + assert_eq!( + meta.get(USER_INPUT_REQUESTED_DURING_TURN_KEY) + .and_then(Value::as_bool), + Some(true) + ); +} + #[test] fn turn_metadata_state_ignores_client_turn_started_at_unix_ms_before_start() { let temp_dir = TempDir::new().expect("temp dir"); diff --git a/codex-rs/core/tests/common/apps_test_server.rs b/codex-rs/core/tests/common/apps_test_server.rs index 757fc146f2..702f96ef2d 100644 --- a/codex-rs/core/tests/common/apps_test_server.rs +++ b/codex-rs/core/tests/common/apps_test_server.rs @@ -1,4 +1,10 @@ +use crate::test_codex::TestCodexBuilder; +use crate::test_codex::test_codex; use anyhow::Result; +use codex_core::config::Config; +use codex_features::Feature; +use codex_login::CodexAuth; +use codex_models_manager::bundled_models_response; use serde_json::Value; use serde_json::json; use wiremock::Mock; @@ -15,10 +21,21 @@ const CONNECTOR_NAME: &str = "Calendar"; const DISCOVERABLE_CALENDAR_ID: &str = "connector_2128aebfecb84f64a069897515042a44"; const DISCOVERABLE_GMAIL_ID: &str = "connector_68df038e0ba48191908c8434991bbac2"; const CONNECTOR_DESCRIPTION: &str = "Plan events and manage your calendar."; +const CODEX_APPS_META_KEY: &str = "_codex_apps"; const PROTOCOL_VERSION: &str = "2025-11-25"; const SERVER_NAME: &str = "codex-apps-test"; const SERVER_VERSION: &str = "1.0.0"; const SEARCHABLE_TOOL_COUNT: usize = 100; +const CALENDAR_CREATE_EVENT_TOOL_NAME: &str = "calendar_create_event"; +pub const CALENDAR_EXTRACT_TEXT_TOOL_NAME: &str = "calendar_extract_text"; +const CALENDAR_LIST_EVENTS_TOOL_NAME: &str = "calendar_list_events"; +pub const DIRECT_CALENDAR_CREATE_EVENT_TOOL: &str = "mcp__codex_apps__calendar_create_event"; +pub const DIRECT_CALENDAR_LIST_EVENTS_TOOL: &str = "mcp__codex_apps__calendar_list_events"; +pub const DIRECT_CALENDAR_EXTRACT_TEXT_TOOL: &str = "mcp__codex_apps__calendar_extract_text"; +pub const SEARCH_CALENDAR_NAMESPACE: &str = "mcp__codex_apps__calendar"; +pub const SEARCH_CALENDAR_CREATE_TOOL: &str = "_create_event"; +pub const SEARCH_CALENDAR_EXTRACT_TEXT_TOOL: &str = "_extract_text"; +pub const SEARCH_CALENDAR_LIST_TOOL: &str = "_list_events"; pub const CALENDAR_CREATE_EVENT_RESOURCE_URI: &str = "connector://calendar/tools/calendar_create_event"; pub const CALENDAR_CREATE_EVENT_MCP_APP_RESOURCE_URI: &str = @@ -71,6 +88,103 @@ impl AppsTestServer { } } +pub fn configure_search_capable_model(config: &mut Config) { + let mut model_catalog = bundled_models_response() + .unwrap_or_else(|err| panic!("bundled models.json should parse: {err}")); + let model = model_catalog + .models + .iter_mut() + .find(|model| model.slug == "gpt-5.4") + .expect("gpt-5.4 exists in bundled models.json"); + config.model = Some("gpt-5.4".to_string()); + model.supports_search_tool = true; + config.model_catalog = Some(model_catalog); +} + +fn configure_apps(config: &mut Config, apps_base_url: &str) { + config + .features + .enable(Feature::Apps) + .expect("test config should allow feature update"); + config.chatgpt_base_url = apps_base_url.to_string(); +} + +pub fn configure_search_capable_apps(config: &mut Config, apps_base_url: &str) { + configure_apps(config, apps_base_url); + configure_search_capable_model(config); +} + +pub fn apps_enabled_builder(apps_base_url: impl Into) -> TestCodexBuilder { + let apps_base_url = apps_base_url.into(); + test_codex() + .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) + .with_config(move |config| configure_apps(config, apps_base_url.as_str())) +} + +pub fn search_capable_apps_builder(apps_base_url: impl Into) -> TestCodexBuilder { + let apps_base_url = apps_base_url.into(); + test_codex() + .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) + .with_config(move |config| configure_search_capable_apps(config, apps_base_url.as_str())) +} + +fn apps_tool_call_id(body: &Value) -> Option<&str> { + body.get("params")? + .get("_meta")? + .get(CODEX_APPS_META_KEY)? + .get("call_id")? + .as_str() +} + +async fn recorded_apps_tool_calls(server: &MockServer) -> Vec { + server + .received_requests() + .await + .expect("mock server should capture requests") + .into_iter() + .filter_map(|request| { + let body: Value = serde_json::from_slice(&request.body).ok()?; + (request.url.path() == "/api/codex/apps" + && body.get("method").and_then(Value::as_str) == Some("tools/call")) + .then_some(body) + }) + .collect() +} + +pub async fn recorded_apps_tool_call_by_call_id(server: &MockServer, call_id: &str) -> Value { + let matches = recorded_apps_tool_calls(server) + .await + .into_iter() + .filter(|body| apps_tool_call_id(body) == Some(call_id)) + .collect::>(); + assert_eq!( + matches.len(), + 1, + "expected exactly one apps tools/call request for call_id {call_id}" + ); + matches + .into_iter() + .next() + .expect("matching apps tools/call request should be recorded") +} + +pub async fn recorded_apps_tool_call_by_name(server: &MockServer, tool_name: &str) -> Value { + let matches = recorded_apps_tool_calls(server) + .await + .into_iter() + .filter(|body| body.pointer("/params/name").and_then(Value::as_str) == Some(tool_name)) + .collect::>(); + assert_eq!( + matches.len(), + 1, + "expected exactly one apps tools/call request for tool {tool_name}" + ); + matches + .into_iter() + .next() + .expect("matching apps tools/call request should be recorded") +} + async fn mount_oauth_metadata(server: &MockServer) { Mock::given(method("GET")) .and(path("/.well-known/oauth-authorization-server/mcp")) @@ -187,7 +301,7 @@ impl Respond for CodexAppsJsonRpcResponder { "result": { "tools": [ { - "name": "calendar_create_event", + "name": CALENDAR_CREATE_EVENT_TOOL_NAME, "description": "Create a calendar event.", "annotations": { "readOnlyHint": false, @@ -217,7 +331,7 @@ impl Respond for CodexAppsJsonRpcResponder { } }, { - "name": "calendar_list_events", + "name": CALENDAR_LIST_EVENTS_TOOL_NAME, "description": "List calendar events.", "annotations": { "readOnlyHint": true @@ -242,7 +356,7 @@ impl Respond for CodexAppsJsonRpcResponder { } }, { - "name": "calendar_extract_text", + "name": CALENDAR_EXTRACT_TEXT_TOOL_NAME, "description": "Extract text from an uploaded document.", "annotations": { "readOnlyHint": false diff --git a/codex-rs/core/tests/common/lib.rs b/codex-rs/core/tests/common/lib.rs index 70e1a3f0e4..ad7858f804 100644 --- a/codex-rs/core/tests/common/lib.rs +++ b/codex-rs/core/tests/common/lib.rs @@ -237,54 +237,6 @@ pub fn find_codex_linux_sandbox_exe() -> Result { codex_utils_cargo_bin::cargo_bin("codex-linux-sandbox") } -/// Builds an SSE stream body from a JSON fixture. -/// -/// The fixture must contain an array of objects where each object represents a -/// single SSE event with at least a `type` field matching the `event:` value. -/// Additional fields become the JSON payload for the `data:` line. An object -/// with only a `type` field results in an event with no `data:` section. This -/// makes it trivial to extend the fixtures as OpenAI adds new event kinds or -/// fields. -pub fn load_sse_fixture(path: impl AsRef) -> String { - let events: Vec = - serde_json::from_reader(std::fs::File::open(path).expect("read fixture")) - .expect("parse JSON fixture"); - events - .into_iter() - .map(|e| { - let kind = e - .get("type") - .and_then(|v| v.as_str()) - .expect("fixture event missing type"); - if e.as_object().map(|o| o.len() == 1).unwrap_or(false) { - format!("event: {kind}\n\n") - } else { - format!("event: {kind}\ndata: {e}\n\n") - } - }) - .collect() -} - -pub fn load_sse_fixture_with_id_from_str(raw: &str, id: &str) -> String { - let replaced = raw.replace("__ID__", id); - let events: Vec = - serde_json::from_str(&replaced).expect("parse JSON fixture"); - events - .into_iter() - .map(|e| { - let kind = e - .get("type") - .and_then(|v| v.as_str()) - .expect("fixture event missing type"); - if e.as_object().map(|o| o.len() == 1).unwrap_or(false) { - format!("event: {kind}\n\n") - } else { - format!("event: {kind}\ndata: {e}\n\n") - } - }) - .collect() -} - pub async fn wait_for_event( codex: &CodexThread, predicate: F, diff --git a/codex-rs/core/tests/fixtures/incomplete_sse.json b/codex-rs/core/tests/fixtures/incomplete_sse.json deleted file mode 100644 index 2876bbfd29..0000000000 --- a/codex-rs/core/tests/fixtures/incomplete_sse.json +++ /dev/null @@ -1,3 +0,0 @@ -[ - {"type": "response.output_item.done"} -] diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs index 8a461edc63..42bacb9bdc 100644 --- a/codex-rs/core/tests/suite/client.rs +++ b/codex-rs/core/tests/suite/client.rs @@ -52,6 +52,7 @@ use core_test_support::PathBufExt; use core_test_support::apps_test_server::AppsTestServer; use core_test_support::load_default_config_for_test; use core_test_support::responses::ResponsesRequest; +use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_completed_with_tokens; use core_test_support::responses::ev_message_item_added; @@ -3016,22 +3017,15 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() { let server = MockServer::start().await; // Build a small SSE stream with deltas and a final assistant message. - // We emit the same body for all 3 turns; ids vary but are unused by assertions. - let sse_raw = r##"[ - {"type":"response.output_item.added", "item":{ - "type":"message", "role":"assistant", - "content":[{"type":"output_text","text":""}] - }}, - {"type":"response.output_text.delta", "delta":"Hey "}, - {"type":"response.output_text.delta", "delta":"there"}, - {"type":"response.output_text.delta", "delta":"!\n"}, - {"type":"response.output_item.done", "item":{ - "type":"message", "role":"assistant", - "content":[{"type":"output_text","text":"Hey there!\n"}] - }}, - {"type":"response.completed", "response": {"id": "__ID__"}} - ]"##; - let sse1 = core_test_support::load_sse_fixture_with_id_from_str(sse_raw, "resp1"); + // We emit the same body for all 3 turns. + let sse1 = sse(vec![ + ev_message_item_added("msg-1", ""), + ev_output_text_delta("Hey "), + ev_output_text_delta("there"), + ev_output_text_delta("!\n"), + ev_assistant_message("msg-1", "Hey there!\n"), + ev_completed("resp1"), + ]); let request_log = mount_sse_sequence(&server, vec![sse1.clone(), sse1.clone(), sse1]).await; diff --git a/codex-rs/core/tests/suite/deprecation_notice.rs b/codex-rs/core/tests/suite/deprecation_notice.rs index 36b5fe9d5d..c41ff47f2a 100644 --- a/codex-rs/core/tests/suite/deprecation_notice.rs +++ b/codex-rs/core/tests/suite/deprecation_notice.rs @@ -1,23 +1,16 @@ #![cfg(not(target_os = "windows"))] use anyhow::Ok; -use codex_app_server_protocol::ConfigLayerSource; -use codex_config::ConfigLayerEntry; -use codex_config::ConfigLayerStack; -use codex_config::ConfigRequirements; -use codex_config::ConfigRequirementsToml; use codex_features::Feature; use codex_protocol::protocol::DeprecationNoticeEvent; use codex_protocol::protocol::EventMsg; use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; -use core_test_support::test_absolute_path; use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event_match; use pretty_assertions::assert_eq; use std::collections::BTreeMap; -use toml::Value as TomlValue; #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn emits_deprecation_notice_for_legacy_feature_flag() -> anyhow::Result<()> { @@ -60,62 +53,6 @@ async fn emits_deprecation_notice_for_legacy_feature_flag() -> anyhow::Result<() Ok(()) } -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn emits_deprecation_notice_for_experimental_instructions_file() -> anyhow::Result<()> { - skip_if_no_network!(Ok(())); - - let server = start_mock_server().await; - - let mut builder = test_codex().with_config(|config| { - let mut table = toml::map::Map::new(); - table.insert( - "experimental_instructions_file".to_string(), - TomlValue::String("legacy.md".to_string()), - ); - let config_layer = ConfigLayerEntry::new( - ConfigLayerSource::User { - file: test_absolute_path("/tmp/config.toml"), - profile: None, - }, - TomlValue::Table(table), - ); - let config_layer_stack = ConfigLayerStack::new( - vec![config_layer], - ConfigRequirements::default(), - ConfigRequirementsToml::default(), - ) - .expect("build config layer stack"); - config.config_layer_stack = config_layer_stack; - }); - - let TestCodex { codex, .. } = builder.build(&server).await?; - - let notice = wait_for_event_match(&codex, |event| match event { - EventMsg::DeprecationNotice(ev) - if ev.summary.contains("experimental_instructions_file") => - { - Some(ev.clone()) - } - _ => None, - }) - .await; - - let DeprecationNoticeEvent { summary, details } = notice; - assert_eq!( - summary, - "`experimental_instructions_file` is deprecated and ignored. Use `model_instructions_file` instead." - .to_string(), - ); - assert_eq!( - details.as_deref(), - Some( - "Move the setting to `model_instructions_file` in config.toml (or under a profile) to load instructions from a file." - ), - ); - - Ok(()) -} - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn emits_deprecation_notice_for_web_search_feature_flag_values() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); diff --git a/codex-rs/core/tests/suite/mcp_turn_metadata.rs b/codex-rs/core/tests/suite/mcp_turn_metadata.rs new file mode 100644 index 0000000000..17fe33dc8b --- /dev/null +++ b/codex-rs/core/tests/suite/mcp_turn_metadata.rs @@ -0,0 +1,312 @@ +#![cfg(not(target_os = "windows"))] +#![allow(clippy::unwrap_used, clippy::expect_used)] + +use anyhow::Result; +use codex_config::types::AppToolApproval; +use codex_core::config::Config; +use codex_features::Feature; +use codex_protocol::config_types::CollaborationMode; +use codex_protocol::config_types::ModeKind; +use codex_protocol::config_types::Settings; +use codex_protocol::models::PermissionProfile; +use codex_protocol::protocol::AskForApproval; +use codex_protocol::protocol::ElicitationAction; +use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::Op; +use codex_protocol::request_user_input::RequestUserInputAnswer; +use codex_protocol::request_user_input::RequestUserInputResponse; +use codex_protocol::user_input::UserInput; +use core_test_support::PathExt; +use core_test_support::apps_test_server::AppsTestServer; +use core_test_support::apps_test_server::SEARCH_CALENDAR_CREATE_TOOL; +use core_test_support::apps_test_server::SEARCH_CALENDAR_NAMESPACE; +use core_test_support::apps_test_server::recorded_apps_tool_call_by_call_id; +use core_test_support::apps_test_server::search_capable_apps_builder; +use core_test_support::responses::ev_assistant_message; +use core_test_support::responses::ev_completed; +use core_test_support::responses::ev_function_call; +use core_test_support::responses::ev_function_call_with_namespace; +use core_test_support::responses::ev_response_created; +use core_test_support::responses::mount_sse_sequence; +use core_test_support::responses::sse; +use core_test_support::responses::start_mock_server; +use core_test_support::skip_if_no_network; +use core_test_support::test_codex::TestCodex; +use core_test_support::test_codex::turn_permission_fields; +use core_test_support::wait_for_event; +use core_test_support::wait_for_event_match; +use pretty_assertions::assert_eq; +use serde_json::json; +use std::collections::HashMap; + +fn set_calendar_approval_mode(config: &mut Config, approval_mode: AppToolApproval) { + let approval_mode = match approval_mode { + AppToolApproval::Auto => "auto", + AppToolApproval::Prompt => "prompt", + AppToolApproval::Approve => "approve", + }; + let user_config_path = config.codex_home.join("config.toml").abs(); + let user_config = toml::from_str(&format!( + r#" +[apps.calendar] +default_tools_approval_mode = "{approval_mode}" +"# + )) + .expect("apps config should parse"); + config.config_layer_stack = config + .config_layer_stack + .with_user_config(&user_config_path, user_config); +} + +async fn submit_user_turn( + test: &TestCodex, + text: &str, + approval_policy: AskForApproval, + collaboration_mode: Option, +) -> Result<()> { + let (sandbox_policy, permission_profile) = + turn_permission_fields(PermissionProfile::Disabled, test.cwd.path()); + test.codex + .submit(Op::UserTurn { + environments: None, + items: vec![UserInput::Text { + text: text.to_string(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + cwd: test.cwd.path().to_path_buf(), + approval_policy, + approvals_reviewer: None, + sandbox_policy, + permission_profile, + model: test.session_configured.model.clone(), + effort: None, + summary: None, + service_tier: None, + collaboration_mode, + personality: None, + }) + .await?; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn approved_mcp_tool_call_metadata_records_prior_user_input_request() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let apps_server = AppsTestServer::mount(&server).await?; + let call_id = "calendar-call-approval"; + let calendar_args = serde_json::to_string(&json!({ + "title": "Lunch", + "starts_at": "2026-03-10T12:00:00Z" + }))?; + let mock = mount_sse_sequence( + &server, + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call_with_namespace( + call_id, + SEARCH_CALENDAR_NAMESPACE, + SEARCH_CALENDAR_CREATE_TOOL, + &calendar_args, + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_response_created("resp-2"), + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ], + ) + .await; + + let mut builder = search_capable_apps_builder(apps_server.chatgpt_base_url.clone()) + .with_config(|config| { + config + .features + .enable(Feature::ToolCallMcpElicitation) + .expect("test config should allow feature update"); + set_calendar_approval_mode(config, AppToolApproval::Prompt); + }); + let test = builder.build(&server).await?; + + submit_user_turn( + &test, + "Use [$calendar](app://calendar) to create a calendar event.", + AskForApproval::OnRequest, + /*collaboration_mode*/ None, + ) + .await?; + + let EventMsg::McpToolCallBegin(begin) = wait_for_event(&test.codex, |event| { + matches!(event, EventMsg::McpToolCallBegin(_)) + }) + .await + else { + unreachable!("event guard guarantees McpToolCallBegin"); + }; + assert_eq!(begin.call_id, call_id); + + let EventMsg::ElicitationRequest(request) = wait_for_event(&test.codex, |event| { + matches!(event, EventMsg::ElicitationRequest(_)) + }) + .await + else { + unreachable!("event guard guarantees ElicitationRequest"); + }; + + test.codex + .submit(Op::ResolveElicitation { + server_name: request.server_name, + request_id: request.id, + decision: ElicitationAction::Accept, + content: None, + meta: None, + }) + .await?; + + wait_for_event(&test.codex, |event| { + matches!(event, EventMsg::TurnComplete(_)) + }) + .await; + + assert_eq!(mock.requests().len(), 2); + let apps_tool_call = recorded_apps_tool_call_by_call_id(&server, call_id).await; + + assert_eq!( + apps_tool_call + .pointer("/params/_meta/x-codex-turn-metadata/user_input_requested_during_turn"), + Some(&json!(true)) + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn mcp_tool_call_metadata_records_prior_request_user_input_tool() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let apps_server = AppsTestServer::mount(&server).await?; + let request_user_input_call_id = "user-input-call"; + let calendar_call_id = "calendar-call-after-user-input"; + let request_user_input_args = json!({ + "questions": [{ + "id": "confirm_path", + "header": "Confirm", + "question": "Proceed with the plan?", + "options": [{ + "label": "Yes (Recommended)", + "description": "Continue the current plan." + }, { + "label": "No", + "description": "Stop and revisit the approach." + }] + }] + }) + .to_string(); + let calendar_args = serde_json::to_string(&json!({ + "title": "Lunch", + "starts_at": "2026-03-10T12:00:00Z" + }))?; + let mock = mount_sse_sequence( + &server, + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call( + request_user_input_call_id, + "request_user_input", + &request_user_input_args, + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_response_created("resp-2"), + ev_function_call_with_namespace( + calendar_call_id, + SEARCH_CALENDAR_NAMESPACE, + SEARCH_CALENDAR_CREATE_TOOL, + &calendar_args, + ), + ev_completed("resp-2"), + ]), + sse(vec![ + ev_response_created("resp-3"), + ev_assistant_message("msg-1", "done"), + ev_completed("resp-3"), + ]), + ], + ) + .await; + + let mut builder = search_capable_apps_builder(apps_server.chatgpt_base_url.clone()) + .with_config(|config| { + set_calendar_approval_mode(config, AppToolApproval::Approve); + }); + let test = builder.build(&server).await?; + + submit_user_turn( + &test, + "Ask for confirmation, then create a calendar event.", + AskForApproval::Never, + Some(CollaborationMode { + mode: ModeKind::Plan, + settings: Settings { + model: test.session_configured.model.clone(), + reasoning_effort: None, + developer_instructions: None, + }, + }), + ) + .await?; + + let request = wait_for_event_match(&test.codex, |event| match event { + EventMsg::RequestUserInput(request) => Some(request.clone()), + _ => None, + }) + .await; + assert_eq!(request.call_id, request_user_input_call_id); + + test.codex + .submit(Op::UserInputAnswer { + id: request.turn_id, + response: RequestUserInputResponse { + answers: HashMap::from([( + "confirm_path".to_string(), + RequestUserInputAnswer { + answers: vec!["Yes (Recommended)".to_string()], + }, + )]), + }, + }) + .await?; + + let EventMsg::McpToolCallBegin(begin) = wait_for_event(&test.codex, |event| { + matches!(event, EventMsg::McpToolCallBegin(_)) + }) + .await + else { + unreachable!("event guard guarantees McpToolCallBegin"); + }; + assert_eq!(begin.call_id, calendar_call_id); + + wait_for_event(&test.codex, |event| { + matches!(event, EventMsg::TurnComplete(_)) + }) + .await; + + assert_eq!(mock.requests().len(), 3); + let apps_tool_call = recorded_apps_tool_call_by_call_id(&server, calendar_call_id).await; + + assert_eq!( + apps_tool_call + .pointer("/params/_meta/x-codex-turn-metadata/user_input_requested_during_turn"), + Some(&json!(true)) + ); + + Ok(()) +} diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index 2b5caf5a52..0d89516d3d 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -57,6 +57,7 @@ mod image_rollout; mod items; mod json_result; mod live_cli; +mod mcp_turn_metadata; mod model_overrides; mod model_switching; mod model_visible_layout; diff --git a/codex-rs/core/tests/suite/openai_file_mcp.rs b/codex-rs/core/tests/suite/openai_file_mcp.rs index 0f0dcf46f1..a25cc36a86 100644 --- a/codex-rs/core/tests/suite/openai_file_mcp.rs +++ b/codex-rs/core/tests/suite/openai_file_mcp.rs @@ -5,13 +5,16 @@ use std::path::Path; use anyhow::Context; use anyhow::Result; -use codex_core::config::Config; -use codex_features::Feature; -use codex_login::CodexAuth; use codex_protocol::models::PermissionProfile; use codex_protocol::protocol::AskForApproval; use core_test_support::apps_test_server::AppsTestServer; +use core_test_support::apps_test_server::CALENDAR_EXTRACT_TEXT_TOOL_NAME; +use core_test_support::apps_test_server::DIRECT_CALENDAR_EXTRACT_TEXT_TOOL as DOCUMENT_EXTRACT_HOOK_MATCHER; use core_test_support::apps_test_server::DOCUMENT_EXTRACT_TEXT_RESOURCE_URI; +use core_test_support::apps_test_server::SEARCH_CALENDAR_EXTRACT_TEXT_TOOL as DOCUMENT_EXTRACT_TOOL; +use core_test_support::apps_test_server::SEARCH_CALENDAR_NAMESPACE as DOCUMENT_EXTRACT_NAMESPACE; +use core_test_support::apps_test_server::apps_enabled_builder; +use core_test_support::apps_test_server::recorded_apps_tool_call_by_name; use core_test_support::hooks::trust_discovered_hooks; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; @@ -20,7 +23,6 @@ use core_test_support::responses::ev_response_created; use core_test_support::responses::mount_sse_sequence; use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; -use core_test_support::test_codex::test_codex; use pretty_assertions::assert_eq; use serde_json::Value; use serde_json::json; @@ -31,17 +33,6 @@ use wiremock::matchers::header; use wiremock::matchers::method; use wiremock::matchers::path; -const DOCUMENT_EXTRACT_NAMESPACE: &str = "mcp__codex_apps__calendar"; -const DOCUMENT_EXTRACT_TOOL: &str = "_extract_text"; -const DOCUMENT_EXTRACT_HOOK_MATCHER: &str = "mcp__codex_apps__calendar_extract_text"; - -fn configure_apps(config: &mut Config, chatgpt_base_url: &str) { - if let Err(err) = config.features.enable(Feature::Apps) { - panic!("test config should allow feature update: {err}"); - } - config.chatgpt_base_url = chatgpt_base_url.to_string(); -} - fn write_post_tool_use_hook(home: &Path) -> Result<()> { let script_path = home.join("post_tool_use_hook.py"); let log_path = home.join("post_tool_use_hook_log.jsonl"); @@ -154,15 +145,13 @@ async fn codex_apps_file_params_upload_local_paths_before_mcp_tool_call() -> Res ) .await; - let mut builder = test_codex() - .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) + let mut builder = apps_enabled_builder(apps_server.chatgpt_base_url.clone()) .with_pre_build_hook(move |home| { if let Err(error) = write_post_tool_use_hook(home) { panic!("failed to write apps file post tool use hook fixture: {error}"); } }) .with_config(move |config| { - configure_apps(config, apps_server.chatgpt_base_url.as_str()); trust_discovered_hooks(config); }); let test = builder.build(&server).await?; @@ -192,20 +181,8 @@ async fn codex_apps_file_params_upload_local_paths_before_mcp_tool_call() -> Res })) ); - let apps_tool_call = server - .received_requests() - .await - .unwrap_or_default() - .into_iter() - .find_map(|request| { - let body: Value = serde_json::from_slice(&request.body).ok()?; - (request.url.path() == "/api/codex/apps" - && body.get("method").and_then(Value::as_str) == Some("tools/call") - && body.pointer("/params/name").and_then(Value::as_str) - == Some("calendar_extract_text")) - .then_some(body) - }) - .expect("apps calendar_extract_text tools/call request should be recorded"); + let apps_tool_call = + recorded_apps_tool_call_by_name(&server, CALENDAR_EXTRACT_TEXT_TOOL_NAME).await; assert_eq!( apps_tool_call.pointer("/params/arguments/file"), diff --git a/codex-rs/core/tests/suite/review.rs b/codex-rs/core/tests/suite/review.rs index f371cdacad..3611df6a6a 100644 --- a/codex-rs/core/tests/suite/review.rs +++ b/codex-rs/core/tests/suite/review.rs @@ -18,7 +18,7 @@ use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::RolloutLine; use codex_protocol::user_input::UserInput; use core_test_support::PathBufExt; -use core_test_support::load_sse_fixture_with_id_from_str; +use core_test_support::responses; use core_test_support::responses::ResponseMock; use core_test_support::responses::mount_sse_sequence; use core_test_support::responses::start_mock_server; @@ -61,17 +61,11 @@ async fn review_op_emits_lifecycle_and_review_output() { "overall_confidence_score": 0.8 }) .to_string(); - let sse_template = r#"[ - {"type":"response.output_item.done", "item":{ - "type":"message", "role":"assistant", - "content":[{"type":"output_text","text":__REVIEW__}] - }}, - {"type":"response.completed", "response": {"id": "__ID__"}} - ]"#; - let review_json_escaped = serde_json::to_string(&review_json).unwrap(); - let sse_raw = sse_template.replace("__REVIEW__", &review_json_escaped); - let (server, _request_log) = - start_responses_server_with_sse(&sse_raw, /*expected_requests*/ 1).await; + let (server, _request_log) = start_responses_server_with_sse( + assistant_message_sse(&review_json), + /*expected_requests*/ 1, + ) + .await; let codex_home = Arc::new(TempDir::new().unwrap()); let codex = new_conversation_for_server(&server, codex_home.clone(), |_| {}).await; @@ -186,15 +180,11 @@ async fn review_op_emits_lifecycle_and_review_output() { async fn review_op_with_plain_text_emits_review_fallback() { skip_if_no_network!(); - let sse_raw = r#"[ - {"type":"response.output_item.done", "item":{ - "type":"message", "role":"assistant", - "content":[{"type":"output_text","text":"just plain text"}] - }}, - {"type":"response.completed", "response": {"id": "__ID__"}} - ]"#; - let (server, _request_log) = - start_responses_server_with_sse(sse_raw, /*expected_requests*/ 1).await; + let (server, _request_log) = start_responses_server_with_sse( + assistant_message_sse("just plain text"), + /*expected_requests*/ 1, + ) + .await; let codex_home = Arc::new(TempDir::new().unwrap()); let codex = new_conversation_for_server(&server, codex_home.clone(), |_| {}).await; @@ -240,22 +230,17 @@ async fn review_op_with_plain_text_emits_review_fallback() { async fn review_filters_agent_message_related_events() { skip_if_no_network!(); - // Stream simulating a typing assistant message with deltas and finalization. - let sse_raw = r#"[ - {"type":"response.output_item.added", "item":{ - "type":"message", "role":"assistant", "id":"msg-1", - "content":[{"type":"output_text","text":""}] - }}, - {"type":"response.output_text.delta", "delta":"Hi"}, - {"type":"response.output_text.delta", "delta":" there"}, - {"type":"response.output_item.done", "item":{ - "type":"message", "role":"assistant", "id":"msg-1", - "content":[{"type":"output_text","text":"Hi there"}] - }}, - {"type":"response.completed", "response": {"id": "__ID__"}} - ]"#; - let (server, _request_log) = - start_responses_server_with_sse(sse_raw, /*expected_requests*/ 1).await; + let (server, _request_log) = start_responses_server_with_sse( + vec![ + responses::ev_message_item_added("msg-1", ""), + responses::ev_output_text_delta("Hi"), + responses::ev_output_text_delta(" there"), + responses::ev_assistant_message("msg-1", "Hi there"), + responses::ev_completed("resp-1"), + ], + /*expected_requests*/ 1, + ) + .await; let codex_home = Arc::new(TempDir::new().unwrap()); let codex = new_conversation_for_server(&server, codex_home.clone(), |_| {}).await; @@ -325,17 +310,11 @@ async fn review_does_not_emit_agent_message_on_structured_output() { "overall_confidence_score": 0.5 }) .to_string(); - let sse_template = r#"[ - {"type":"response.output_item.done", "item":{ - "type":"message", "role":"assistant", - "content":[{"type":"output_text","text":__REVIEW__}] - }}, - {"type":"response.completed", "response": {"id": "__ID__"}} - ]"#; - let review_json_escaped = serde_json::to_string(&review_json).unwrap(); - let sse_raw = sse_template.replace("__REVIEW__", &review_json_escaped); - let (server, _request_log) = - start_responses_server_with_sse(&sse_raw, /*expected_requests*/ 1).await; + let (server, _request_log) = start_responses_server_with_sse( + assistant_message_sse(&review_json), + /*expected_requests*/ 1, + ) + .await; let codex_home = Arc::new(TempDir::new().unwrap()); let codex = new_conversation_for_server(&server, codex_home.clone(), |_| {}).await; @@ -386,12 +365,8 @@ async fn review_does_not_emit_agent_message_on_structured_output() { async fn review_uses_custom_review_model_from_config() { skip_if_no_network!(); - // Minimal stream: just a completed event - let sse_raw = r#"[ - {"type":"response.completed", "response": {"id": "__ID__"}} - ]"#; let (server, request_log) = - start_responses_server_with_sse(sse_raw, /*expected_requests*/ 1).await; + start_responses_server_with_sse(completed_sse(), /*expected_requests*/ 1).await; let codex_home = Arc::new(TempDir::new().unwrap()); // Choose a review model different from the main model; ensure it is used. let codex = new_conversation_for_server(&server, codex_home.clone(), |cfg| { @@ -441,12 +416,8 @@ async fn review_uses_custom_review_model_from_config() { async fn review_uses_session_model_when_review_model_unset() { skip_if_no_network!(); - // Minimal stream: just a completed event - let sse_raw = r#"[ - {"type":"response.completed", "response": {"id": "__ID__"}} - ]"#; let (server, request_log) = - start_responses_server_with_sse(sse_raw, /*expected_requests*/ 1).await; + start_responses_server_with_sse(completed_sse(), /*expected_requests*/ 1).await; let codex_home = Arc::new(TempDir::new().unwrap()); let codex = new_conversation_for_server(&server, codex_home.clone(), |cfg| { cfg.model = Some("gpt-4.1".to_string()); @@ -496,12 +467,8 @@ async fn review_uses_session_model_when_review_model_unset() { async fn review_input_isolated_from_parent_history() { skip_if_no_network!(); - // Mock server for the single review request - let sse_raw = r#"[ - {"type":"response.completed", "response": {"id": "__ID__"}} - ]"#; let (server, request_log) = - start_responses_server_with_sse(sse_raw, /*expected_requests*/ 1).await; + start_responses_server_with_sse(completed_sse(), /*expected_requests*/ 1).await; // Seed a parent session history via resume file with both user + assistant items. let codex_home = Arc::new(TempDir::new().unwrap()); @@ -674,16 +641,11 @@ async fn review_input_isolated_from_parent_history() { async fn review_history_surfaces_in_parent_session() { skip_if_no_network!(); - // Respond to both the review request and the subsequent parent request. - let sse_raw = r#"[ - {"type":"response.output_item.done", "item":{ - "type":"message", "role":"assistant", - "content":[{"type":"output_text","text":"review assistant output"}] - }}, - {"type":"response.completed", "response": {"id": "__ID__"}} - ]"#; - let (server, request_log) = - start_responses_server_with_sse(sse_raw, /*expected_requests*/ 2).await; + let (server, request_log) = start_responses_server_with_sse( + assistant_message_sse("review assistant output"), + /*expected_requests*/ 2, + ) + .await; let codex_home = Arc::new(TempDir::new().unwrap()); let codex = new_conversation_for_server(&server, codex_home.clone(), |_| {}).await; @@ -776,9 +738,8 @@ async fn review_history_surfaces_in_parent_session() { async fn review_uses_overridden_cwd_for_base_branch_merge_base() { skip_if_no_network!(); - let sse_raw = r#"[{"type":"response.completed", "response": {"id": "__ID__"}}]"#; let (server, request_log) = - start_responses_server_with_sse(sse_raw, /*expected_requests*/ 1).await; + start_responses_server_with_sse(completed_sse(), /*expected_requests*/ 1).await; let initial_cwd = TempDir::new().unwrap(); @@ -881,13 +842,24 @@ async fn review_uses_overridden_cwd_for_base_branch_merge_base() { server.verify().await; } -/// Start a mock Responses API server and mount the given SSE stream body. +fn assistant_message_sse(text: &str) -> Vec { + vec![ + responses::ev_assistant_message("msg-1", text), + responses::ev_completed("resp-1"), + ] +} + +fn completed_sse() -> Vec { + vec![responses::ev_completed("resp-1")] +} + +/// Start a mock Responses API server and mount the given SSE events. async fn start_responses_server_with_sse( - sse_raw: &str, + events: Vec, expected_requests: usize, ) -> (MockServer, ResponseMock) { let server = start_mock_server().await; - let sse = load_sse_fixture_with_id_from_str(sse_raw, &Uuid::new_v4().to_string()); + let sse = responses::sse(events); let responses = vec![sse; expected_requests]; let request_log = mount_sse_sequence(&server, responses).await; (server, request_log) diff --git a/codex-rs/core/tests/suite/search_tool.rs b/codex-rs/core/tests/suite/search_tool.rs index bb4ca84b29..69dc74dce3 100644 --- a/codex-rs/core/tests/suite/search_tool.rs +++ b/codex-rs/core/tests/suite/search_tool.rs @@ -7,7 +7,6 @@ use codex_config::types::McpServerTransportConfig; use codex_core::config::Config; use codex_features::Feature; use codex_login::CodexAuth; -use codex_models_manager::bundled_models_response; use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem; use codex_protocol::dynamic_tools::DynamicToolResponse; use codex_protocol::dynamic_tools::DynamicToolSpec; @@ -21,6 +20,15 @@ use codex_protocol::user_input::UserInput; use core_test_support::apps_test_server::AppsTestServer; use core_test_support::apps_test_server::CALENDAR_CREATE_EVENT_MCP_APP_RESOURCE_URI; use core_test_support::apps_test_server::CALENDAR_CREATE_EVENT_RESOURCE_URI; +use core_test_support::apps_test_server::DIRECT_CALENDAR_CREATE_EVENT_TOOL as CALENDAR_CREATE_TOOL; +use core_test_support::apps_test_server::DIRECT_CALENDAR_LIST_EVENTS_TOOL as CALENDAR_LIST_TOOL; +use core_test_support::apps_test_server::SEARCH_CALENDAR_CREATE_TOOL; +use core_test_support::apps_test_server::SEARCH_CALENDAR_LIST_TOOL; +use core_test_support::apps_test_server::SEARCH_CALENDAR_NAMESPACE; +use core_test_support::apps_test_server::configure_search_capable_apps; +use core_test_support::apps_test_server::configure_search_capable_model; +use core_test_support::apps_test_server::recorded_apps_tool_call_by_call_id; +use core_test_support::apps_test_server::search_capable_apps_builder as configured_builder; use core_test_support::responses::ResponsesRequest; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; @@ -34,7 +42,6 @@ use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; use core_test_support::stdio_server_bin; -use core_test_support::test_codex::TestCodexBuilder; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use pretty_assertions::assert_eq; @@ -48,11 +55,6 @@ const SEARCH_TOOL_DESCRIPTION_SNIPPETS: [&str; 2] = [ "- Calendar: Plan events and manage your calendar.", ]; const TOOL_SEARCH_TOOL_NAME: &str = "tool_search"; -const CALENDAR_CREATE_TOOL: &str = "mcp__codex_apps__calendar_create_event"; -const CALENDAR_LIST_TOOL: &str = "mcp__codex_apps__calendar_list_events"; -const SEARCH_CALENDAR_NAMESPACE: &str = "mcp__codex_apps__calendar"; -const SEARCH_CALENDAR_CREATE_TOOL: &str = "_create_event"; -const SEARCH_CALENDAR_LIST_TOOL: &str = "_list_events"; fn tool_names(body: &Value) -> Vec { body.get("tools") @@ -111,28 +113,6 @@ fn tool_search_output_has_namespace_child( namespace_child_tool(&output, namespace, tool_name).is_some() } -fn configure_search_capable_model(config: &mut Config) { - let mut model_catalog = bundled_models_response() - .unwrap_or_else(|err| panic!("bundled models.json should parse: {err}")); - let model = model_catalog - .models - .iter_mut() - .find(|model| model.slug == "gpt-5.4") - .expect("gpt-5.4 exists in bundled models.json"); - config.model = Some("gpt-5.4".to_string()); - model.supports_search_tool = true; - config.model_catalog = Some(model_catalog); -} - -fn configure_search_capable_apps(config: &mut Config, apps_base_url: &str) { - config - .features - .enable(Feature::Apps) - .expect("test config should allow feature update"); - config.chatgpt_base_url = apps_base_url.to_string(); - configure_search_capable_model(config); -} - fn configure_apps_without_tool_search(config: &mut Config, apps_base_url: &str) { configure_search_capable_apps(config, apps_base_url); config @@ -141,16 +121,6 @@ fn configure_apps_without_tool_search(config: &mut Config, apps_base_url: &str) .expect("test config should allow feature update"); } -fn configure_apps(config: &mut Config, apps_base_url: &str) { - configure_search_capable_apps(config, apps_base_url); -} - -fn configured_builder(apps_base_url: String) -> TestCodexBuilder { - test_codex() - .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) - .with_config(move |config| configure_apps(config, apps_base_url.as_str())) -} - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn search_tool_enabled_by_default_adds_tool_search() -> Result<()> { skip_if_no_network!(Ok(())); @@ -321,7 +291,9 @@ async fn search_tool_is_hidden_for_api_key_auth() -> Result<()> { let mut builder = test_codex() .with_auth(CodexAuth::from_api_key("Test API Key")) - .with_config(move |config| configure_apps(config, apps_server.chatgpt_base_url.as_str())); + .with_config(move |config| { + configure_search_capable_apps(config, apps_server.chatgpt_base_url.as_str()) + }); let test = builder.build(&server).await?; test.submit_turn_with_approval_and_permission_profile( @@ -585,18 +557,7 @@ async fn tool_search_returns_deferred_tools_without_follow_up_tool_injection() - assert_eq!(requests.len(), 3); let first_request_body = requests[0].body_json(); - let apps_tool_call = server - .received_requests() - .await - .unwrap_or_default() - .into_iter() - .find_map(|request| { - let body: Value = serde_json::from_slice(&request.body).ok()?; - (request.url.path() == "/api/codex/apps" - && body.get("method").and_then(Value::as_str) == Some("tools/call")) - .then_some(body) - }) - .expect("apps tools/call request should be recorded"); + let apps_tool_call = recorded_apps_tool_call_by_call_id(&server, "calendar-call-1").await; assert_eq!( apps_tool_call.pointer("/params/_meta/_codex_apps"), diff --git a/codex-rs/core/tests/suite/stream_no_completed.rs b/codex-rs/core/tests/suite/stream_no_completed.rs index 984220a086..30574718f2 100644 --- a/codex-rs/core/tests/suite/stream_no_completed.rs +++ b/codex-rs/core/tests/suite/stream_no_completed.rs @@ -6,8 +6,6 @@ use codex_model_provider_info::WireApi; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::Op; use codex_protocol::user_input::UserInput; -use codex_utils_cargo_bin::find_resource; -use core_test_support::load_sse_fixture; use core_test_support::responses; use core_test_support::skip_if_no_network; use core_test_support::streaming_sse::StreamingSseChunk; @@ -17,9 +15,9 @@ use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; fn sse_incomplete() -> String { - let fixture = find_resource!("tests/fixtures/incomplete_sse.json") - .unwrap_or_else(|err| panic!("failed to resolve incomplete_sse fixture: {err}")); - load_sse_fixture(fixture) + responses::sse(vec![serde_json::json!({ + "type": "response.output_item.done", + })]) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] diff --git a/codex-rs/features/src/lib.rs b/codex-rs/features/src/lib.rs index 194ad8b6c2..59f723683b 100644 --- a/codex-rs/features/src/lib.rs +++ b/codex-rs/features/src/lib.rs @@ -76,31 +76,22 @@ impl Stage { #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Feature { // Stable. - /// Removed compatibility flag retained as a no-op so old configs can - /// still parse `undo`. - GhostCommit, /// Enable the default shell tool. ShellTool, /// Enable Claude-style lifecycle hooks loaded from hooks.json files. CodexHooks, // Experimental - /// Removed compatibility flag for the deleted JavaScript REPL feature. - JsRepl, /// Enable JavaScript code mode backed by the in-process V8 runtime. CodeMode, /// Restrict model-visible tools to code mode entrypoints (`exec`, `wait`). CodeModeOnly, - /// Removed compatibility flag for the deleted JavaScript REPL tool-only mode. - JsReplToolsOnly, /// Use the single unified PTY-backed exec tool. UnifiedExec, /// Route shell tool execution through the zsh exec bridge. ShellZshFork, /// Reflow transcript scrollback when the terminal is resized. TerminalResizeReflow, - /// Removed compatibility flag for the deleted apply_patch fallback feature. - ApplyPatchFreeform, /// Stream structured progress while apply_patch input is being generated. ApplyPatchStreamingEvents, /// Allow exec tools to request additional permissions while staying sandboxed. @@ -112,30 +103,13 @@ pub enum Feature { /// Allow the model to request web searches that fetch cached content. /// Takes precedence over `WebSearchRequest`. WebSearchCached, - /// Legacy search-tool feature flag kept for backward compatibility. - SearchTool, - /// Removed legacy Linux bubblewrap opt-in flag retained as a no-op so old - /// wrappers and config can still parse it. - UseLinuxSandboxBwrap, /// Use the legacy Landlock Linux sandbox fallback instead of the default /// bubblewrap pipeline. UseLegacyLandlock, - /// Allow the model to request approval and propose exec rules. - RequestRule, - /// Enable Windows sandbox (restricted token) on Windows. - WindowsSandbox, - /// Use the elevated Windows sandbox pipeline (setup + runner). - WindowsSandboxElevated, - /// Legacy remote models flag kept for backward compatibility. - RemoteModels, /// Experimental shell snapshotting. ShellSnapshot, - /// Removed legacy git commit attribution guidance flag. - CodexGitCommit, /// Enable runtime metrics snapshots via a manual reader. RuntimeMetrics, - /// Persist rollout metadata to a local SQLite database. - Sqlite, /// Enable startup memory extraction and file-backed memory consolidation. MemoryTool, /// Enable the Chronicle sidecar for passive screen-context memories. @@ -162,8 +136,6 @@ pub enum Feature { ToolSearch, /// Always defer MCP tools behind tool_search instead of exposing small sets directly. ToolSearchAlwaysDeferMcpTools, - /// Removed compatibility flag for the deleted unavailable-tool placeholder backfill. - UnavailableDummyTools, /// Enable discoverable tool suggestions for apps. ToolSuggest, /// Enable plugins. @@ -200,18 +172,12 @@ pub enum Feature { SkillEnvVarDependencyPrompt, /// Enable the unified mention popup prototype. MentionsV2, - /// Steer feature flag - when enabled, Enter submits immediately instead of queuing. - /// Kept for config backward compatibility; behavior is always steer-enabled. - Steer, /// Allow request_user_input in Default collaboration mode. DefaultModeRequestUserInput, /// Enable automatic review for approval prompts. GuardianApproval, /// Enable persisted thread goals and automatic goal continuation. Goals, - /// Enable collaboration modes (Plan, Default). - /// Kept for config backward compatibility; behavior is always collaboration-modes-enabled. - CollaborationModes, /// Route MCP tool approval prompts through the MCP elicitation request path. ToolCallMcpElicitation, /// Prompt Codex Apps connector auth failures through MCP URL elicitations. @@ -224,15 +190,57 @@ pub enum Feature { FastMode, /// Enable experimental realtime voice conversation mode in the TUI. RealtimeConversation, - /// Connect app-server to the ChatGPT remote control service. + /// Prevent idle system sleep while a turn is actively running. + PreventIdleSleep, + /// Send `response.processed` over Responses API websockets after a turn response is recorded. + ResponsesWebsocketResponseProcessed, + /// Enable remote compaction v2 over the normal Responses API. + RemoteCompactionV2, + /// Enable workspace dependency support. + WorkspaceDependencies, + + // Removed + /// Removed compatibility flag retained as a no-op so old configs can + /// still parse `undo`. + GhostCommit, + /// Removed compatibility flag for the deleted JavaScript REPL feature. + JsRepl, + /// Removed compatibility flag for the deleted JavaScript REPL tool-only mode. + JsReplToolsOnly, + /// Legacy search-tool feature flag kept for backward compatibility. + SearchTool, + /// Removed legacy Linux bubblewrap opt-in flag retained as a no-op so old + /// wrappers and config can still parse it. + UseLinuxSandboxBwrap, + /// Allow the model to request approval and propose exec rules. + RequestRule, + /// Enable Windows sandbox (restricted token) on Windows. + WindowsSandbox, + /// Use the elevated Windows sandbox pipeline (setup + runner). + WindowsSandboxElevated, + /// Legacy remote models flag kept for backward compatibility. + RemoteModels, + /// Removed legacy git commit attribution guidance flag. + CodexGitCommit, + /// Persist rollout metadata to a local SQLite database. + Sqlite, + /// Removed compatibility flag for the deleted apply_patch fallback feature. + ApplyPatchFreeform, + /// Removed compatibility flag for the deleted unavailable-tool placeholder backfill. + UnavailableDummyTools, + /// Steer feature flag - when enabled, Enter submits immediately instead of queuing. + /// Kept for config backward compatibility; behavior is always steer-enabled. + Steer, + /// Enable collaboration modes (Plan, Default). + /// Kept for config backward compatibility; behavior is always collaboration-modes-enabled. + CollaborationModes, + /// Removed compatibility flag for the deleted remote control feature. RemoteControl, /// Removed compatibility flag retained as a no-op so old wrappers can /// still pass `--enable image_detail_original`. ImageDetailOriginal, /// Removed compatibility flag. The TUI now always uses the app-server implementation. TuiAppServer, - /// Prevent idle system sleep while a turn is actively running. - PreventIdleSleep, /// Removed compatibility flag retained as a no-op now that workspace owner /// usage nudges are always enabled. WorkspaceOwnerUsageNudge, @@ -240,12 +248,6 @@ pub enum Feature { ResponsesWebsockets, /// Legacy rollout flag for Responses API WebSocket transport v2 experiments. ResponsesWebsocketsV2, - /// Send `response.processed` over Responses API websockets after a turn response is recorded. - ResponsesWebsocketResponseProcessed, - /// Enable remote compaction v2 over the normal Responses API. - RemoteCompactionV2, - /// Enable workspace dependency support. - WorkspaceDependencies, } impl Feature {