diff --git a/codex-rs/core/src/session/snapshots/codex_core__codex_tests__fork_startup_context_then_first_turn_diff.snap b/codex-rs/core/src/session/snapshots/codex_core__codex_tests__fork_startup_context_then_first_turn_diff.snap index f9a808f65e..c51cb93ac6 100644 --- a/codex-rs/core/src/session/snapshots/codex_core__codex_tests__fork_startup_context_then_first_turn_diff.snap +++ b/codex-rs/core/src/session/snapshots/codex_core__codex_tests__fork_startup_context_then_first_turn_diff.snap @@ -1,6 +1,5 @@ --- source: core/src/session/tests.rs -assertion_line: 1619 expression: snapshot --- Scenario: First request after fork when startup preserves the parent baseline, the fork changes approval policy, and the first forked turn enters plan mode. diff --git a/codex-rs/core/src/tools/handlers/multi_agents.rs b/codex-rs/core/src/tools/handlers/multi_agents.rs index 1587c4e840..d26b64722d 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents.rs @@ -18,6 +18,7 @@ pub(crate) use crate::tools::handlers::multi_agents_common::*; use crate::tools::handlers::parse_arguments; use crate::tools::registry::CoreToolRuntime; use crate::tools::registry::ToolExecutor; +use crate::tools::tool_search_entry::ToolSearchInfo; use codex_protocol::ThreadId; use codex_protocol::models::ResponseInputItem; use codex_protocol::openai_models::ReasoningEffort; @@ -34,10 +35,14 @@ use codex_protocol::protocol::CollabWaitingBeginEvent; use codex_protocol::protocol::CollabWaitingEndEvent; use codex_protocol::user_input::UserInput; use codex_tools::ToolName; +use codex_tools::ToolSearchSourceInfo; use serde::Deserialize; use serde::Serialize; use serde_json::Value as JsonValue; +const MULTI_AGENT_TOOL_SEARCH_SOURCE_NAME: &str = "Multi-agent tools"; +const MULTI_AGENT_TOOL_SEARCH_SOURCE_DESCRIPTION: &str = "Spawn and manage sub-agents."; + pub(crate) fn parse_agent_id_target(target: &str) -> Result { ThreadId::from_string(target).map_err(|err| { FunctionCallError::RespondToModel(format!("invalid agent id {target}: {err:?}")) @@ -59,6 +64,20 @@ pub(crate) fn parse_agent_id_targets( .collect() } +fn multi_agent_tool_search_info( + search_text: &str, + spec: codex_tools::ToolSpec, +) -> Option { + ToolSearchInfo::from_spec( + search_text.to_string(), + spec, + Some(ToolSearchSourceInfo { + name: MULTI_AGENT_TOOL_SEARCH_SOURCE_NAME.to_string(), + description: Some(MULTI_AGENT_TOOL_SEARCH_SOURCE_DESCRIPTION.to_string()), + }), + ) +} + pub(crate) use close_agent::Handler as CloseAgentHandler; pub(crate) use resume_agent::Handler as ResumeAgentHandler; pub(crate) use send_input::Handler as SendInputHandler; diff --git a/codex-rs/core/src/tools/handlers/multi_agents/close_agent.rs b/codex-rs/core/src/tools/handlers/multi_agents/close_agent.rs index 805f569b8e..ce8bd5d2ab 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/close_agent.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/close_agent.rs @@ -107,6 +107,13 @@ async fn handle_close_agent( } impl CoreToolRuntime for Handler { + fn search_info(&self) -> Option { + multi_agent_tool_search_info( + "close_agent close shutdown stop agent subagent thread status target", + self.spec()?, + ) + } + fn matches_kind(&self, payload: &ToolPayload) -> bool { matches!(payload, ToolPayload::Function { .. }) } diff --git a/codex-rs/core/src/tools/handlers/multi_agents/resume_agent.rs b/codex-rs/core/src/tools/handlers/multi_agents/resume_agent.rs index 71b29fb423..fcb8aeab18 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/resume_agent.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/resume_agent.rs @@ -135,6 +135,13 @@ async fn handle_resume_agent( } impl CoreToolRuntime for Handler { + fn search_info(&self) -> Option { + multi_agent_tool_search_info( + "resume_agent resume reopen closed agent subagent thread id target", + self.spec()?, + ) + } + fn matches_kind(&self, payload: &ToolPayload) -> bool { matches!(payload, ToolPayload::Function { .. }) } diff --git a/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs b/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs index 74f7eb05b4..3a42f425ec 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs @@ -91,6 +91,13 @@ impl ToolExecutor for Handler { } impl CoreToolRuntime for Handler { + fn search_info(&self) -> Option { + multi_agent_tool_search_info( + "send_input send message existing agent subagent follow up interrupt redirect queue target", + self.spec()?, + ) + } + fn matches_kind(&self, payload: &ToolPayload) -> bool { matches!(payload, ToolPayload::Function { .. }) } diff --git a/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs b/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs index d19849d443..ce87c9f08d 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs @@ -200,6 +200,13 @@ async fn handle_spawn_agent( } impl CoreToolRuntime for Handler { + fn search_info(&self) -> Option { + multi_agent_tool_search_info( + "spawn_agent spawn agent subagent sub-agent delegate delegation parallel work worker explorer no-apps fork model reasoning", + self.spec()?, + ) + } + fn matches_kind(&self, payload: &ToolPayload) -> bool { matches!(payload, ToolPayload::Function { .. }) } diff --git a/codex-rs/core/src/tools/handlers/multi_agents/wait.rs b/codex-rs/core/src/tools/handlers/multi_agents/wait.rs index a69784b901..97841b3961 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/wait.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/wait.rs @@ -203,6 +203,13 @@ impl ToolExecutor for Handler { } impl CoreToolRuntime for Handler { + fn search_info(&self) -> Option { + multi_agent_tool_search_info( + "wait_agent wait agent subagent status final result complete timeout targets", + self.spec()?, + ) + } + fn matches_kind(&self, payload: &ToolPayload) -> bool { matches!(payload, ToolPayload::Function { .. }) } diff --git a/codex-rs/core/src/tools/spec_plan.rs b/codex-rs/core/src/tools/spec_plan.rs index 26c91523cc..d75d56404d 100644 --- a/codex-rs/core/src/tools/spec_plan.rs +++ b/codex-rs/core/src/tools/spec_plan.rs @@ -610,23 +610,35 @@ fn add_collaboration_tools(context: &CoreToolPlanContext<'_>, planned_tools: &mu } else { let agent_type_description = agent_type_description(turn_context, context.default_agent_type_description); - planned_tools.add_runtime(SpawnAgentHandler::new(SpawnAgentToolOptions { - available_models: turn_context.available_models.clone(), - agent_type_description, - hide_agent_type_model_reasoning: turn_context - .config - .multi_agent_v2 - .hide_spawn_agent_metadata, - include_usage_hint: turn_context.config.multi_agent_v2.usage_hint_enabled, - usage_hint_text: turn_context.config.multi_agent_v2.usage_hint_text.clone(), - max_concurrent_threads_per_session: max_concurrent_threads_per_session( - turn_context, - ), - })); - planned_tools.add_runtime(SendInputHandler); - planned_tools.add_runtime(ResumeAgentHandler); - planned_tools.add_runtime(WaitAgentHandler::new(context.wait_agent_timeouts)); - planned_tools.add_runtime(CloseAgentHandler); + let exposure = + if search_tool_enabled(turn_context) && namespace_tools_enabled(turn_context) { + ToolExposure::Deferred + } else { + ToolExposure::Direct + }; + planned_tools.add_runtime_arc(multi_agent_v1_handler( + SpawnAgentHandler::new(SpawnAgentToolOptions { + available_models: turn_context.available_models.clone(), + agent_type_description, + hide_agent_type_model_reasoning: turn_context + .config + .multi_agent_v2 + .hide_spawn_agent_metadata, + include_usage_hint: turn_context.config.multi_agent_v2.usage_hint_enabled, + usage_hint_text: turn_context.config.multi_agent_v2.usage_hint_text.clone(), + max_concurrent_threads_per_session: max_concurrent_threads_per_session( + turn_context, + ), + }), + exposure, + )); + planned_tools.add_runtime_arc(multi_agent_v1_handler(SendInputHandler, exposure)); + planned_tools.add_runtime_arc(multi_agent_v1_handler(ResumeAgentHandler, exposure)); + planned_tools.add_runtime_arc(multi_agent_v1_handler( + WaitAgentHandler::new(context.wait_agent_timeouts), + exposure, + )); + planned_tools.add_runtime_arc(multi_agent_v1_handler(CloseAgentHandler, exposure)); } } @@ -757,6 +769,13 @@ fn append_extension_tool_executors( } } +fn multi_agent_v1_handler( + handler: impl CoreToolRuntime + 'static, + exposure: ToolExposure, +) -> Arc { + override_tool_exposure(Arc::new(handler), exposure) +} + fn multi_agent_v2_handler( handler: impl CoreToolRuntime + 'static, exposure: ToolExposure, diff --git a/codex-rs/core/src/tools/spec_plan_tests.rs b/codex-rs/core/src/tools/spec_plan_tests.rs index 3c7c361e88..57bab6e39f 100644 --- a/codex-rs/core/src/tools/spec_plan_tests.rs +++ b/codex-rs/core/src/tools/spec_plan_tests.rs @@ -470,6 +470,7 @@ async fn mcp_and_tool_search_follow_direct_and_deferred_tool_exposure() { missing_model_capability.assert_visible_lacks(&["tool_search"]); let missing_deferred_tools = probe(|turn| { + set_feature(turn, Feature::Collab, /*enabled*/ false); turn.model_info.supports_search_tool = true; }) .await; @@ -653,6 +654,39 @@ async fn multi_agent_feature_selects_one_agent_tool_family() { ); } +#[tokio::test] +async fn v1_multi_agent_tools_defer_when_tool_search_available() { + let plan = probe(|turn| { + turn.model_info.supports_search_tool = true; + set_feature(turn, Feature::Collab, /*enabled*/ true); + set_feature(turn, Feature::MultiAgentV2, /*enabled*/ false); + }) + .await; + + plan.assert_visible_contains(&["tool_search"]); + plan.assert_visible_lacks(&[ + "spawn_agent", + "send_input", + "resume_agent", + "wait_agent", + "close_agent", + ]); + for tool_name in [ + "spawn_agent", + "send_input", + "resume_agent", + "wait_agent", + "close_agent", + ] { + plan.assert_registered_contains(&[tool_name]); + assert_eq!(plan.exposure(tool_name), ToolExposure::Deferred); + } + let ToolSpec::ToolSearch { description, .. } = plan.visible_spec("tool_search") else { + panic!("expected visible tool_search spec"); + }; + assert!(description.contains("- Multi-agent tools: Spawn and manage sub-agents.")); +} + #[tokio::test] async fn multi_agent_v2_can_use_configured_tool_namespace() { let namespaced = probe(|turn| { diff --git a/codex-rs/core/tests/suite/collaboration_instructions.rs b/codex-rs/core/tests/suite/collaboration_instructions.rs index bf8de53dd1..0f9bee9431 100644 --- a/codex-rs/core/tests/suite/collaboration_instructions.rs +++ b/codex-rs/core/tests/suite/collaboration_instructions.rs @@ -49,13 +49,6 @@ fn developer_texts(input: &[Value]) -> Vec { .collect() } -fn developer_message_count(input: &[Value]) -> usize { - input - .iter() - .filter(|item| item.get("role").and_then(Value::as_str) == Some("developer")) - .count() -} - fn collab_xml(text: &str) -> String { format!("{COLLABORATION_MODE_OPEN_TAG}{text}{COLLABORATION_MODE_CLOSE_TAG}") } @@ -92,7 +85,6 @@ async fn no_collaboration_instructions_by_default() -> Result<()> { wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; let input = req.single_request().input(); - assert_eq!(developer_message_count(&input), 1); let dev_texts = developer_texts(&input); assert!( dev_texts @@ -790,7 +782,6 @@ async fn empty_collaboration_instructions_are_ignored() -> Result<()> { wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; let input = req.single_request().input(); - assert_eq!(developer_message_count(&input), 1); let dev_texts = developer_texts(&input); let collab_text = collab_xml(""); assert_eq!(count_messages_containing(&dev_texts, &collab_text), 0); diff --git a/codex-rs/core/tests/suite/compact_remote.rs b/codex-rs/core/tests/suite/compact_remote.rs index 9aac846ffe..d868abbfee 100644 --- a/codex-rs/core/tests/suite/compact_remote.rs +++ b/codex-rs/core/tests/suite/compact_remote.rs @@ -1553,7 +1553,7 @@ async fn remote_compact_trim_estimate_uses_session_base_instructions() -> Result let override_base_instructions = format!( "{}\nREMOTE_BASE_INSTRUCTIONS_OVERRIDE {}", baseline_compact_request.instructions_text(), - "x".repeat(4_000) + "x".repeat(8_000) ); let override_context_window = baseline_payload_tokens.saturating_add(500); let pretrim_override_estimate = diff --git a/codex-rs/core/tests/suite/prompt_caching.rs b/codex-rs/core/tests/suite/prompt_caching.rs index 68b3816ae6..a4b337f059 100644 --- a/codex-rs/core/tests/suite/prompt_caching.rs +++ b/codex-rs/core/tests/suite/prompt_caching.rs @@ -182,11 +182,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> { "request_user_input", "apply_patch", "view_image", - "spawn_agent", - "send_input", - "resume_agent", - "wait_agent", - "close_agent", + "tool_search", "web_search", ]); let body0 = req1.single_request().body_json(); diff --git a/codex-rs/core/tests/suite/search_tool.rs b/codex-rs/core/tests/suite/search_tool.rs index 8a092d7605..ca5c146e5a 100644 --- a/codex-rs/core/tests/suite/search_tool.rs +++ b/codex-rs/core/tests/suite/search_tool.rs @@ -217,7 +217,7 @@ async fn always_defer_feature_hides_small_app_tool_sets() -> Result<()> { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn search_tool_is_hidden_for_api_key_auth() -> Result<()> { +async fn app_search_sources_are_hidden_for_api_key_auth() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; @@ -249,8 +249,13 @@ async fn search_tool_is_hidden_for_api_key_auth() -> Result<()> { let body = mock.single_request().body_json(); let tools = tool_names(&body); assert!( - !tools.iter().any(|name| name == TOOL_SEARCH_TOOL_NAME), - "tools list should not include {TOOL_SEARCH_TOOL_NAME} for API key auth: {tools:?}" + !tools.iter().any(|name| name == SEARCH_CALENDAR_NAMESPACE), + "tools list should not include app tools for API key auth: {tools:?}" + ); + let description = tool_search_description(&body).unwrap_or_default(); + assert!( + !description.contains("Calendar"), + "tool_search description should not include app sources for API key auth: {description}" ); Ok(()) @@ -671,6 +676,96 @@ async fn tool_search_returns_deferred_tools_without_follow_up_tool_injection() - Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn tool_search_returns_deferred_v1_multi_agent_tools() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let call_id = "tool-search-spawn-agent"; + let mock = mount_sse_sequence( + &server, + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_tool_search_call( + call_id, + &json!({ + "query": "spawn agent", + "limit": 1, + }), + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_response_created("resp-2"), + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ], + ) + .await; + + let mut builder = test_codex().with_config(configure_search_capable_model); + let test = builder.build(&server).await?; + test.submit_turn_with_approval_and_permission_profile( + "Find the spawn agent tool", + AskForApproval::Never, + PermissionProfile::Disabled, + ) + .await?; + + let requests = mock.requests(); + assert_eq!(requests.len(), 2); + + let first_request_body = requests[0].body_json(); + let first_request_tools = tool_names(&first_request_body); + assert!( + first_request_tools + .iter() + .any(|name| name == TOOL_SEARCH_TOOL_NAME), + "first request should advertise tool_search: {first_request_tools:?}" + ); + for tool_name in [ + "spawn_agent", + "send_input", + "resume_agent", + "wait_agent", + "close_agent", + ] { + assert!( + !first_request_tools.iter().any(|name| name == tool_name), + "v1 multi-agent tools should be hidden before search: {first_request_tools:?}" + ); + } + assert!( + !first_request_body + .to_string() + .contains("Only use `spawn_agent` if and only if"), + "deferred v1 multi-agent guidance should stay out of initial developer context" + ); + + let tools = tool_search_output_tools(&requests[1], call_id); + let spawn_agent = tools + .iter() + .find(|tool| { + tool.get("type").and_then(Value::as_str) == Some("function") + && tool.get("name").and_then(Value::as_str) == Some("spawn_agent") + }) + .unwrap_or_else(|| panic!("expected tool_search to return spawn_agent: {tools:?}")); + assert_eq!( + spawn_agent.get("defer_loading").and_then(Value::as_bool), + Some(true) + ); + let description = spawn_agent + .get("description") + .and_then(Value::as_str) + .expect("spawn_agent description should be present"); + assert!(description.contains("Only use `spawn_agent` if and only if")); + assert!(description.contains("### Designing delegated subtasks")); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn tool_search_returns_deferred_dynamic_tool_and_routes_follow_up_call() -> Result<()> { skip_if_no_network!(Ok(())); diff --git a/codex-rs/core/tests/suite/subagent_notifications.rs b/codex-rs/core/tests/suite/subagent_notifications.rs index 3a0c37acc7..f56df240d5 100644 --- a/codex-rs/core/tests/suite/subagent_notifications.rs +++ b/codex-rs/core/tests/suite/subagent_notifications.rs @@ -9,8 +9,10 @@ use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_function_call; use core_test_support::responses::ev_response_created; +use core_test_support::responses::ev_tool_search_call; use core_test_support::responses::mount_response_once_match; use core_test_support::responses::mount_sse_once_match; +use core_test_support::responses::mount_sse_sequence; use core_test_support::responses::sse; use core_test_support::responses::sse_response; use core_test_support::responses::start_mock_server; @@ -18,6 +20,7 @@ use core_test_support::skip_if_no_network; use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; use pretty_assertions::assert_eq; +use serde_json::Value; use serde_json::json; use std::fs; use std::path::Path; @@ -64,28 +67,22 @@ fn has_subagent_notification(req: &ResponsesRequest) -> bool { .any(|text| text.contains("")) } -fn tool_parameter_description( - req: &ResponsesRequest, - tool_name: &str, - parameter_name: &str, -) -> Option { - req.body_json() +fn tool_parameter_description(tool: &Value, parameter_name: &str) -> Option { + tool.get("parameters") + .and_then(|parameters| parameters.get("properties")) + .and_then(|properties| properties.get(parameter_name)) + .and_then(|parameter| parameter.get("description")) + .and_then(Value::as_str) + .map(str::to_owned) +} + +fn tool_search_output_tools(request: &ResponsesRequest, call_id: &str) -> Vec { + request + .tool_search_output(call_id) .get("tools") - .and_then(serde_json::Value::as_array) - .and_then(|tools| { - tools.iter().find_map(|tool| { - if tool.get("name").and_then(serde_json::Value::as_str) == Some(tool_name) { - tool.get("parameters") - .and_then(|parameters| parameters.get("properties")) - .and_then(|properties| properties.get(parameter_name)) - .and_then(|parameter| parameter.get("description")) - .and_then(serde_json::Value::as_str) - .map(str::to_owned) - } else { - None - } - }) - }) + .and_then(Value::as_array) + .cloned() + .unwrap_or_default() } fn role_block(description: &str, role_name: &str) -> Option { @@ -646,14 +643,27 @@ async fn spawn_agent_tool_description_mentions_role_locked_settings() -> Result< skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let resp_mock = mount_sse_once_match( + let call_id = "tool-search-spawn-agent"; + let resp_mock = mount_sse_sequence( &server, - |req: &wiremock::Request| body_contains(req, TURN_1_PROMPT), - sse(vec![ - ev_response_created("resp-turn1-1"), - ev_assistant_message("msg-turn1-1", "done"), - ev_completed("resp-turn1-1"), - ]), + vec![ + sse(vec![ + ev_response_created("resp-turn1-1"), + ev_tool_search_call( + call_id, + &json!({ + "query": "spawn agent custom role", + "limit": 1, + }), + ), + ev_completed("resp-turn1-1"), + ]), + sse(vec![ + ev_response_created("resp-turn1-2"), + ev_assistant_message("msg-turn1-2", "done"), + ev_completed("resp-turn1-2"), + ]), + ], ) .await; @@ -683,8 +693,17 @@ async fn spawn_agent_tool_description_mentions_role_locked_settings() -> Result< test.submit_turn(TURN_1_PROMPT).await?; - let request = resp_mock.single_request(); - let agent_type_description = tool_parameter_description(&request, "spawn_agent", "agent_type") + let requests = resp_mock.requests(); + assert_eq!(requests.len(), 2); + let tools = tool_search_output_tools(&requests[1], call_id); + let spawn_agent = tools + .iter() + .find(|tool| { + tool.get("type").and_then(Value::as_str) == Some("function") + && tool.get("name").and_then(Value::as_str) == Some("spawn_agent") + }) + .unwrap_or_else(|| panic!("expected tool_search to return spawn_agent: {tools:?}")); + let agent_type_description = tool_parameter_description(spawn_agent, "agent_type") .expect("spawn_agent agent_type description"); let custom_role_description = role_block(&agent_type_description, "custom").expect("custom role description");