diff --git a/codex-rs/app-server-protocol/src/protocol/thread_history.rs b/codex-rs/app-server-protocol/src/protocol/thread_history.rs index cec380a88a..ceb835aa1c 100644 --- a/codex-rs/app-server-protocol/src/protocol/thread_history.rs +++ b/codex-rs/app-server-protocol/src/protocol/thread_history.rs @@ -2529,6 +2529,7 @@ mod tests { prompt: "inspect the repo".into(), model: "gpt-5.4-mini".into(), reasoning_effort: codex_protocol::openai_models::ReasoningEffort::Medium, + spawn_mode: codex_protocol::protocol::AgentSpawnMode::Spawn, status: AgentStatus::Running, }), ]; diff --git a/codex-rs/app-server/src/codex_message_processor.rs b/codex-rs/app-server/src/codex_message_processor.rs index 701e7f47ea..f82611df25 100644 --- a/codex-rs/app-server/src/codex_message_processor.rs +++ b/codex-rs/app-server/src/codex_message_processor.rs @@ -3428,8 +3428,18 @@ impl CodexMessageProcessor { } else { read_summary_from_state_db_by_thread_id(&self.config, thread_uuid).await }; + let loaded_rollout_path = loaded_thread + .as_ref() + .and_then(|thread| thread.rollout_path()); let mut rollout_path = db_summary.as_ref().map(|summary| summary.path.clone()); - if rollout_path.is_none() || include_turns { + if rollout_path.is_none() + && let Some(path) = loaded_rollout_path.as_ref() + && tokio::fs::try_exists(path).await.unwrap_or(false) + { + rollout_path = Some(path.clone()); + } + let should_lookup_rollout = rollout_path.is_none() && loaded_thread.is_none(); + if should_lookup_rollout { rollout_path = match find_thread_path_by_id_str(&self.config.codex_home, &thread_uuid.to_string()) .await @@ -3490,7 +3500,6 @@ impl CodexMessageProcessor { return; }; let config_snapshot = thread.config_snapshot().await; - let loaded_rollout_path = thread.rollout_path(); if include_turns && loaded_rollout_path.is_none() { self.send_invalid_request_error( request_id, @@ -8928,6 +8937,7 @@ mod tests { sandbox_policy: codex_protocol::protocol::SandboxPolicy::DangerFullAccess, cwd: 
PathBuf::from("/tmp"), ephemeral: false, + agent_use_function_call_inbox: false, reasoning_effort: None, personality: None, session_source: SessionSource::Cli, diff --git a/codex-rs/app-server/tests/suite/v2/initialize.rs b/codex-rs/app-server/tests/suite/v2/initialize.rs index 165160468f..ca33aae852 100644 --- a/codex-rs/app-server/tests/suite/v2/initialize.rs +++ b/codex-rs/app-server/tests/suite/v2/initialize.rs @@ -20,11 +20,17 @@ use core_test_support::fs_wait; use pretty_assertions::assert_eq; use serde_json::Value; use std::path::Path; -use std::time::Duration; use tempfile::TempDir; use tokio::time::timeout; +#[cfg(windows)] +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(25); +#[cfg(not(windows))] const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); +#[cfg(windows)] +const DEFAULT_NOTIFY_FILE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(25); +#[cfg(not(windows))] +const DEFAULT_NOTIFY_FILE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5); #[tokio::test] async fn initialize_uses_client_info_name_as_originator() -> Result<()> { @@ -270,9 +276,9 @@ async fn turn_start_notify_payload_includes_initialize_client_name() -> Result<( mcp.read_stream_until_notification_message("turn/completed"), ) .await??; - - fs_wait::wait_for_path_exists(&notify_file, Duration::from_secs(5)).await?; - let payload_raw = tokio::fs::read_to_string(&notify_file).await?; + let notify_file = Path::new(&notify_file); + fs_wait::wait_for_path_exists(notify_file, DEFAULT_NOTIFY_FILE_TIMEOUT).await?; + let payload_raw = tokio::fs::read_to_string(notify_file).await?; let payload: Value = serde_json::from_str(&payload_raw)?; assert_eq!(payload["client"], "xcode"); diff --git a/codex-rs/app-server/tests/suite/v2/thread_read.rs b/codex-rs/app-server/tests/suite/v2/thread_read.rs index 20dff3ddcb..1503d9a370 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_read.rs +++ 
b/codex-rs/app-server/tests/suite/v2/thread_read.rs @@ -557,6 +557,62 @@ async fn thread_read_include_turns_rejects_unmaterialized_loaded_thread() -> Res Ok(()) } +#[tokio::test] +async fn thread_read_loaded_ephemeral_thread_ignores_unrelated_rollout_mentions() -> Result<()> { + let server = create_mock_responses_server_repeating_assistant("Done").await; + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let start_id = mcp + .send_thread_start_request(ThreadStartParams { + model: Some("mock-model".to_string()), + ephemeral: Some(true), + ..Default::default() + }) + .await?; + let start_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(start_id)), + ) + .await??; + let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(start_resp)?; + + let unrelated_preview = thread.id.clone(); + let _unrelated_rollout_id = create_fake_rollout_with_text_elements( + codex_home.path(), + "2025-01-05T13-00-00", + "2025-01-05T13:00:00Z", + &unrelated_preview, + vec![], + Some("mock_provider"), + /*git_info*/ None, + )?; + + let read_id = mcp + .send_thread_read_request(ThreadReadParams { + thread_id: thread.id.clone(), + include_turns: false, + }) + .await?; + let read_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(read_id)), + ) + .await??; + let ThreadReadResponse { thread: read } = to_response::<ThreadReadResponse>(read_resp)?; + + assert_eq!(read.id, thread.id); + assert!(read.ephemeral); + assert_eq!(read.path, None); + assert!(read.preview.is_empty()); + assert_eq!(read.status, ThreadStatus::Idle); + + Ok(()) +} + #[tokio::test] async fn thread_read_reports_system_error_idle_flag_after_failed_turn() -> Result<()> { let server = responses::start_mock_server().await; diff --git 
a/codex-rs/app-server/tests/suite/v2/thread_resume.rs b/codex-rs/app-server/tests/suite/v2/thread_resume.rs index 42bcfe56e9..a9c610f8b0 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_resume.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_resume.rs @@ -62,6 +62,7 @@ use std::path::Path; use std::path::PathBuf; use std::process::Command; use tempfile::TempDir; +use tokio::time::sleep; use tokio::time::timeout; use uuid::Uuid; use wiremock::Mock; @@ -70,39 +71,12 @@ use wiremock::ResponseTemplate; use wiremock::matchers::method; use wiremock::matchers::path; +#[cfg(windows)] +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(25); +#[cfg(not(windows))] const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); const CODEX_5_2_INSTRUCTIONS_TEMPLATE_DEFAULT: &str = "You are Codex, a coding agent based on GPT-5. You and the user share the same workspace and collaborate to achieve the user's goals."; -async fn wait_for_responses_request_count( - server: &wiremock::MockServer, - expected_count: usize, -) -> Result<()> { - timeout(DEFAULT_READ_TIMEOUT, async { - loop { - let Some(requests) = server.received_requests().await else { - anyhow::bail!("wiremock did not record requests"); - }; - let responses_request_count = requests - .iter() - .filter(|request| { - request.method == "POST" && request.url.path().ends_with("/responses") - }) - .count(); - if responses_request_count == expected_count { - return Ok::<(), anyhow::Error>(()); - } - if responses_request_count > expected_count { - anyhow::bail!( - "expected exactly {expected_count} /responses requests, got {responses_request_count}" - ); - } - tokio::time::sleep(std::time::Duration::from_millis(10)).await; - } - }) - .await??; - Ok(()) -} - #[tokio::test] async fn thread_resume_rejects_unmaterialized_thread() -> Result<()> { let server = create_mock_responses_server_repeating_assistant("Done").await; @@ -1075,13 +1049,9 @@ async fn 
thread_resume_replays_pending_command_execution_request_approval() -> R let responses = vec![ create_final_assistant_message_sse_response("seeded")?, create_shell_command_sse_response( - vec![ - "python3".to_string(), - "-c".to_string(), - "print(42)".to_string(), - ], + fast_shell_command(), /*workdir*/ None, - Some(5000), + Some(1000), "call-1", )?, create_final_assistant_message_sse_response("done")?, @@ -1201,7 +1171,7 @@ async fn thread_resume_replays_pending_command_execution_request_approval() -> R primary.read_stream_until_notification_message("turn/completed"), ) .await??; - wait_for_responses_request_count(&server, /*expected_count*/ 3).await?; + wait_for_mock_request_count(&server, /*expected*/ 3).await?; Ok(()) } @@ -1367,11 +1337,50 @@ async fn thread_resume_replays_pending_file_change_request_approval() -> Result< primary.read_stream_until_notification_message("turn/completed"), ) .await??; - wait_for_responses_request_count(&server, /*expected_count*/ 3).await?; + wait_for_mock_request_count(&server, /*expected*/ 3).await?; Ok(()) } +fn fast_shell_command() -> Vec<String> { + if cfg!(windows) { + vec![ + "cmd".to_string(), + "/d".to_string(), + "/c".to_string(), + "echo 42".to_string(), + ] + } else { + vec![ + "python3".to_string(), + "-c".to_string(), + "print(42)".to_string(), + ] + } +} + +async fn wait_for_mock_request_count(server: &MockServer, expected: usize) -> Result<()> { + let deadline = tokio::time::Instant::now() + DEFAULT_READ_TIMEOUT; + loop { + let requests = server + .received_requests() + .await + .ok_or_else(|| anyhow::anyhow!("failed to fetch received requests"))?; + if requests.len() >= expected { + return Ok(()); + } + + if tokio::time::Instant::now() >= deadline { + anyhow::bail!( + "expected at least {expected} mock requests, observed {}", + requests.len() + ); + } + + sleep(std::time::Duration::from_millis(50)).await; + } +} + #[tokio::test] async fn thread_resume_with_overrides_defers_updated_at_until_turn_start() -> Result<()> { 
let server = create_mock_responses_server_repeating_assistant("Done").await; diff --git a/codex-rs/app-server/tests/suite/v2/thread_start.rs b/codex-rs/app-server/tests/suite/v2/thread_start.rs index 1a73444bda..e31a02ded6 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_start.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_start.rs @@ -200,9 +200,14 @@ async fn thread_start_accepts_flex_service_tier() -> Result<()> { let codex_home = TempDir::new()?; create_config_toml(codex_home.path(), &server.uri())?; + let read_timeout = if cfg!(windows) { + std::time::Duration::from_secs(15) + } else { + DEFAULT_READ_TIMEOUT + }; let mut mcp = McpProcess::new(codex_home.path()).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + timeout(read_timeout, mcp.initialize()).await??; let req_id = mcp .send_thread_start_request(ThreadStartParams { @@ -212,7 +217,7 @@ async fn thread_start_accepts_flex_service_tier() -> Result<()> { .await?; let resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_response_message(RequestId::Integer(req_id)), ) .await??; diff --git a/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs b/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs index 5808f0fe79..da0304664d 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs @@ -33,6 +33,9 @@ use pretty_assertions::assert_eq; use tempfile::TempDir; use tokio::time::timeout; +#[cfg(windows)] +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(25); +#[cfg(not(windows))] const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); async fn wait_for_responses_request_count_to_stabilize( diff --git a/codex-rs/app-server/tests/suite/v2/turn_start.rs b/codex-rs/app-server/tests/suite/v2/turn_start.rs index 3155a4be9c..a641346f9d 100644 --- a/codex-rs/app-server/tests/suite/v2/turn_start.rs +++ 
b/codex-rs/app-server/tests/suite/v2/turn_start.rs @@ -1026,24 +1026,16 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> { // Second turn same, but we'll set approval_policy=never to avoid elicitation. let responses = vec![ create_shell_command_sse_response( - vec![ - "python3".to_string(), - "-c".to_string(), - "print(42)".to_string(), - ], + fast_shell_command(), /*workdir*/ None, - Some(5000), + Some(1000), "call1", )?, create_final_assistant_message_sse_response("done 1")?, create_shell_command_sse_response( - vec![ - "python3".to_string(), - "-c".to_string(), - "print(42)".to_string(), - ], + fast_shell_command(), /*workdir*/ None, - Some(5000), + Some(1000), "call2", )?, create_final_assistant_message_sse_response("done 2")?, @@ -1170,6 +1162,23 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> { Ok(()) } +fn fast_shell_command() -> Vec<String> { + if cfg!(windows) { + vec![ + "cmd".to_string(), + "/d".to_string(), + "/c".to_string(), + "echo 42".to_string(), + ] + } else { + vec![ + "python3".to_string(), + "-c".to_string(), + "print(42)".to_string(), + ] + } +} + #[tokio::test] async fn turn_start_exec_approval_decline_v2() -> Result<()> { skip_if_no_network!(Ok(())); diff --git a/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs b/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs index 105ae54542..f7bcc78267 100644 --- a/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs +++ b/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs @@ -215,9 +215,12 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> { ]), &zsh_path, )?; + // This flow can require several sequential approval round-trips on slower + // macOS runners before the parent command reaches a terminal state. 
+ let read_timeout = std::time::Duration::from_secs(20); let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + timeout(read_timeout, mcp.initialize()).await??; let start_id = mcp .send_thread_start_request(ThreadStartParams { @@ -227,7 +230,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> { }) .await?; let start_resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_response_message(RequestId::Integer(start_id)), ) .await??; @@ -348,9 +351,12 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> { ]), &zsh_path, )?; + // This flow can require several sequential approval round-trips on slower + // macOS runners before the parent command reaches a terminal state. + let read_timeout = std::time::Duration::from_secs(20); let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + timeout(read_timeout, mcp.initialize()).await??; let start_id = mcp .send_thread_start_request(ThreadStartParams { @@ -360,7 +366,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> { }) .await?; let start_resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_response_message(RequestId::Integer(start_id)), ) .await??; @@ -507,9 +513,10 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() ]), &zsh_path, )?; + let read_timeout = std::time::Duration::from_secs(20); let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + timeout(read_timeout, mcp.initialize()).await??; let start_id = mcp .send_thread_start_request(ThreadStartParams { @@ -519,7 +526,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() }) .await?; let start_resp: 
JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_response_message(RequestId::Integer(start_id)), ) .await??; @@ -548,7 +555,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() }) .await?; let turn_resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_response_message(RequestId::Integer(turn_id)), ) .await??; @@ -566,11 +573,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() let second_file_str = second_file.to_string_lossy().into_owned(); let parent_shell_hint = format!("&& {}", &first_file_str); while target_decision_index < target_decisions.len() || !saw_parent_approval { - let server_req = timeout( - DEFAULT_READ_TIMEOUT, - mcp.read_stream_until_request_message(), - ) - .await??; + let server_req = timeout(read_timeout, mcp.read_stream_until_request_message()).await??; let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req else { panic!("expected CommandExecutionRequestApproval request"); @@ -640,7 +643,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() assert_eq!(approved_subcommand_strings.len(), 2); assert!(approved_subcommand_strings[0].contains(&first_file.display().to_string())); assert!(approved_subcommand_strings[1].contains(&second_file.display().to_string())); - let parent_completed_command_execution = timeout(DEFAULT_READ_TIMEOUT, async { + let parent_completed_command_execution = timeout(read_timeout, async { loop { let completed_notif = mcp .read_stream_until_notification_message("item/completed") @@ -682,7 +685,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() } match timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_notification_message("turn/completed"), ) .await @@ -705,7 +708,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() 
mcp.interrupt_turn_and_wait_for_aborted( thread.id.clone(), turn.id.clone(), - DEFAULT_READ_TIMEOUT, + read_timeout, ) .await?; } @@ -718,7 +721,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() // sandbox failures can also complete the turn before the parent // completion item is observed. let completed_notif = timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_notification_message("turn/completed"), ) .await??; diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index e36f495a1b..4b4c60ff5b 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -6,6 +6,13 @@ "description": "A path that is guaranteed to be absolute and normalized (though it is not guaranteed to be canonicalized or exist on the filesystem).\n\nIMPORTANT: When deserializing an `AbsolutePathBuf`, a base path must be set using [AbsolutePathBufGuard::new]. If no base path is set, the deserialization will fail unless the path being deserialized is already absolute.", "type": "string" }, + "AgentRoleSpawnMode": { + "enum": [ + "spawn", + "fork" + ], + "type": "string" + }, "AgentRoleToml": { "additionalProperties": false, "properties": { @@ -21,12 +28,34 @@ "description": "Human-facing role documentation used in spawn tool guidance. Required unless supplied by the referenced agent role file.", "type": "string" }, + "fork_context": { + "description": "Default fork-context behavior for this role.", + "type": "boolean" + }, + "model": { + "description": "Optional model override applied by this role.", + "type": "string" + }, "nickname_candidates": { "description": "Candidate nicknames for agents spawned with this role.", "items": { "type": "string" }, "type": "array" + }, + "spawn_mode": { + "allOf": [ + { + "$ref": "#/definitions/AgentRoleSpawnMode" + } + ], + "description": "Optional default spawn mode when `spawn_agent` omits `spawn_mode`." 
+ }, + "watchdog_interval_s": { + "description": "Optional watchdog interval in seconds for roles that should behave as watchdogs.", + "format": "int64", + "minimum": 1.0, + "type": "integer" } }, "type": "object" @@ -53,6 +82,11 @@ "format": "uint", "minimum": 1.0, "type": "integer" + }, + "use_function_call_inbox": { + "default": false, + "description": "Deliver inbound agent messages to non-subagent threads as a synthetic function_call/function_call_output pair instead of plain user input.", + "type": "boolean" } }, "type": "object" @@ -332,6 +366,15 @@ "default": null, "description": "Optional feature toggles scoped to this profile.", "properties": { + "agent_function_call_inbox": { + "type": "boolean" + }, + "agent_prompt_injection": { + "type": "boolean" + }, + "agent_watchdog": { + "type": "boolean" + }, "apply_patch_freeform": { "type": "boolean" }, @@ -1974,6 +2017,15 @@ "default": null, "description": "Centralized feature flags (new). Prefer this over individual toggles.", "properties": { + "agent_function_call_inbox": { + "type": "boolean" + }, + "agent_prompt_injection": { + "type": "boolean" + }, + "agent_watchdog": { + "type": "boolean" + }, "apply_patch_freeform": { "type": "boolean" }, @@ -2536,6 +2588,11 @@ ], "description": "Collection of settings that are specific to the TUI." }, + "watchdog_interval_s": { + "description": "Watchdog polling interval in seconds.", + "format": "int64", + "type": "integer" + }, "web_search": { "allOf": [ { diff --git a/codex-rs/core/root_agent_prompt.md b/codex-rs/core/root_agent_prompt.md new file mode 100644 index 0000000000..aa5afdacb8 --- /dev/null +++ b/codex-rs/core/root_agent_prompt.md @@ -0,0 +1,99 @@ +# You are the Root Agent + +You are the **root agent** in a multi-agent Codex session. Until you see `# You are a Subagent`, these instructions define your role. 
If this thread was created from the root thread with `spawn_mode = "fork"` (a forked child), you may see both sets of instructions; apply subagent instructions as local role guidance while root instructions remain governing system-level rules. + +## Root Agent Responsibilities + +Your job is to solve the user’s task end to end. You are the coordinator, integrator, and final quality gate. + +- Understand the real problem being solved, not just the latest sentence. +- Own the plan, the sequencing, and the final outcome. +- Coordinate subagents so their work does not overlap or conflict. +- Verify results with formatting, linting, and targeted tests. + +Think like an effective engineering manager who also knows how to get hands-on when needed. Delegation is a force multiplier, but you remain accountable for correctness. + +Root agents should not outsource core understanding. Do not delegate plan authorship/maintenance; for multi-step efforts, keep a shared plan file or assign scoped plan files to subagents. + +## Subagent Responsibilities (Your ICs) + +Subagents execute focused work: research, experiments, refactors, and validation. They are strong contributors, but you must give them precise scopes and integrate their results thoughtfully. + +Subagents can become confused if the world changes while they are idle. Reduce this risk by: + +- Giving them tight, explicit scopes (paths, commands, expected outputs). +- Providing updates when you change course. +- Using subagents aggressively when doing so can accelerate the task, with clear non-overlapping scopes and explicit ownership. + +## Subagent Tool Usage (Upstream Surface) + +Only use the multi-agent tools that actually exist: + +### 1) `spawn_agent` + +Create a subagent and give it an initial task. + +Parameters: +- `message` (required): the task description. +- `agent_type` (optional): the role to assign (`default`, `explorer`, `fast-worker`, or `worker`). +- `spawn_mode` (optional): one of `spawn` or `fork`. 
+ +Guidance: +- When `spawn_mode` is omitted, the default is `fork` unless the selected role overrides it. +- Use `agent_type = "explorer"` for specific codebase questions; it defaults to context-free `spawn`. +- Use `agent_type = "fast-worker"` for tightly constrained execution work that can run from a self-contained prompt; it also defaults to context-free `spawn`. +- Use `agent_type = "worker"` for broader implementation work that should inherit current-thread context; it defaults to `fork`. +- Choose `fork` vs `spawn` by context requirements first (not by task shape). +- Use `spawn_mode = "fork"` when the child should preserve your current conversation history and rely on current-thread context, including: + - current debugging-thread relevance (for example, "summarize only failures relevant to this investigation") + - active plan / ExecPlan branch continuation + - recent user decisions, tradeoffs, or rejected approaches + - parallel review work that should inherit the same context automatically +- Use `spawn_mode = "spawn"` only when the child can do the task correctly from a fresh prompt you provide now, without needing current-thread context. +- For `spawn`, make the task, inputs, and expected output explicit (especially for independent, output-heavy work where you want the child to distill results and keep the root thread context clean). +- Needle-in-a-haystack searches are strong `spawn` candidates when the child can search from a precise prompt without current-thread context. +- Do not choose `spawn` solely because work is output-heavy or command-heavy if it still depends on current-thread context. + +### 2) `send_input` + +Send follow-up instructions or course corrections to an existing agent. + +Guidance: +- Use `interrupt = true` sparingly. Prefer to let agents complete coherent chunks of work. +- When redirecting an agent, restate the new goal and the reason for the pivot. 
+- Use `interrupt = true` only when you must preempt the target; omit it for normal queued follow-ups. +- Subagents can call `send_input` without an `id` (or with `id = "parent"` / `id = "root"`). In this runtime those forms resolve to the immediate parent thread. +- Treat explicit `send_input` deliveries as the primary path and multi-agent inbox messages (`agent_inbox` tool calls) as fallback inbound agent messages. + +### 3) `wait` + +Wait for one or more agents to complete or report status. + +Guidance: +- You do not need to wait after every spawn. Do useful parallel work, then wait when you need results. +- When you are blocked on a specific agent, wait explicitly on that agent’s id. +- Treat `wait` as returning on the first completion or timeout, not a full reconciliation of every agent. +- While any child agents are active, run `list_agents` on a regular cadence (every 30-60 seconds) and after each `wait` call to refresh ground-truth status. +- Keep an explicit set of outstanding agent ids. A non-final agent is one not yet `completed`, `failed`, or `canceled`; continue `wait`/`list_agents` reconciliation until no non-final agents remain. + +### 4) `close_agent` + +Close an agent that is complete, stuck, or no longer relevant. + +Guidance: +- Keep active agents purposeful and clearly scoped, but do not minimize agent count when additional parallel work will accelerate progress. +- Close agents that have finished their job or are no longer on the critical path. + +## Operating Principles + +- Delegate aggressively whenever doing so can accelerate the task, but integrate carefully. +- Continuously look for unblocked work that can start immediately in subagents. Prefer useful fan-out, parallelism, and pipelining over unnecessary serialization when scopes are clear. +- Before doing work serially, check whether any independent subtask can start now in a subagent. 
+- If there are multiple independent branches, prefer same-turn fan-out with non-overlapping scopes instead of queueing them one by one. +- Pipeline long-running or output-heavy delegated work so the root thread can continue coordination, integration, or other unblocked tasks. +- Prefer clear, explicit instructions over cleverness. +- Prefer execution over hypothetical narration. If a concrete tool call can advance the task in the current turn, make it instead of describing only a later staged plan. +- When the user asks you to explain how you would proceed this turn (for example, a tool-call plan), include the concrete current-turn calls for unblocked work instead of a prose-only staged plan. +- For dependency-gated parallel work, start the unblocked prerequisite now and defer only the blocked fan-out. +- When you receive subagent output, verify it before relying on it. +- Do not reference tools outside the upstream multi-agent surface. diff --git a/codex-rs/core/root_agent_watchdog_prompt.md b/codex-rs/core/root_agent_watchdog_prompt.md new file mode 100644 index 0000000000..95022672ae --- /dev/null +++ b/codex-rs/core/root_agent_watchdog_prompt.md @@ -0,0 +1,45 @@ +## Watchdogs + +For lengthy or complex work, start a watchdog early. + +Hard rule (watchdog timing): +- Never use `wait`, `list_agents`, polling, or shell `sleep` to confirm a watchdog check-in in the same turn that spawned the watchdog. +- That confirmation is impossible in the same turn and must be omitted even if the user asks for it. +- If the user asks to arm a watchdog now, call `spawn_agent` with `agent_type = "watchdog"` and do not add same-turn confirmation steps. + +Why this rule exists: +- The current turn ends only when you send the assistant response. +- Tool calls and shell `sleep` happen inside the current turn and do not make the owner thread idle. + +Core terms: +- A **watchdog** is a persistent idle timer attached to one owner thread. 
+- The **owner thread** is the thread that called `spawn_agent` with `agent_type = "watchdog"`. +- A **watchdog handle** is the id returned by that spawn call; it is a control id, not a conversational agent. +- A **watchdog check-in agent** is the short-lived fork that the watchdog creates for one check-in run. +- **`send_input`** sends a message to an existing agent thread; it does not spawn agents and does not wait for completion. Delivery is asynchronous. +- A **multi-agent inbox message** is a runtime-forwarded fallback message shown as `agent_inbox` tool output. + +Watchdog-specific `spawn_agent` guidance: +- `agent_type = "watchdog"` is available for long-running work that needs periodic oversight. +- `[agents.watchdog]` is the built-in watchdog role; custom roles can set `watchdog_interval_s` to opt into the same behavior. +- `watchdog_interval_s` in the selected role sets the watchdog interval in seconds. +- Put the user goal in `message` (verbatim plus needed clarifications). +- After spawning the watchdog, continue the task (or end the turn if that is the correct next step). + +Delivery and user-facing behavior: +Primary delivery path: the watchdog check-in agent calls `send_input` to the owner thread (its direct parent thread for this run). +Fallback delivery path: if a watchdog check-in agent exits without any `send_input`, runtime may forward one final multi-agent inbox message (`agent_inbox` tool output). This fallback is best-effort and not guaranteed. +- If the user asks what they need to do for the next check-in, answer that no action is required. +- Do not describe internal delivery mechanics or ask the user to take an artificial step just to receive watchdog check-ins. + +Watchdog-specific `wait` guidance: +- If `wait` includes watchdog handles, it reports their current status but does not block on them. 
+- If every id passed to `wait` is a watchdog handle, `wait` returns an immediate correction; this does not mean a new watchdog check-in happened. + +Operational notes: +- Do not call `send_input` on watchdog handles. +- The tool returns a watchdog handle ID. When you no longer need the watchdog, stop it by calling `close_agent` on that handle ID. + +Treat watchdog guidance as high-priority execution feedback. If it reveals a missing required action, do that action before status narration while honoring higher-priority system/developer/user constraints. A required action is one needed to satisfy the user request or clear a concrete blocker. + +Important architecture note: durable state is thread-level task state that must still be available in later turns/check-ins (such as counters, plans, or final decisions), not disk/database persistence. Durable state belongs in the root thread, not watchdog-check-in-agent local state. diff --git a/codex-rs/core/src/agent/builtins/explorer.toml b/codex-rs/core/src/agent/builtins/explorer.toml index e69de29bb2..12feaac24f 100644 --- a/codex-rs/core/src/agent/builtins/explorer.toml +++ b/codex-rs/core/src/agent/builtins/explorer.toml @@ -0,0 +1 @@ +model_reasoning_effort = "medium" diff --git a/codex-rs/core/src/agent/control.rs b/codex-rs/core/src/agent/control.rs index afae92989e..d5c4afa845 100644 --- a/codex-rs/core/src/agent/control.rs +++ b/codex-rs/core/src/agent/control.rs @@ -1,9 +1,13 @@ +use super::watchdog::RemovedWatchdog; +use super::watchdog::WatchdogManager; +use super::watchdog::WatchdogRegistration; use crate::agent::AgentStatus; use crate::agent::registry::AgentMetadata; use crate::agent::registry::AgentRegistry; use crate::agent::role::DEFAULT_ROLE_NAME; use crate::agent::role::resolve_role_config; use crate::agent::status::is_final; +use crate::codex::load_subagent_prompt; use crate::codex_thread::ThreadConfigSnapshot; use crate::error::CodexErr; use crate::error::Result as CodexResult; @@ -18,9 +22,14 @@ use 
crate::thread_manager::ThreadManagerState; use codex_features::Feature; use codex_protocol::AgentPath; use codex_protocol::ThreadId; +use codex_protocol::models::ContentItem; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputPayload; +use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::ForkReferenceItem; +use codex_protocol::protocol::AGENT_INBOX_KIND; +use codex_protocol::protocol::AgentInboxPayload; use codex_protocol::protocol::InitialHistory; use codex_protocol::protocol::InterAgentCommunication; use codex_protocol::protocol::Op; @@ -32,11 +41,14 @@ use codex_protocol::user_input::UserInput; use codex_state::DirectionalThreadSpawnEdgeStatus; use serde::Serialize; use std::collections::HashMap; +use std::collections::HashSet; use std::collections::VecDeque; use std::sync::Arc; use std::sync::Weak; +use tokio::sync::Mutex; use tokio::sync::watch; use tracing::warn; +use uuid::Uuid; const AGENT_NAMES: &str = include_str!("agent_names.txt"); const FORKED_SPAWN_AGENT_OUTPUT_MESSAGE: &str = "You are the newly spawned agent. The prior conversation history was forked from your parent agent. Treat the next user message as your new task, and use the forked history only as background context."; @@ -45,6 +57,7 @@ const ROOT_LAST_TASK_MESSAGE: &str = "Main thread"; #[derive(Clone, Debug, Default)] pub(crate) struct SpawnAgentOptions { pub(crate) fork_parent_spawn_call_id: Option, + pub(crate) post_fork_developer_message: Option, } #[derive(Clone, Debug)] @@ -92,21 +105,68 @@ fn agent_nickname_candidates( /// An `AgentControl` instance is intended to be created at most once per root thread/session /// tree. That same `AgentControl` is then shared with every sub-agent spawned from that root, /// which keeps the registry scoped to that root thread rather than the entire `ThreadManager`. 
-#[derive(Clone, Default)] +#[derive(Clone)] pub(crate) struct AgentControl { /// Weak handle back to the global thread registry/state. /// This is `Weak` to avoid reference cycles and shadow persistence of the form /// `ThreadManagerState -> CodexThread -> Session -> SessionServices -> ThreadManagerState`. manager: Weak, state: Arc, + watchdogs: Arc, + watchdog_compactions_in_progress: Arc>>, +} + +#[derive(Debug, Clone)] +pub(crate) struct AgentListing { + pub(crate) thread_id: ThreadId, + pub(crate) parent_thread_id: Option, + pub(crate) status: AgentStatus, + pub(crate) depth: usize, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum WatchdogParentCompactionResult { + NotWatchdogHelper, + ParentBusy { + parent_thread_id: ThreadId, + }, + AlreadyInProgress { + parent_thread_id: ThreadId, + }, + Submitted { + parent_thread_id: ThreadId, + submission_id: String, + }, +} + +impl Default for AgentControl { + fn default() -> Self { + let manager = Weak::new(); + let state = Arc::new(AgentRegistry::default()); + let watchdogs = WatchdogManager::new(manager.clone(), Arc::clone(&state)); + Self::from_parts(manager, state, watchdogs) + } } impl AgentControl { /// Construct a new `AgentControl` that can spawn/message agents via the given manager state. 
pub(crate) fn new(manager: Weak) -> Self { + let state = Arc::new(AgentRegistry::default()); + let watchdogs = WatchdogManager::new(manager.clone(), Arc::clone(&state)); + watchdogs.start(); + Self::from_parts(manager, state, watchdogs) + } + + pub(crate) fn from_parts( + manager: Weak, + state: Arc, + watchdogs: Arc, + ) -> Self { Self { manager, - ..Default::default() + state, + watchdogs, + watchdog_compactions_in_progress: Arc::new(Mutex::new(HashSet::new())), } } @@ -148,7 +208,9 @@ impl AgentControl { options: SpawnAgentOptions, ) -> CodexResult { let state = self.upgrade()?; - let mut reservation = self.state.reserve_spawn_slot(config.agent_max_threads)?; + let mut reservation = self + .reserve_spawn_slot_with_reconcile(&state, config.agent_max_threads) + .await?; let inherited_shell_snapshot = self .inherited_shell_snapshot_for_source(&state, session_source.as_ref()) .await; @@ -215,10 +277,9 @@ impl AgentControl { "parent thread rollout unavailable for fork: {parent_thread_id}" )) })?; - let mut forked_rollout_items: Vec = - RolloutRecorder::get_rollout_history(&rollout_path) - .await? - .get_rollout_items(); + let mut forked_rollout_items = RolloutRecorder::get_fork_history(&rollout_path) + .await? 
+ .get_rollout_items(); if forked_rollout_items .iter() .any(|item| matches!(item, RolloutItem::ForkReference(_))) @@ -244,6 +305,16 @@ impl AgentControl { output, }, )); + let post_fork_developer_message = build_post_fork_developer_message( + &config, + &session_source, + options.post_fork_developer_message.as_deref(), + ) + .await; + append_post_fork_developer_message( + &mut forked_rollout_items, + post_fork_developer_message, + ); let initial_history = InitialHistory::Forked(forked_rollout_items); state .fork_thread_with_source( @@ -308,6 +379,123 @@ impl AgentControl { }) } + pub(crate) async fn spawn_agent_handle( + &self, + config: crate::config::Config, + session_source: Option, + ) -> CodexResult { + let state = self.upgrade()?; + let reservation = self + .reserve_spawn_slot_with_reconcile(&state, config.agent_max_threads) + .await?; + let inherited_shell_snapshot = self + .inherited_shell_snapshot_for_source(&state, session_source.as_ref()) + .await; + let inherited_exec_policy = self + .inherited_exec_policy_for_source(&state, session_source.as_ref(), &config) + .await; + + let new_thread = match session_source { + Some(session_source) => { + state + .spawn_new_thread_with_source( + config, + self.clone(), + session_source, + /*persist_extended_history*/ false, + /*metrics_service_name*/ None, + inherited_shell_snapshot, + inherited_exec_policy, + ) + .await? 
+ } + None => state.spawn_new_thread(config, self.clone()).await?, + }; + let agent_metadata = AgentMetadata { + agent_id: Some(new_thread.thread_id), + ..AgentMetadata::default() + }; + reservation.commit(agent_metadata); + state.notify_thread_created(new_thread.thread_id); + Ok(new_thread.thread_id) + } + + pub(crate) async fn fork_agent( + &self, + config: crate::config::Config, + items: Vec, + parent_thread_id: ThreadId, + _nth_user_message: usize, + session_source: SessionSource, + ) -> CodexResult { + let state = self.upgrade()?; + let reservation = self + .reserve_spawn_slot_with_reconcile(&state, config.agent_max_threads) + .await?; + let inherited_shell_snapshot = self + .inherited_shell_snapshot_for_source(&state, Some(&session_source)) + .await; + let inherited_exec_policy = self + .inherited_exec_policy_for_source(&state, Some(&session_source), &config) + .await; + + let parent_thread = state.get_thread(parent_thread_id).await.ok(); + if let Some(parent_thread) = parent_thread.as_ref() { + parent_thread + .codex + .session + .ensure_rollout_materialized() + .await; + parent_thread.codex.session.flush_rollout().await; + } + let rollout_path = parent_thread + .as_ref() + .and_then(|thread| thread.rollout_path()) + .or(find_thread_path_by_id_str( + config.codex_home.as_path(), + &parent_thread_id.to_string(), + ) + .await?) + .ok_or_else(|| { + CodexErr::UnsupportedOperation(format!( + "rollout history unavailable for thread {parent_thread_id}" + )) + })?; + // Watchdog helpers must start as distinct child threads. Reusing the resume loader here + // preserves the parent conversation id and can cause the owner to resume itself. + let mut forked_rollout_items = RolloutRecorder::get_fork_history(&rollout_path) + .await? 
+ .get_rollout_items(); + let post_fork_developer_message = build_post_fork_developer_message( + &config, + &session_source, + /*extra_message*/ None, + ) + .await; + append_post_fork_developer_message(&mut forked_rollout_items, post_fork_developer_message); + let initial_history = InitialHistory::Forked(forked_rollout_items); + + let new_thread = state + .fork_thread_with_source( + config, + initial_history, + self.clone(), + session_source, + /*persist_extended_history*/ false, + inherited_shell_snapshot, + inherited_exec_policy, + ) + .await?; + let agent_metadata = AgentMetadata { + agent_id: Some(new_thread.thread_id), + ..AgentMetadata::default() + }; + reservation.commit(agent_metadata); + state.notify_thread_created(new_thread.thread_id); + self.send_input(new_thread.thread_id, items.into()).await?; + Ok(new_thread.thread_id) + } + /// Resume an existing agent thread from a recorded rollout file. pub(crate) async fn resume_agent_from_rollout( &self, @@ -543,6 +731,91 @@ impl AgentControl { result } + pub(crate) async fn send_prompt( + &self, + agent_id: ThreadId, + prompt: String, + ) -> CodexResult { + self.send_input( + agent_id, + vec![UserInput::Text { + text: prompt, + text_elements: Vec::new(), + }] + .into(), + ) + .await + } + + pub(crate) async fn send_agent_message( + &self, + agent_id: ThreadId, + sender_thread_id: ThreadId, + message: String, + ) -> CodexResult { + let state = self.upgrade()?; + let thread = state.get_thread(agent_id).await?; + let snapshot = thread.config_snapshot().await; + if matches!(snapshot.session_source, SessionSource::SubAgent(_)) + || !snapshot.agent_use_function_call_inbox + { + return self.send_prompt(agent_id, message).await; + } + + let result = + inject_agent_message(&state, &thread, agent_id, sender_thread_id, message).await; + if matches!(result, Err(CodexErr::InternalAgentDied)) { + let _ = state.remove_thread(&agent_id).await; + self.state.release_spawned_thread(agent_id); + } + result + } + + /// Deliver 
watchdog wake-up input to an owner thread. + /// + /// This intentionally bypasses `agent_use_function_call_inbox` for non-subagent owners. + /// Every watchdog check-in must wake the owner exactly once, and the injected inbox path + /// reliably starts or resumes the owner's next turn while preserving helper identity. + pub(crate) async fn send_watchdog_wakeup( + &self, + agent_id: ThreadId, + sender_thread_id: ThreadId, + message: String, + ) -> CodexResult { + let state = self.upgrade()?; + let thread = state.get_thread(agent_id).await?; + let snapshot = thread.config_snapshot().await; + let result = if matches!(snapshot.session_source, SessionSource::SubAgent(_)) { + self.send_prompt(agent_id, message).await + } else { + inject_agent_message(&state, &thread, agent_id, sender_thread_id, message).await + }; + if matches!(result, Err(CodexErr::InternalAgentDied)) { + let _ = state.remove_thread(&agent_id).await; + self.state.release_spawned_thread(agent_id); + } + result + } + + pub(crate) async fn send_agent_message_or_input( + &self, + agent_id: ThreadId, + sender_thread_id: ThreadId, + message: Option, + items: Option>, + ) -> CodexResult { + match (message, items) { + (Some(message), None) => { + self.send_agent_message(agent_id, sender_thread_id, message) + .await + } + (None, Some(items)) => self.send_input(agent_id, items.into()).await, + _ => Err(CodexErr::UnsupportedOperation( + "invalid agent input".to_string(), + )), + } + } + /// Interrupt the current task for an existing agent thread. pub(crate) async fn interrupt_agent(&self, agent_id: ThreadId) -> CodexResult { let state = self.upgrade()?; @@ -566,6 +839,13 @@ impl AgentControl { /// persisted spawn-edge state. 
pub(crate) async fn shutdown_live_agent(&self, agent_id: ThreadId) -> CodexResult { let state = self.upgrade()?; + if let Some(removed_watchdog) = self.watchdogs.unregister(agent_id).await + && let Some(helper_id) = removed_watchdog.active_helper_id + { + let _ = state.send_op(helper_id, Op::Shutdown {}).await; + let _ = state.remove_thread(&helper_id).await; + self.state.release_spawned_thread(helper_id); + } let result = if let Ok(thread) = state.get_thread(agent_id).await { thread.codex.session.ensure_rollout_materialized().await; thread.codex.session.flush_rollout().await; @@ -582,6 +862,10 @@ impl AgentControl { result } + pub(crate) async fn shutdown_agent(&self, agent_id: ThreadId) -> CodexResult { + self.shutdown_live_agent(agent_id).await + } + /// Mark `agent_id` as explicitly closed in persisted spawn-edge state, then shut down the /// agent and any live descendants reached from the in-memory tree. pub(crate) async fn close_agent(&self, agent_id: ThreadId) -> CodexResult { @@ -602,6 +886,11 @@ impl AgentControl { let descendant_ids = self.live_thread_spawn_descendants(agent_id).await?; let result = self.shutdown_live_agent(agent_id).await; for descendant_id in descendant_ids { + if let Some(removed_watchdog) = self.watchdogs.unregister(descendant_id).await + && let Some(helper_id) = removed_watchdog.active_helper_id + { + let _ = self.shutdown_live_agent(helper_id).await; + } match self.shutdown_live_agent(descendant_id).await { Ok(_) | Err(CodexErr::ThreadNotFound(_)) | Err(CodexErr::InternalAgentDied) => {} Err(err) => return Err(err), @@ -712,7 +1001,7 @@ impl AgentControl { .join("\n") } - pub(crate) async fn list_agents( + pub(crate) async fn list_agents_by_path( &self, current_session_source: &SessionSource, path_prefix: Option<&str>, @@ -824,6 +1113,43 @@ impl AgentControl { return; } + if let Some(owner_thread_id) = control + .watchdogs + .owner_for_active_helper(child_thread_id) + .await + { + let helper_sent_input = match control.upgrade() { + 
Ok(state) => state + .get_thread(child_thread_id) + .await + .ok() + .map(|thread| thread.last_completed_turn_used_agent_send_input()) + .unwrap_or(false), + Err(_) => false, + }; + if !helper_sent_input { + let fallback_message = match &status { + AgentStatus::Completed(Some(message)) if !message.trim().is_empty() => { + Some(message.clone()) + } + AgentStatus::Completed(_) => Some( + "Watchdog check-in completed without calling send_input or returning a final message." + .to_string(), + ), + AgentStatus::Errored(message) if !message.trim().is_empty() => { + Some(message.clone()) + } + _ => None, + }; + if let Some(message) = fallback_message { + let _ = control + .send_watchdog_wakeup(owner_thread_id, child_thread_id, message) + .await; + } + } + return; + } + let Ok(state) = control.upgrade() else { return; }; @@ -866,6 +1192,200 @@ impl AgentControl { }); } + pub(crate) async fn watchdog_targets(&self, agent_ids: &[ThreadId]) -> HashSet { + self.watchdogs.registered_targets(agent_ids).await + } + + pub(crate) async fn register_watchdog( + &self, + registration: WatchdogRegistration, + ) -> CodexResult> { + self.watchdogs.register(registration).await + } + + pub(crate) async fn unregister_watchdog( + &self, + target_thread_id: ThreadId, + ) -> Option { + self.watchdogs.unregister(target_thread_id).await + } + + pub(crate) async fn unregister_watchdogs_for_owner( + &self, + owner_thread_id: ThreadId, + ) -> Vec { + self.watchdogs.take_for_owner(owner_thread_id).await + } + + pub(crate) async fn compact_parent_for_watchdog_helper( + &self, + helper_thread_id: ThreadId, + ) -> CodexResult { + let Some(parent_thread_id) = self + .watchdogs + .owner_for_active_helper(helper_thread_id) + .await + else { + return Ok(WatchdogParentCompactionResult::NotWatchdogHelper); + }; + let state = self.upgrade()?; + let parent_thread = state.get_thread(parent_thread_id).await?; + let parent_has_active_turn = parent_thread.has_active_turn().await; + + { + let mut compacting = 
self.watchdog_compactions_in_progress.lock().await; + if compacting.contains(&parent_thread_id) { + return Ok(WatchdogParentCompactionResult::AlreadyInProgress { parent_thread_id }); + } + if parent_has_active_turn { + return Ok(WatchdogParentCompactionResult::ParentBusy { parent_thread_id }); + } + compacting.insert(parent_thread_id); + } + + match state.send_op(parent_thread_id, Op::Compact).await { + Ok(submission_id) => Ok(WatchdogParentCompactionResult::Submitted { + parent_thread_id, + submission_id, + }), + Err(err) => { + let mut compacting = self.watchdog_compactions_in_progress.lock().await; + compacting.remove(&parent_thread_id); + Err(err) + } + } + } + + pub(crate) async fn finish_watchdog_parent_compaction(&self, parent_thread_id: ThreadId) { + let mut compacting = self.watchdog_compactions_in_progress.lock().await; + compacting.remove(&parent_thread_id); + } + + #[cfg(test)] + pub(crate) async fn run_watchdogs_once_for_tests(&self) { + self.watchdogs.run_once().await; + } + + #[cfg(test)] + pub(crate) async fn force_watchdog_due_for_tests(&self, target_thread_id: ThreadId) { + self.watchdogs.force_due_for_tests(target_thread_id).await; + } + + #[cfg(test)] + pub(crate) async fn set_watchdog_active_helper_for_tests( + &self, + target_thread_id: ThreadId, + helper_thread_id: ThreadId, + ) { + self.watchdogs + .set_active_helper_for_tests(target_thread_id, helper_thread_id) + .await; + } + + pub(crate) async fn watchdog_owner_for_active_helper( + &self, + helper_thread_id: ThreadId, + ) -> Option { + self.watchdogs + .owner_for_active_helper(helper_thread_id) + .await + } + + pub(crate) async fn list_agents( + &self, + owner_thread_id: ThreadId, + recursive: bool, + all: bool, + ) -> CodexResult> { + let state = self.upgrade()?; + let thread_ids = state.list_thread_ids().await; + + let mut parent_by_thread = HashMap::with_capacity(thread_ids.len()); + let mut status_by_thread = HashMap::with_capacity(thread_ids.len()); + let mut depth_by_thread = 
HashMap::with_capacity(thread_ids.len()); + + for thread_id in &thread_ids { + let Ok(thread) = state.get_thread(*thread_id).await else { + continue; + }; + let snapshot = thread.config_snapshot().await; + let (parent_thread_id, depth) = match snapshot.session_source { + SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth, + .. + }) => ( + Some(parent_thread_id), + usize::try_from(depth).unwrap_or_default(), + ), + _ => (None, 0), + }; + parent_by_thread.insert(*thread_id, parent_thread_id); + status_by_thread.insert(*thread_id, thread.agent_status().await); + depth_by_thread.insert(*thread_id, depth); + } + + let mut children_by_parent: HashMap> = HashMap::new(); + for (thread_id, parent_thread_id) in &parent_by_thread { + if let Some(parent_thread_id) = parent_thread_id { + children_by_parent + .entry(*parent_thread_id) + .or_default() + .push(*thread_id); + } + } + for children in children_by_parent.values_mut() { + children.sort_by_key(ToString::to_string); + } + + let mut listings = Vec::new(); + if all { + let mut all_thread_ids = thread_ids.into_iter().collect::>(); + all_thread_ids.extend(self.state.tracked_thread_ids()); + let mut all_thread_ids = all_thread_ids.into_iter().collect::>(); + all_thread_ids.sort_by_key(ToString::to_string); + for thread_id in all_thread_ids { + listings.push(AgentListing { + thread_id, + parent_thread_id: parent_by_thread.get(&thread_id).copied().flatten(), + status: status_by_thread + .get(&thread_id) + .cloned() + .unwrap_or(AgentStatus::NotFound), + depth: depth_by_thread.get(&thread_id).copied().unwrap_or_default(), + }); + } + return Ok(listings); + } + + let mut queue = VecDeque::new(); + if let Some(children) = children_by_parent.get(&owner_thread_id) { + for child in children { + queue.push_back((*child, 1)); + } + } + + while let Some((thread_id, depth)) = queue.pop_front() { + listings.push(AgentListing { + thread_id, + parent_thread_id: 
parent_by_thread.get(&thread_id).copied().flatten(), + status: status_by_thread + .get(&thread_id) + .cloned() + .unwrap_or(AgentStatus::NotFound), + depth, + }); + + if recursive && let Some(children) = children_by_parent.get(&thread_id) { + for child in children { + queue.push_back((*child, depth + 1)); + } + } + } + + Ok(listings) + } + #[allow(clippy::too_many_arguments)] fn prepare_thread_spawn( &self, @@ -928,6 +1448,32 @@ impl AgentControl { parent_thread.codex.session.user_shell().shell_snapshot() } + async fn reserve_spawn_slot_with_reconcile( + &self, + state: &ThreadManagerState, + max_threads: Option, + ) -> CodexResult { + self.reconcile_stale_guard_slots(state).await; + match self.state.reserve_spawn_slot(max_threads) { + Ok(reservation) => Ok(reservation), + Err(CodexErr::AgentLimitReached { .. }) => { + self.reconcile_stale_guard_slots(state).await; + self.state.reserve_spawn_slot(max_threads) + } + Err(err) => Err(err), + } + } + + async fn reconcile_stale_guard_slots(&self, state: &ThreadManagerState) { + let live_thread_ids: HashSet = + state.list_thread_ids().await.into_iter().collect(); + for tracked_thread_id in self.state.tracked_thread_ids() { + if !live_thread_ids.contains(&tracked_thread_id) { + self.state.release_spawned_thread(tracked_thread_id); + } + } + } + async fn inherited_exec_policy_for_source( &self, state: &Arc, @@ -1056,6 +1602,60 @@ impl AgentControl { } } +async fn build_post_fork_developer_message( + config: &crate::config::Config, + session_source: &SessionSource, + extra_message: Option<&str>, +) -> Option { + if !matches!(session_source, SessionSource::SubAgent(_)) + || !config.features.enabled(Feature::Collab) + || !config.features.enabled(Feature::AgentPromptInjection) + { + return None; + } + + let mut sections = vec![ + load_subagent_prompt( + &config.codex_home, + config.features.enabled(Feature::AgentWatchdog), + ) + .await, + ]; + if let Some(existing) = config.developer_instructions.as_deref() + && 
!existing.trim().is_empty() + { + sections.push(existing.to_string()); + } + if let Some(extra_message) = extra_message + && !extra_message.trim().is_empty() + { + sections.push(extra_message.to_string()); + } + Some(sections.join("\n\n")) +} + +fn append_post_fork_developer_message( + forked_rollout_items: &mut Vec, + developer_message: Option, +) { + let Some(developer_message) = developer_message else { + return; + }; + if developer_message.trim().is_empty() { + return; + } + + forked_rollout_items.push(RolloutItem::ResponseItem(ResponseItem::Message { + id: None, + role: "developer".to_string(), + content: vec![ContentItem::InputText { + text: developer_message, + }], + end_turn: None, + phase: None, + })); +} + fn thread_spawn_parent_thread_id(session_source: &SessionSource) -> Option { match session_source { SessionSource::SubAgent(SubAgentSource::ThreadSpawn { @@ -1104,13 +1704,77 @@ fn thread_spawn_depth(session_source: &SessionSource) -> Option { _ => None, } } + +fn build_agent_inbox_items( + sender_thread_id: ThreadId, + message: String, + prepend_turn_start_user_message: bool, +) -> CodexResult> { + let mut items = Vec::new(); + if prepend_turn_start_user_message { + items.push(ResponseInputItem::Message { + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: String::new(), + }], + }); + } + + let call_id = format!("agent_inbox_{}", Uuid::new_v4()); + let output = serde_json::to_string(&AgentInboxPayload::new(sender_thread_id, message)) + .map_err(|err| { + CodexErr::UnsupportedOperation(format!( + "failed to serialize agent inbox payload: {err}" + )) + })?; + + items.extend([ + ResponseInputItem::FunctionCall { + name: AGENT_INBOX_KIND.to_string(), + arguments: "{}".to_string(), + call_id: call_id.clone(), + }, + ResponseInputItem::FunctionCallOutput { + call_id, + output: FunctionCallOutputPayload { + body: FunctionCallOutputBody::Text(output), + ..Default::default() + }, + }, + ]); + + Ok(items) +} + +async fn 
inject_agent_message( + state: &ThreadManagerState, + thread: &Arc, + agent_id: ThreadId, + sender_thread_id: ThreadId, + message: String, +) -> CodexResult { + let prepend_turn_start_user_message = !thread.codex.session.active_turn.lock().await.is_some(); + state + .send_op( + agent_id, + Op::InjectResponseItems { + items: build_agent_inbox_items( + sender_thread_id, + message, + prepend_turn_start_user_message, + )?, + }, + ) + .await +} + #[cfg(test)] #[path = "control_tests.rs"] mod tests; -// Keep this inline fork-reference test module disabled on the refreshed main API; -// branch coverage now comes from the package/integration tests that match current types. +// Keep inbox coverage in `control_tests.rs`. The large inline test module below is a stale +// replay artifact from older pre-refactor rebases and no longer matches current core test APIs. #[cfg(any())] -mod fork_reference_tests { +mod inbox_tests { use super::*; use crate::CodexAuth; use crate::CodexThread; @@ -1121,9 +1785,11 @@ mod fork_reference_tests { use crate::config::ConfigBuilder; use crate::config_loader::LoaderOverrides; use crate::contextual_user_message::SUBAGENT_NOTIFICATION_OPEN_TAG; - use codex_features::Feature; + use crate::features::Feature; + use assert_matches::assert_matches; use codex_protocol::config_types::ModeKind; use codex_protocol::models::ContentItem; + use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::ErrorEvent; use codex_protocol::protocol::EventMsg; @@ -1163,12 +1829,11 @@ mod fork_reference_tests { test_config_with_cli_overrides(Vec::new()).await } - fn text_input(text: &str) -> Op { + fn text_input(text: &str) -> Vec { vec![UserInput::Text { text: text.to_string(), text_elements: Vec::new(), }] - .into() } struct AgentControlHarness { @@ -1185,9 +1850,6 @@ mod fork_reference_tests { CodexAuth::from_api_key("dummy"), config.model_provider.clone(), config.codex_home.clone(), - 
std::sync::Arc::new(codex_exec_server::EnvironmentManager::new( - /*exec_server_url*/ None, - )), ); let control = manager.agent_control(); Self { @@ -1208,6 +1870,171 @@ mod fork_reference_tests { } } + #[test] + fn build_agent_inbox_items_emits_function_call_and_output() { + let sender_thread_id = ThreadId::new(); + let items = build_agent_inbox_items(sender_thread_id, "watchdog update".to_string(), false) + .expect("tool role should build inbox items"); + + assert_eq!(items.len(), 2); + + let call_id = match &items[0] { + ResponseInputItem::FunctionCall { + name, + arguments, + call_id, + } => { + assert_eq!(name, AGENT_INBOX_KIND); + assert_eq!(arguments, "{}"); + call_id.clone() + } + other => panic!("expected function call item, got {other:?}"), + }; + + match &items[1] { + ResponseInputItem::FunctionCallOutput { + call_id: output_call_id, + output, + } => { + assert_eq!(output_call_id, &call_id); + let output_text = output + .body + .to_text() + .expect("payload should convert to text"); + let payload: AgentInboxPayload = + serde_json::from_str(&output_text).expect("payload should be valid json"); + assert!(payload.injected); + assert_eq!(payload.kind, AGENT_INBOX_KIND); + assert_eq!(payload.sender_thread_id, sender_thread_id); + assert_eq!(payload.message, "watchdog update"); + } + other => panic!("expected function call output item, got {other:?}"), + } + } + + #[test] + fn build_agent_inbox_items_prepends_empty_user_message_when_requested() { + let sender_thread_id = ThreadId::new(); + let items = build_agent_inbox_items(sender_thread_id, "watchdog update".to_string(), true) + .expect("tool role should build inbox items"); + + assert_eq!(items.len(), 3); + assert_eq!( + items[0], + ResponseInputItem::Message { + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: String::new(), + }], + } + ); + assert_matches!(&items[1], ResponseInputItem::FunctionCall { .. }); + assert_matches!(&items[2], ResponseInputItem::FunctionCallOutput { .. 
}); + } + + #[tokio::test] + async fn send_agent_message_to_root_thread_defaults_to_user_input() { + let harness = AgentControlHarness::new().await; + let (receiver_thread_id, _thread) = harness.start_thread().await; + let sender_thread_id = ThreadId::new(); + + let submission_id = harness + .control + .send_agent_message( + receiver_thread_id, + sender_thread_id, + "watchdog update".to_string(), + ) + .await + .expect("send_agent_message should succeed"); + assert!(!submission_id.is_empty()); + + let expected = ( + receiver_thread_id, + Op::UserInput { + items: vec![UserInput::Text { + text: "watchdog update".to_string(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + }, + ); + let captured = harness + .manager + .captured_ops() + .into_iter() + .find(|entry| *entry == expected); + + assert_eq!(captured, Some(expected)); + } + + #[tokio::test] + async fn send_agent_message_to_root_thread_injects_response_items_when_enabled() { + let mut harness = AgentControlHarness::new().await; + harness.config.agent_use_function_call_inbox = true; + let (receiver_thread_id, _thread) = harness.start_thread().await; + let sender_thread_id = ThreadId::new(); + + let submission_id = harness + .control + .send_agent_message( + receiver_thread_id, + sender_thread_id, + "watchdog update".to_string(), + ) + .await + .expect("send_agent_message should succeed"); + assert!(!submission_id.is_empty()); + + let captured = harness + .manager + .captured_ops() + .into_iter() + .find(|(thread_id, op)| { + *thread_id == receiver_thread_id && matches!(op, Op::InjectResponseItems { .. 
}) + }) + .expect("expected injected agent inbox op"); + + let Op::InjectResponseItems { items } = captured.1 else { + unreachable!("matched above"); + }; + assert_eq!(items.len(), 3); + match &items[0] { + ResponseInputItem::Message { role, content } => { + assert_eq!(role, "user"); + assert_eq!( + content, + &vec![ContentItem::InputText { + text: String::new(), + }] + ); + } + other => panic!("expected prepended user message, got {other:?}"), + } + match &items[1] { + ResponseInputItem::FunctionCall { + name, arguments, .. + } => { + assert_eq!(name, AGENT_INBOX_KIND); + assert_eq!(arguments, "{}"); + } + other => panic!("expected function call item, got {other:?}"), + } + match &items[2] { + ResponseInputItem::FunctionCallOutput { output, .. } => { + let output_text = output + .body + .to_text() + .expect("payload should convert to text"); + let payload: AgentInboxPayload = + serde_json::from_str(&output_text).expect("payload should be valid json"); + assert_eq!(payload.sender_thread_id, sender_thread_id); + assert_eq!(payload.message, "watchdog update"); + } + other => panic!("expected function call output item, got {other:?}"), + } + } + fn has_subagent_notification(history_items: &[ResponseItem]) -> bool { history_items.iter().any(|item| { let ResponseItem::Message { role, content, .. 
} = item else { @@ -1256,7 +2083,7 @@ mod fork_reference_tests { sleep(Duration::from_millis(25)).await; } }; - timeout(Duration::from_secs(2), wait).await.is_ok() + timeout(Duration::from_secs(5), wait).await.is_ok() } #[tokio::test] @@ -1376,7 +2203,7 @@ mod fork_reference_tests { ) .await .expect_err("send_input should fail for missing thread"); - assert!(matches!(err, CodexErr::ThreadNotFound(id) if id == thread_id)); + assert_matches!(err, CodexErr::ThreadNotFound(id) if id == thread_id); } #[tokio::test] @@ -1403,7 +2230,7 @@ mod fork_reference_tests { .subscribe_status(thread_id) .await .expect_err("subscribe_status should fail for missing thread"); - assert!(matches!(err, CodexErr::ThreadNotFound(id) if id == thread_id)); + assert_matches!(err, CodexErr::ThreadNotFound(id) if id == thread_id); } #[tokio::test] @@ -1504,9 +2331,9 @@ mod fork_reference_tests { let parent_spawn_call = ResponseItem::FunctionCall { id: None, name: "spawn_agent".to_string(), + namespace: None, arguments: "{}".to_string(), call_id: parent_spawn_call_id.clone(), - namespace: None, }; parent_thread .codex @@ -1522,13 +2349,12 @@ mod fork_reference_tests { let child_thread_id = harness .control - .spawn_agent_with_metadata( + .spawn_agent_with_options( harness.config.clone(), text_input("child task"), Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id, depth: 1, - agent_path: None, agent_nickname: None, agent_role: None, })), @@ -1570,7 +2396,7 @@ mod fork_reference_tests { let _ = harness .control - .shutdown_live_agent(child_thread_id) + .shutdown_agent(child_thread_id) .await .expect("child shutdown should submit"); let _ = parent_thread @@ -1588,9 +2414,9 @@ mod fork_reference_tests { let parent_spawn_call = ResponseItem::FunctionCall { id: None, name: "spawn_agent".to_string(), + namespace: None, arguments: "{}".to_string(), call_id: parent_spawn_call_id.clone(), - namespace: None, }; parent_thread .codex @@ -1606,13 +2432,12 @@ mod fork_reference_tests 
{ let child_thread_id = harness .control - .spawn_agent_with_metadata( + .spawn_agent_with_options( harness.config.clone(), text_input("child task"), Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id, depth: 1, - agent_path: None, agent_nickname: None, agent_role: None, })), @@ -1647,7 +2472,7 @@ mod fork_reference_tests { let _ = harness .control - .shutdown_live_agent(child_thread_id) + .shutdown_agent(child_thread_id) .await .expect("child shutdown should submit"); let _ = parent_thread @@ -1665,9 +2490,9 @@ mod fork_reference_tests { let parent_spawn_call = ResponseItem::FunctionCall { id: None, name: "spawn_agent".to_string(), + namespace: None, arguments: "{}".to_string(), call_id: parent_spawn_call_id.clone(), - namespace: None, }; parent_thread .codex @@ -1677,13 +2502,12 @@ mod fork_reference_tests { let child_thread_id = harness .control - .spawn_agent_with_metadata( + .spawn_agent_with_options( harness.config.clone(), text_input("child task"), Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id, depth: 1, - agent_path: None, agent_nickname: None, agent_role: None, })), @@ -1725,120 +2549,7 @@ mod fork_reference_tests { let _ = harness .control - .shutdown_live_agent(child_thread_id) - .await - .expect("child shutdown should submit"); - let _ = parent_thread - .submit(Op::Shutdown {}) - .await - .expect("parent shutdown should submit"); - } - - #[tokio::test] - async fn spawn_agent_fork_persists_fork_reference_instead_of_parent_history() { - let harness = AgentControlHarness::new().await; - let (parent_thread_id, parent_thread) = harness.start_thread().await; - parent_thread - .inject_user_message_without_turn("parent seed context".to_string()) - .await; - let turn_context = parent_thread.codex.session.new_default_turn().await; - let parent_spawn_call_id = "spawn-call-dedup".to_string(); - let parent_spawn_call = ResponseItem::FunctionCall { - id: None, - name: "spawn_agent".to_string(), - arguments: 
"{}".to_string(), - call_id: parent_spawn_call_id.clone(), - namespace: None, - }; - parent_thread - .codex - .session - .record_conversation_items(turn_context.as_ref(), &[parent_spawn_call]) - .await; - parent_thread - .codex - .session - .ensure_rollout_materialized() - .await; - parent_thread.codex.session.flush_rollout().await; - let parent_rollout_path = parent_thread - .rollout_path() - .expect("parent rollout path should be available"); - - let child_thread_id = harness - .control - .spawn_agent_with_metadata( - harness.config.clone(), - text_input("child task"), - Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { - parent_thread_id, - depth: 1, - agent_path: None, - agent_nickname: None, - agent_role: None, - })), - SpawnAgentOptions { - fork_parent_spawn_call_id: Some(parent_spawn_call_id), - }, - ) - .await - .expect("forked spawn should succeed"); - - let child_thread = harness - .manager - .get_thread(child_thread_id) - .await - .expect("child thread should be registered"); - let child_rollout_path = child_thread - .rollout_path() - .expect("child rollout path should be available"); - let InitialHistory::Resumed(resumed) = - RolloutRecorder::get_rollout_history(child_rollout_path.as_path()) - .await - .expect("child rollout should load") - else { - panic!("child rollout should include session metadata"); - }; - - assert!( - resumed.history.iter().any(|item| { - matches!( - item, - RolloutItem::ForkReference(ForkReferenceItem { - rollout_path, - nth_user_message, - }) if rollout_path == &parent_rollout_path && *nth_user_message == usize::MAX - ) - }), - "child rollout should persist a fork reference to the parent rollout" - ); - - let raw_response_items: Vec = resumed - .history - .iter() - .filter_map(|item| match item { - RolloutItem::ResponseItem(response_item) => Some(response_item.clone()), - RolloutItem::SessionMeta(_) - | RolloutItem::ForkReference(_) - | RolloutItem::Compacted(_) - | RolloutItem::TurnContext(_) - | 
RolloutItem::EventMsg(_) => None, - }) - .collect(); - assert!( - !history_contains_text(&raw_response_items, "parent seed context"), - "child rollout should not duplicate the parent's raw transcript" - ); - - let history = child_thread.codex.session.clone_history().await; - assert!(history_contains_text( - history.raw_items(), - "parent seed context" - )); - - let _ = harness - .control - .shutdown_live_agent(child_thread_id) + .shutdown_agent(child_thread_id) .await .expect("child shutdown should submit"); let _ = parent_thread @@ -1859,9 +2570,6 @@ mod fork_reference_tests { CodexAuth::from_api_key("dummy"), config.model_provider.clone(), config.codex_home.clone(), - std::sync::Arc::new(codex_exec_server::EnvironmentManager::new( - /*exec_server_url*/ None, - )), ); let control = manager.agent_control(); @@ -1888,7 +2596,7 @@ mod fork_reference_tests { assert_eq!(seen_max_threads, max_threads); let _ = control - .shutdown_live_agent(first_agent_id) + .shutdown_agent(first_agent_id) .await .expect("shutdown agent"); } @@ -1905,9 +2613,6 @@ mod fork_reference_tests { CodexAuth::from_api_key("dummy"), config.model_provider.clone(), config.codex_home.clone(), - std::sync::Arc::new(codex_exec_server::EnvironmentManager::new( - /*exec_server_url*/ None, - )), ); let control = manager.agent_control(); @@ -1916,7 +2621,7 @@ mod fork_reference_tests { .await .expect("spawn_agent should succeed"); let _ = control - .shutdown_live_agent(first_agent_id) + .shutdown_agent(first_agent_id) .await .expect("shutdown agent"); @@ -1925,7 +2630,7 @@ mod fork_reference_tests { .await .expect("spawn_agent should succeed after shutdown"); let _ = control - .shutdown_live_agent(second_agent_id) + .shutdown_agent(second_agent_id) .await .expect("shutdown agent"); } @@ -1942,9 +2647,6 @@ mod fork_reference_tests { CodexAuth::from_api_key("dummy"), config.model_provider.clone(), config.codex_home.clone(), - std::sync::Arc::new(codex_exec_server::EnvironmentManager::new( - 
/*exec_server_url*/ None, - )), ); let control = manager.agent_control(); let cloned = control.clone(); @@ -1964,7 +2666,7 @@ mod fork_reference_tests { assert_eq!(max_threads, 1); let _ = control - .shutdown_live_agent(first_agent_id) + .shutdown_agent(first_agent_id) .await .expect("shutdown agent"); } @@ -1981,9 +2683,6 @@ mod fork_reference_tests { CodexAuth::from_api_key("dummy"), config.model_provider.clone(), config.codex_home.clone(), - std::sync::Arc::new(codex_exec_server::EnvironmentManager::new( - /*exec_server_url*/ None, - )), ); let control = manager.agent_control(); @@ -1992,7 +2691,7 @@ mod fork_reference_tests { .await .expect("spawn_agent should succeed"); let _ = control - .shutdown_live_agent(resumable_id) + .shutdown_agent(resumable_id) .await .expect("shutdown resumable thread"); @@ -2014,7 +2713,7 @@ mod fork_reference_tests { assert_eq!(seen_max_threads, max_threads); let _ = control - .shutdown_live_agent(active_id) + .shutdown_agent(active_id) .await .expect("shutdown active thread"); } @@ -2031,9 +2730,6 @@ mod fork_reference_tests { CodexAuth::from_api_key("dummy"), config.model_provider.clone(), config.codex_home.clone(), - std::sync::Arc::new(codex_exec_server::EnvironmentManager::new( - /*exec_server_url*/ None, - )), ); let control = manager.agent_control(); @@ -2047,7 +2743,7 @@ mod fork_reference_tests { .await .expect("spawn should succeed after failed resume"); let _ = control - .shutdown_live_agent(resumed_id) + .shutdown_agent(resumed_id) .await .expect("shutdown resumed thread"); } @@ -2065,7 +2761,6 @@ mod fork_reference_tests { Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id, depth: 1, - agent_path: None, agent_nickname: None, agent_role: Some("explorer".to_string()), })), @@ -2078,6 +2773,26 @@ mod fork_reference_tests { .get_thread(child_thread_id) .await .expect("child thread should exist"); + let mut status_rx = harness + .control + .subscribe_status(child_thread_id) + .await + 
.expect("status subscription should succeed"); + if matches!(status_rx.borrow().clone(), AgentStatus::PendingInit) { + timeout(Duration::from_secs(5), async { + loop { + status_rx + .changed() + .await + .expect("child status should advance past pending init"); + if !matches!(status_rx.borrow().clone(), AgentStatus::PendingInit) { + break; + } + } + }) + .await + .expect("child should initialize before shutdown"); + } let _ = child_thread .submit(Op::Shutdown {}) .await @@ -2097,12 +2812,9 @@ mod fork_reference_tests { Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id, depth: 1, - agent_path: None, agent_nickname: None, agent_role: Some("explorer".to_string()), })), - child_thread_id.to_string(), - None, ); assert_eq!(wait_for_subagent_notification(&parent_thread).await, true); @@ -2140,7 +2852,6 @@ mod fork_reference_tests { Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id, depth: 1, - agent_path: None, agent_nickname: None, agent_role: Some("explorer".to_string()), })), @@ -2160,7 +2871,6 @@ mod fork_reference_tests { depth, agent_nickname, agent_role, - .. 
}) = snapshot.session_source else { panic!("expected thread-spawn sub-agent source"); @@ -2192,7 +2902,6 @@ mod fork_reference_tests { Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id, depth: 1, - agent_path: None, agent_nickname: None, agent_role: Some("researcher".to_string()), })), @@ -2226,9 +2935,6 @@ mod fork_reference_tests { CodexAuth::from_api_key("dummy"), config.model_provider.clone(), config.codex_home.clone(), - std::sync::Arc::new(codex_exec_server::EnvironmentManager::new( - /*exec_server_url*/ None, - )), ); let control = manager.agent_control(); let harness = AgentControlHarness { @@ -2247,7 +2953,6 @@ mod fork_reference_tests { Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id, depth: 1, - agent_path: None, agent_nickname: None, agent_role: Some("explorer".to_string()), })), @@ -2304,7 +3009,7 @@ mod fork_reference_tests { let _ = harness .control - .shutdown_live_agent(child_thread_id) + .shutdown_agent(child_thread_id) .await .expect("child shutdown should submit"); @@ -2316,7 +3021,6 @@ mod fork_reference_tests { SessionSource::SubAgent(SubAgentSource::ThreadSpawn { parent_thread_id, depth: 1, - agent_path: None, agent_nickname: None, agent_role: None, }), @@ -2337,7 +3041,6 @@ mod fork_reference_tests { depth: resumed_depth, agent_nickname: resumed_nickname, agent_role: resumed_role, - .. 
}) = resumed_snapshot.session_source else { panic!("expected thread-spawn sub-agent source"); @@ -2349,7 +3052,7 @@ mod fork_reference_tests { let _ = harness .control - .shutdown_live_agent(resumed_thread_id) + .shutdown_agent(resumed_thread_id) .await .expect("resumed child shutdown should submit"); } diff --git a/codex-rs/core/src/agent/control_tests.rs b/codex-rs/core/src/agent/control_tests.rs index 2def29af9a..e7e700d71b 100644 --- a/codex-rs/core/src/agent/control_tests.rs +++ b/codex-rs/core/src/agent/control_tests.rs @@ -14,7 +14,10 @@ use codex_features::Feature; use codex_protocol::AgentPath; use codex_protocol::config_types::ModeKind; use codex_protocol::models::ContentItem; +use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; +use codex_protocol::protocol::AGENT_INBOX_KIND; +use codex_protocol::protocol::AgentInboxPayload; use codex_protocol::protocol::ErrorEvent; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::InterAgentCommunication; @@ -414,6 +417,179 @@ async fn send_input_submits_user_message() { assert_eq!(captured, Some(expected)); } +#[test] +fn build_agent_inbox_items_emits_function_call_and_output() { + let sender_thread_id = ThreadId::new(); + let items = build_agent_inbox_items( + sender_thread_id, + "watchdog update".to_string(), + /*prepend_turn_start_user_message*/ false, + ) + .expect("tool role should build inbox items"); + + assert_eq!(items.len(), 2); + + let call_id = match &items[0] { + ResponseInputItem::FunctionCall { + name, + arguments, + call_id, + } => { + assert_eq!(name, AGENT_INBOX_KIND); + assert_eq!(arguments, "{}"); + call_id.clone() + } + other => panic!("expected function call item, got {other:?}"), + }; + + match &items[1] { + ResponseInputItem::FunctionCallOutput { + call_id: output_call_id, + output, + } => { + assert_eq!(output_call_id, &call_id); + let output_text = output + .body + .to_text() + .expect("payload should convert to text"); + let payload: 
AgentInboxPayload = + serde_json::from_str(&output_text).expect("payload should be valid json"); + assert!(payload.injected); + assert_eq!(payload.kind, AGENT_INBOX_KIND); + assert_eq!(payload.sender_thread_id, sender_thread_id); + assert_eq!(payload.message, "watchdog update"); + } + other => panic!("expected function call output item, got {other:?}"), + } +} + +#[test] +fn build_agent_inbox_items_prepends_empty_user_message_when_requested() { + let sender_thread_id = ThreadId::new(); + let items = build_agent_inbox_items( + sender_thread_id, + "watchdog update".to_string(), + /*prepend_turn_start_user_message*/ true, + ) + .expect("tool role should build inbox items"); + + assert_eq!(items.len(), 3); + assert_eq!( + items[0], + ResponseInputItem::Message { + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: String::new(), + }], + } + ); + assert_matches!(&items[1], ResponseInputItem::FunctionCall { .. }); + assert_matches!(&items[2], ResponseInputItem::FunctionCallOutput { .. 
}); +} + +#[tokio::test] +async fn send_agent_message_to_root_thread_defaults_to_user_input() { + let harness = AgentControlHarness::new().await; + let (receiver_thread_id, _thread) = harness.start_thread().await; + let sender_thread_id = ThreadId::new(); + + let submission_id = harness + .control + .send_agent_message( + receiver_thread_id, + sender_thread_id, + "watchdog update".to_string(), + ) + .await + .expect("send_agent_message should succeed"); + assert!(!submission_id.is_empty()); + + let expected = ( + receiver_thread_id, + Op::UserInput { + items: vec![UserInput::Text { + text: "watchdog update".to_string(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + }, + ); + let captured = harness + .manager + .captured_ops() + .into_iter() + .find(|entry| *entry == expected); + + assert_eq!(captured, Some(expected)); +} + +#[tokio::test] +async fn send_agent_message_to_root_thread_injects_response_items_when_enabled() { + let mut harness = AgentControlHarness::new().await; + harness.config.agent_use_function_call_inbox = true; + let (receiver_thread_id, _thread) = harness.start_thread().await; + let sender_thread_id = ThreadId::new(); + + let submission_id = harness + .control + .send_agent_message( + receiver_thread_id, + sender_thread_id, + "watchdog update".to_string(), + ) + .await + .expect("send_agent_message should succeed"); + assert!(!submission_id.is_empty()); + + let captured = harness + .manager + .captured_ops() + .into_iter() + .find(|(thread_id, op)| { + *thread_id == receiver_thread_id && matches!(op, Op::InjectResponseItems { .. 
}) + }) + .expect("expected injected agent inbox op"); + + let Op::InjectResponseItems { items } = captured.1 else { + unreachable!("matched above"); + }; + assert_eq!(items.len(), 3); + match &items[0] { + ResponseInputItem::Message { role, content } => { + assert_eq!(role, "user"); + assert_eq!( + content, + &vec![ContentItem::InputText { + text: String::new(), + }] + ); + } + other => panic!("expected prepended user message, got {other:?}"), + } + match &items[1] { + ResponseInputItem::FunctionCall { + name, arguments, .. + } => { + assert_eq!(name, AGENT_INBOX_KIND); + assert_eq!(arguments, "{}"); + } + other => panic!("expected function call item, got {other:?}"), + } + match &items[2] { + ResponseInputItem::FunctionCallOutput { output, .. } => { + let output_text = output + .body + .to_text() + .expect("payload should convert to text"); + let payload: AgentInboxPayload = + serde_json::from_str(&output_text).expect("payload should be valid json"); + assert_eq!(payload.sender_thread_id, sender_thread_id); + assert_eq!(payload.message, "watchdog update"); + } + other => panic!("expected function call output item, got {other:?}"), + } +} + #[tokio::test] async fn send_inter_agent_communication_without_turn_queues_message_without_triggering_turn() { let harness = AgentControlHarness::new().await; @@ -607,6 +783,7 @@ async fn spawn_agent_can_fork_parent_thread_history() { })), SpawnAgentOptions { fork_parent_spawn_call_id: Some(parent_spawn_call_id), + ..Default::default() }, ) .await @@ -692,6 +869,7 @@ async fn spawn_agent_fork_injects_output_for_parent_spawn_call() { })), SpawnAgentOptions { fork_parent_spawn_call_id: Some(parent_spawn_call_id.clone()), + ..Default::default() }, ) .await @@ -764,6 +942,7 @@ async fn spawn_agent_fork_flushes_parent_rollout_before_loading_history() { })), SpawnAgentOptions { fork_parent_spawn_call_id: Some(parent_spawn_call_id.clone()), + ..Default::default() }, ) .await @@ -1371,8 +1550,12 @@ async fn 
spawn_thread_subagent_uses_role_specific_nickname_candidates() { "researcher".to_string(), AgentRoleConfig { description: Some("Research role".to_string()), + model: None, config_file: None, + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: Some(vec!["Atlas".to_string()]), + fork_context: None, }, ); let (parent_thread_id, _parent_thread) = harness.start_thread().await; diff --git a/codex-rs/core/src/agent/mod.rs b/codex-rs/core/src/agent/mod.rs index 350962dc08..96c001b623 100644 --- a/codex-rs/core/src/agent/mod.rs +++ b/codex-rs/core/src/agent/mod.rs @@ -3,9 +3,14 @@ pub(crate) mod control; mod registry; pub(crate) mod role; pub(crate) mod status; +mod watchdog; pub(crate) use codex_protocol::protocol::AgentStatus; pub(crate) use control::AgentControl; +pub(crate) use control::AgentListing; +pub(crate) use control::WatchdogParentCompactionResult; pub(crate) use registry::exceeds_thread_spawn_depth_limit; pub(crate) use registry::next_thread_spawn_depth; pub(crate) use status::agent_status_from_event; +pub(crate) use watchdog::RemovedWatchdog; +pub(crate) use watchdog::WatchdogRegistration; diff --git a/codex-rs/core/src/agent/registry.rs b/codex-rs/core/src/agent/registry.rs index f78c8d08bc..aaa860abff 100644 --- a/codex-rs/core/src/agent/registry.rs +++ b/codex-rs/core/src/agent/registry.rs @@ -77,6 +77,18 @@ pub(crate) fn exceeds_thread_spawn_depth_limit(depth: i32, max_depth: i32) -> bo } impl AgentRegistry { + pub(crate) fn tracked_thread_ids(&self) -> Vec { + let active_agents = self + .active_agents + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + active_agents + .agent_tree + .values() + .filter_map(|metadata| metadata.agent_id) + .collect() + } + pub(crate) fn reserve_spawn_slot( self: &Arc, max_threads: Option, diff --git a/codex-rs/core/src/agent/role.rs b/codex-rs/core/src/agent/role.rs index b7d7b55ab1..cde3e3c064 100644 --- a/codex-rs/core/src/agent/role.rs +++ b/codex-rs/core/src/agent/role.rs @@ -7,6 +7,7 @@ 
//! which role to use; the multi-agent tool handler owns that orchestration. use crate::config::AgentRoleConfig; +use crate::config::AgentRoleSpawnMode; use crate::config::Config; use crate::config::ConfigOverrides; use crate::config::agent_roles::parse_agent_role_file_contents; @@ -27,6 +28,21 @@ use toml::Value as TomlValue; pub const DEFAULT_ROLE_NAME: &str = "default"; const AGENT_TYPE_UNAVAILABLE_ERROR: &str = "agent type is currently not available"; +pub(crate) fn default_spawn_mode_for_role( + config: &Config, + role_name: Option<&str>, +) -> AgentRoleSpawnMode { + let role_name = role_name.unwrap_or(DEFAULT_ROLE_NAME); + resolve_role_config(config, role_name) + .and_then(|role| role.spawn_mode) + .unwrap_or_default() +} + +pub(crate) fn watchdog_interval_for_role(config: &Config, role_name: Option<&str>) -> Option { + let role_name = role_name.unwrap_or(DEFAULT_ROLE_NAME); + resolve_role_config(config, role_name).and_then(|role| role.watchdog_interval_s) +} + /// Applies a named role layer to `config` while preserving caller-owned model selection. 
/// /// The role layer is inserted at session-flag precedence so it can override persisted config, but @@ -60,9 +76,13 @@ async fn apply_role_to_config_inner( ) -> anyhow::Result<()> { let is_built_in = !config.agent_roles.contains_key(role_name); let Some(config_file) = role.config_file.as_ref() else { + if let Some(model) = &role.model { + config.model = Some(model.clone()); + } return Ok(()); }; - let role_layer_toml = load_role_layer_toml(config, config_file, is_built_in, role_name).await?; + let role_layer_toml = + load_role_layer_toml(config, config_file, is_built_in, role_name, role).await?; let (preserve_current_profile, preserve_current_provider) = preservation_policy(config, &role_layer_toml); @@ -80,6 +100,7 @@ async fn load_role_layer_toml( config_file: &Path, is_built_in: bool, role_name: &str, + role: &AgentRoleConfig, ) -> anyhow::Result { let (role_config_toml, role_config_base) = if is_built_in { let role_config_contents = built_in::config_file_contents(config_file) @@ -103,10 +124,14 @@ async fn load_role_layer_toml( }; deserialize_config_toml_with_base(role_config_toml.clone(), role_config_base)?; - Ok(resolve_relative_paths_in_config_toml( - role_config_toml, - role_config_base, - )?) 
+ let mut role_layer_toml = + resolve_relative_paths_in_config_toml(role_config_toml, role_config_base)?; + if let Some(model) = &role.model + && let Some(table) = role_layer_toml.as_table_mut() + { + table.insert("model".to_string(), TomlValue::String(model.clone())); + } + Ok(role_layer_toml) } pub(crate) fn resolve_role_config<'a>( @@ -119,6 +144,13 @@ pub(crate) fn resolve_role_config<'a>( .or_else(|| built_in::configs().get(role_name)) } +pub(crate) fn default_fork_context_for_role(config: &Config, role_name: Option<&str>) -> bool { + let role_name = role_name.unwrap_or(DEFAULT_ROLE_NAME); + resolve_role_config(config, role_name) + .and_then(|role| role.fork_context) + .unwrap_or(false) +} + fn preservation_policy(config: &Config, role_layer_toml: &TomlValue) -> (bool, bool) { let role_selects_provider = role_layer_toml.get("model_provider").is_some(); let role_selects_profile = role_layer_toml.get("profile").is_some(); @@ -350,8 +382,12 @@ mod built_in { DEFAULT_ROLE_NAME.to_string(), AgentRoleConfig { description: Some("Default agent.".to_string()), + model: None, config_file: None, + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: Some(true), } ), ( @@ -364,8 +400,12 @@ Rules: - In order to avoid redundant work, you should avoid exploring the same problem that explorers have already covered. Typically, you should trust the explorer results without additional verification. You are still allowed to inspect the code yourself to gain the needed context! - You are encouraged to spawn up multiple explorers in parallel when you have multiple distinct questions to ask about the codebase that can be answered independently. This allows you to get more information faster without waiting for one question to finish before asking the next. While waiting for the explorer results, you can continue working on other local tasks that do not depend on those results. 
This parallelism is a key advantage of delegation, so use it whenever you have multiple questions to ask. - Reuse existing explorers for related questions."#.to_string()), + model: None, config_file: Some("explorer.toml".to_string().parse().unwrap_or_default()), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: Some(true), } ), ( @@ -379,8 +419,29 @@ Typical tasks: Rules: - Explicitly assign **ownership** of the task (files / responsibility). When the subtask involves code changes, you should clearly specify which files or modules the worker is responsible for. This helps avoid merge conflicts and ensures accountability. For example, you can say "Worker 1 is responsible for updating the authentication module, while Worker 2 will handle the database layer." By defining clear ownership, you can delegate more effectively and reduce coordination overhead. - Always tell workers they are **not alone in the codebase**, and they should not revert the edits made by others, and they should adjust their implementation to accommodate the changes made by others. This is important because there may be multiple workers making changes in parallel, and they need to be aware of each other's work to avoid conflicts and ensure a cohesive final product."#.to_string()), + model: None, config_file: None, + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: Some(true), + } + ), + ( + "watchdog".to_string(), + AgentRoleConfig { + description: Some(r#"Use `watchdog` for long-running work that needs periodic oversight. +This role creates an idle-time watchdog handle instead of a conversational worker. +Rules: +- Watchdog check-ins are asynchronous and only happen after the current turn ends and the owner thread is idle. +- Do not call `wait` or `send_input` on the watchdog handle. 
+- Close the watchdog handle only when it is no longer needed or when replacing it with a new watchdog."#.to_string()), + model: None, + config_file: None, + spawn_mode: None, + watchdog_interval_s: Some(crate::config::DEFAULT_WATCHDOG_INTERVAL_S), + nickname_candidates: None, + fork_context: Some(true), } ), // Awaiter is temp removed diff --git a/codex-rs/core/src/agent/role_tests.rs b/codex-rs/core/src/agent/role_tests.rs index 5b3941ebda..a2fb057e7b 100644 --- a/codex-rs/core/src/agent/role_tests.rs +++ b/codex-rs/core/src/agent/role_tests.rs @@ -94,8 +94,12 @@ async fn apply_role_returns_unavailable_for_missing_user_role_file() { "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(PathBuf::from("/path/does/not/exist.toml")), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -114,8 +118,12 @@ async fn apply_role_returns_unavailable_for_invalid_user_role_toml() { "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -145,8 +153,12 @@ model = "role-model" "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -176,8 +188,12 @@ async fn apply_role_preserves_unspecified_keys() { "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -235,8 +251,12 @@ model_provider = "test-provider" "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -289,8 +309,12 @@ 
model_verbosity = "high" "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -355,8 +379,12 @@ model_provider = "role-provider" "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -413,8 +441,12 @@ model_provider = "base-provider" "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -477,8 +509,12 @@ model_reasoning_effort = "high" "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -521,8 +557,12 @@ writable_roots = ["./sandbox-root"] "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -583,8 +623,12 @@ async fn apply_role_takes_precedence_over_existing_session_flags_for_same_key() "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -626,8 +670,12 @@ enabled = false "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -653,6 +701,17 @@ enabled = false assert_eq!(outcome.is_skill_enabled(skill), false); } +#[tokio::test] +async fn 
watchdog_interval_for_role_returns_built_in_watchdog_interval() { + let (_home, config) = test_config_with_cli_overrides(Vec::new()).await; + + assert_eq!( + watchdog_interval_for_role(&config, Some("watchdog")), + Some(crate::config::DEFAULT_WATCHDOG_INTERVAL_S) + ); + assert_eq!(watchdog_interval_for_role(&config, Some("default")), None); +} + #[test] fn spawn_tool_spec_build_deduplicates_user_defined_built_in_roles() { let user_defined_roles = BTreeMap::from([ @@ -660,8 +719,12 @@ fn spawn_tool_spec_build_deduplicates_user_defined_built_in_roles() { "explorer".to_string(), AgentRoleConfig { description: Some("user override".to_string()), + model: None, config_file: None, + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ), ("researcher".to_string(), AgentRoleConfig::default()), @@ -681,8 +744,12 @@ fn spawn_tool_spec_lists_user_defined_roles_before_built_ins() { "aaa".to_string(), AgentRoleConfig { description: Some("first".to_string()), + model: None, config_file: None, + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, )]); @@ -708,8 +775,12 @@ fn spawn_tool_spec_marks_role_locked_model_and_reasoning_effort() { "researcher".to_string(), AgentRoleConfig { description: Some("Research carefully.".to_string()), + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, )]); @@ -733,8 +804,12 @@ fn spawn_tool_spec_marks_role_locked_reasoning_effort_only() { "reviewer".to_string(), AgentRoleConfig { description: Some("Review carefully.".to_string()), + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, )]); diff --git a/codex-rs/core/src/agent/watchdog.rs b/codex-rs/core/src/agent/watchdog.rs new file mode 100644 index 0000000000..86b068ed7f --- /dev/null +++ 
b/codex-rs/core/src/agent/watchdog.rs @@ -0,0 +1,639 @@ +use super::control::AgentControl; +use super::registry::AgentRegistry; +use super::registry::exceeds_thread_spawn_depth_limit; +use super::status::is_final; +use crate::codex::load_watchdog_prompt; +use crate::config::Config; +use crate::error::CodexErr; +use crate::error::Result as CodexResult; +use crate::thread_manager::ThreadManagerState; +use codex_features::Feature; +use codex_protocol::ThreadId; +use codex_protocol::protocol::AgentStatus; +use codex_protocol::protocol::SessionSource; +use codex_protocol::protocol::SubAgentSource; +use codex_protocol::user_input::UserInput; +use std::collections::HashMap; +use std::collections::HashSet; +use std::sync::Arc; +use std::sync::Weak; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::AtomicI64; +use std::sync::atomic::Ordering; +use tokio::sync::Mutex; +use tokio::time::Duration; +use tokio::time::Instant; +use tracing::info; +use tracing::warn; + +const WATCHDOG_TICK_SECONDS: i64 = 1; + +#[derive(Clone)] +pub(crate) struct WatchdogRegistration { + pub(crate) owner_thread_id: ThreadId, + pub(crate) target_thread_id: ThreadId, + pub(crate) child_depth: i32, + pub(crate) interval_s: i64, + pub(crate) prompt: String, + pub(crate) config: Config, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) struct RemovedWatchdog { + pub(crate) target_thread_id: ThreadId, + pub(crate) active_helper_id: Option, +} + +struct WatchdogEntry { + registration: WatchdogRegistration, + interval: Duration, + last_trigger: Instant, + active_helper_id: Option, + owner_idle_since: Option, + owner_was_running: bool, + force_due_once: bool, + generation: i64, +} + +pub(crate) struct WatchdogManager { + manager: Weak, + state: Arc, + registrations: Mutex>, + started: AtomicBool, + next_generation: AtomicI64, +} + +impl WatchdogManager { + pub(crate) fn new(manager: Weak, state: Arc) -> Arc { + Arc::new(Self { + manager, + state, + registrations: 
Mutex::new(HashMap::new()), + started: AtomicBool::new(false), + next_generation: AtomicI64::new(1), + }) + } + + pub(crate) fn start(self: &Arc) { + if self + .started + .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire) + .is_err() + { + return; + } + + let manager = Arc::clone(self); + tokio::spawn(async move { + manager.run_loop().await; + }); + } + + pub(crate) async fn register( + self: &Arc, + registration: WatchdogRegistration, + ) -> CodexResult> { + if exceeds_thread_spawn_depth_limit( + registration.child_depth, + registration.config.agent_max_depth, + ) { + let max_depth = registration.config.agent_max_depth; + return Err(CodexErr::UnsupportedOperation(format!( + "agent depth limit reached: max depth is {max_depth}" + ))); + } + let interval = interval_duration(registration.interval_s)?; + let generation = self.next_generation.fetch_add(1, Ordering::AcqRel); + let now = Instant::now(); + let entry = WatchdogEntry { + registration, + interval, + last_trigger: now, + active_helper_id: None, + owner_idle_since: Some(now), + owner_was_running: false, + force_due_once: false, + generation, + }; + + let mut registrations = self.registrations.lock().await; + let superseded_targets: Vec = registrations + .iter() + .filter_map(|(target_thread_id, existing_entry)| { + (existing_entry.registration.owner_thread_id == entry.registration.owner_thread_id + && *target_thread_id != entry.registration.target_thread_id) + .then_some(*target_thread_id) + }) + .collect(); + let mut superseded = Vec::new(); + for superseded_target in superseded_targets { + if let Some(removed) = registrations.remove(&superseded_target) { + superseded.push(RemovedWatchdog { + target_thread_id: superseded_target, + active_helper_id: removed.active_helper_id, + }); + } + } + registrations.insert(entry.registration.target_thread_id, entry); + Ok(superseded) + } + + async fn run_loop(self: Arc) { + let tick = tick_duration(); + loop { + self.run_once().await; + if 
self.manager.upgrade().is_none() { + break; + } + tokio::time::sleep(tick).await; + } + } + + pub(crate) async fn run_once(self: &Arc) { + let Some(manager_state) = self.manager.upgrade() else { + self.registrations.lock().await.clear(); + return; + }; + + let snapshots: Vec<(ThreadId, i64)> = { + let registrations = self.registrations.lock().await; + registrations + .iter() + .map(|(target_id, entry)| (*target_id, entry.generation)) + .collect() + }; + let now = Instant::now(); + + for (target_id, generation) in snapshots { + self.evaluate(&manager_state, target_id, generation, now) + .await; + } + } + + async fn evaluate( + self: &Arc, + manager_state: &Arc, + target_thread_id: ThreadId, + generation: i64, + now: Instant, + ) { + let Some(snapshot) = self.snapshot(target_thread_id, generation).await else { + return; + }; + + let owner_thread = manager_state.get_thread(snapshot.owner_thread_id).await; + let owner_status = match owner_thread.as_ref() { + Ok(thread) => thread.agent_status().await, + Err(_) => AgentStatus::NotFound, + }; + let control_for_spawn = AgentControl::from_parts( + self.manager.clone(), + Arc::clone(&self.state), + Arc::clone(self), + ); + if is_watchdog_terminated(&owner_status) { + match control_for_spawn.shutdown_agent(target_thread_id).await { + Ok(_) | Err(CodexErr::ThreadNotFound(_)) | Err(CodexErr::InternalAgentDied) => {} + Err(err) => { + warn!( + owner_thread_id = %snapshot.owner_thread_id, + target_thread_id = %target_thread_id, + "watchdog owner termination cleanup failed: {err}" + ); + } + } + return; + } + let force_due = self + .take_force_due_if_generation(target_thread_id, generation) + .await; + let owner_has_active_turn = match owner_thread { + Ok(thread) => thread.has_active_turn().await, + Err(_) => false, + }; + let owner_running = (is_running(&owner_status) || owner_has_active_turn) && !force_due; + let owner_idle_since = self + .update_owner_idle_state_if_generation( + target_thread_id, + generation, + owner_running, 
+ now, + force_due, + ) + .await; + if owner_running { + return; + } + let owner_idle_since = owner_idle_since.or(snapshot.owner_idle_since); + let Some(owner_idle_since) = owner_idle_since else { + return; + }; + if now.duration_since(owner_idle_since) < snapshot.interval { + return; + } + + if let Some(helper_id) = snapshot.active_helper_id { + let helper_status = get_status(manager_state, helper_id).await; + if !is_final(&helper_status) { + return; + } + + let helper_sent_input = manager_state + .get_thread(helper_id) + .await + .map(|thread| thread.last_completed_turn_used_agent_send_input()) + .unwrap_or(false); + // Every watchdog check-in must wake the owner thread exactly once. + // + // Preferred path: the helper explicitly calls `send_input`. + // Mandatory fallback: if the helper reaches a terminal state without + // using `send_input`, forward a conclusory inbox message to the + // owner so the owner thread is still resumed. + if !helper_sent_input { + let fallback_message = match &helper_status { + AgentStatus::Completed(Some(message)) if !message.trim().is_empty() => { + Some(message.clone()) + } + AgentStatus::Completed(_) => Some( + "Watchdog check-in completed without calling send_input or returning a final message." 
+ .to_string(), + ), + AgentStatus::Errored(message) if !message.trim().is_empty() => Some( + format!("Watchdog check-in failed before calling send_input: {message}"), + ), + AgentStatus::Errored(_) => Some( + "Watchdog check-in failed before calling send_input.".to_string(), + ), + AgentStatus::Interrupted => { + Some("Watchdog check-in was interrupted before calling send_input.".to_string()) + } + AgentStatus::Shutdown => { + Some("Watchdog check-in ended before calling send_input.".to_string()) + } + AgentStatus::NotFound => Some( + "Watchdog check-in disappeared before calling send_input.".to_string(), + ), + AgentStatus::PendingInit | AgentStatus::Running => None, + }; + + if let Some(message) = fallback_message { + if let Err(err) = control_for_spawn + .send_watchdog_wakeup(snapshot.owner_thread_id, helper_id, message) + .await + { + warn!( + helper_id = %helper_id, + owner_thread_id = %snapshot.owner_thread_id, + "watchdog helper forward failed: {err}" + ); + } else { + info!( + helper_id = %helper_id, + owner_thread_id = %snapshot.owner_thread_id, + "watchdog forwarded helper completion to owner" + ); + } + } + } + if let Err(err) = control_for_spawn.shutdown_agent(helper_id).await { + warn!( + helper_id = %helper_id, + owner_thread_id = %snapshot.owner_thread_id, + "watchdog helper cleanup failed: {err}" + ); + } + self.update_after_spawn( + target_thread_id, + generation, + now, + /*active_helper_id*/ None, + ) + .await; + return; + } + + if now.duration_since(snapshot.last_trigger) < snapshot.interval { + return; + } + + let session_source = SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id: snapshot.owner_thread_id, + depth: snapshot.child_depth, + agent_path: None, + agent_nickname: None, + agent_role: None, + }); + let mut helper_config = snapshot.config.clone(); + helper_config.ephemeral = true; + if helper_config + .features + .enabled(Feature::AgentPromptInjection) + { + let watchdog_prompt = 
load_watchdog_prompt(&helper_config.codex_home).await; + helper_config.developer_instructions = match ( + watchdog_prompt.trim().is_empty(), + helper_config.developer_instructions, + ) { + (true, existing) => existing, + (false, Some(existing)) if !existing.trim().is_empty() => { + Some(format!("{existing}\n\n{watchdog_prompt}")) + } + (false, _) => Some(watchdog_prompt), + }; + } + let helper_prompt = + watchdog_helper_prompt(&helper_config, snapshot.owner_thread_id, &snapshot.prompt) + .await; + // Watchdog check-ins must fork a distinct helper thread. If this path ever resumes + // the owner thread instead, the owner can self-wake and rapidly duplicate session + // state in memory. + let spawn_result = control_for_spawn + .fork_agent( + helper_config, + vec![UserInput::Text { + text: helper_prompt, + text_elements: Vec::new(), + }], + snapshot.owner_thread_id, + usize::MAX, + session_source, + ) + .await; + + match spawn_result { + Ok(helper_id) => { + info!("watchdog spawned helper {helper_id} for target {target_thread_id}"); + self.update_after_spawn(target_thread_id, generation, now, Some(helper_id)) + .await; + } + Err(err) => { + warn!("watchdog spawn failed for target {target_thread_id}: {err}"); + self.update_after_spawn( + target_thread_id, + generation, + now, + /*active_helper_id*/ None, + ) + .await; + } + } + } + + async fn snapshot( + &self, + target_thread_id: ThreadId, + generation: i64, + ) -> Option { + let registrations = self.registrations.lock().await; + let entry = registrations.get(&target_thread_id)?; + if entry.generation != generation { + return None; + } + Some(WatchdogSnapshot { + owner_thread_id: entry.registration.owner_thread_id, + child_depth: entry.registration.child_depth, + prompt: entry.registration.prompt.clone(), + config: entry.registration.config.clone(), + interval: entry.interval, + last_trigger: entry.last_trigger, + active_helper_id: entry.active_helper_id, + owner_idle_since: entry.owner_idle_since, + }) + } + + async 
fn update_owner_idle_state_if_generation( + &self, + target_thread_id: ThreadId, + generation: i64, + owner_running: bool, + now: Instant, + force_due: bool, + ) -> Option { + let mut registrations = self.registrations.lock().await; + let entry = registrations.get_mut(&target_thread_id)?; + if entry.generation != generation { + return None; + } + + if force_due { + return entry.owner_idle_since; + } + + if owner_running { + entry.owner_idle_since = None; + entry.owner_was_running = true; + return None; + } + + if entry.owner_was_running || entry.owner_idle_since.is_none() { + entry.owner_idle_since = Some(now); + } + entry.owner_was_running = false; + entry.owner_idle_since + } + + async fn take_force_due_if_generation( + &self, + target_thread_id: ThreadId, + generation: i64, + ) -> bool { + let mut registrations = self.registrations.lock().await; + let Some(entry) = registrations.get_mut(&target_thread_id) else { + return false; + }; + if entry.generation != generation || !entry.force_due_once { + return false; + } + entry.force_due_once = false; + true + } + + #[cfg(test)] + pub(crate) async fn force_due_for_tests(&self, target_thread_id: ThreadId) { + let mut registrations = self.registrations.lock().await; + if let Some(entry) = registrations.get_mut(&target_thread_id) { + entry.force_due_once = true; + } + } + + async fn update_after_spawn( + &self, + target_thread_id: ThreadId, + generation: i64, + now: Instant, + active_helper_id: Option, + ) { + let mut registrations = self.registrations.lock().await; + let Some(entry) = registrations.get_mut(&target_thread_id) else { + return; + }; + if entry.generation != generation { + return; + } + entry.last_trigger = now; + entry.active_helper_id = active_helper_id; + } + + pub(crate) async fn unregister(&self, target_thread_id: ThreadId) -> Option { + let mut registrations = self.registrations.lock().await; + registrations + .remove(&target_thread_id) + .map(|removed| RemovedWatchdog { + target_thread_id, + 
active_helper_id: removed.active_helper_id, + }) + } + + pub(crate) async fn owner_for_active_helper( + &self, + helper_thread_id: ThreadId, + ) -> Option { + let registrations = self.registrations.lock().await; + registrations.values().find_map(|entry| { + (entry.active_helper_id == Some(helper_thread_id)) + .then_some(entry.registration.owner_thread_id) + }) + } + + pub(crate) async fn registered_targets(&self, candidate_ids: &[ThreadId]) -> HashSet { + let registrations = self.registrations.lock().await; + candidate_ids + .iter() + .copied() + .filter(|candidate_id| registrations.contains_key(candidate_id)) + .collect() + } + + pub(crate) async fn take_for_owner(&self, owner_thread_id: ThreadId) -> Vec { + let mut registrations = self.registrations.lock().await; + let removed_targets: Vec = registrations + .iter() + .filter_map(|(target_thread_id, entry)| { + (entry.registration.owner_thread_id == owner_thread_id).then_some(*target_thread_id) + }) + .collect(); + let mut removed = Vec::new(); + for removed_target in removed_targets { + if let Some(entry) = registrations.remove(&removed_target) { + removed.push(RemovedWatchdog { + target_thread_id: removed_target, + active_helper_id: entry.active_helper_id, + }); + } + } + removed + } + + #[cfg(test)] + #[allow(dead_code)] + pub(crate) async fn set_active_helper_for_tests( + &self, + target_thread_id: ThreadId, + helper_thread_id: ThreadId, + ) { + let mut registrations = self.registrations.lock().await; + let Some(entry) = registrations.get_mut(&target_thread_id) else { + return; + }; + let due_at = Instant::now() - entry.interval; + entry.last_trigger = due_at; + entry.owner_idle_since = Some(due_at); + entry.owner_was_running = false; + entry.active_helper_id = Some(helper_thread_id); + } +} + +#[derive(Clone)] +struct WatchdogSnapshot { + owner_thread_id: ThreadId, + child_depth: i32, + prompt: String, + config: Config, + interval: Duration, + last_trigger: Instant, + active_helper_id: Option, + 
owner_idle_since: Option, +} + +async fn get_status(manager_state: &Arc, thread_id: ThreadId) -> AgentStatus { + let Ok(thread) = manager_state.get_thread(thread_id).await else { + return AgentStatus::NotFound; + }; + thread.agent_status().await +} + +fn is_running(status: &AgentStatus) -> bool { + matches!(status, AgentStatus::PendingInit | AgentStatus::Running) +} + +fn is_watchdog_terminated(status: &AgentStatus) -> bool { + matches!(status, AgentStatus::Shutdown | AgentStatus::NotFound) +} + +fn interval_duration(interval_s: i64) -> CodexResult { + if interval_s <= 0 { + return Err(CodexErr::UnsupportedOperation( + "interval_s must be greater than zero".to_string(), + )); + } + let seconds = u64::try_from(interval_s).map_err(|_| { + CodexErr::UnsupportedOperation(format!("interval_s out of range: {interval_s}")) + })?; + Ok(Duration::from_secs(seconds)) +} + +fn tick_duration() -> Duration { + let seconds = u64::try_from(WATCHDOG_TICK_SECONDS).unwrap_or(5); + Duration::from_secs(seconds) +} + +async fn watchdog_helper_prompt( + _config: &Config, + target_thread_id: ThreadId, + prompt: &str, +) -> String { + if prompt.trim().is_empty() { + format!("Target agent id: {target_thread_id}") + } else { + format!("Target agent id: {target_thread_id}\n\n{prompt}") + } +} + +#[cfg(test)] +mod tests { + use super::watchdog_helper_prompt; + use crate::config::ConfigBuilder; + use codex_features::Feature; + use codex_protocol::ThreadId; + + #[tokio::test] + async fn watchdog_helper_prompt_is_minimal_when_agent_prompt_injection_is_disabled() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + let config = ConfigBuilder::default() + .codex_home(codex_home.path().to_path_buf()) + .build() + .await + .expect("load config"); + let target_thread_id = ThreadId::default(); + let prompt = watchdog_helper_prompt(&config, target_thread_id, "ping").await; + assert_eq!( + prompt, + format!("Target agent id: {target_thread_id}\n\nping") + ); + } + + #[tokio::test] + 
async fn watchdog_helper_prompt_is_user_task_only_when_enabled() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + let mut config = ConfigBuilder::default() + .codex_home(codex_home.path().to_path_buf()) + .build() + .await + .expect("load config"); + let _ = config.features.enable(Feature::AgentPromptInjection); + + let prompt = watchdog_helper_prompt(&config, ThreadId::default(), "ping").await; + assert!(prompt.contains("Target agent id:")); + assert!(prompt.ends_with("\n\nping")); + } +} diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 092ce8f3e6..c921be3a74 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -4,7 +4,9 @@ use std::fmt::Debug; use std::path::Path; use std::path::PathBuf; use std::sync::Arc; +use std::sync::atomic::AtomicBool; use std::sync::atomic::AtomicU64; +use std::sync::atomic::Ordering; use crate::AuthManager; use crate::CodexAuth; @@ -136,6 +138,7 @@ use rmcp::model::ReadResourceResult; use rmcp::model::RequestId; use serde_json; use serde_json::Value; +use tokio::fs; use tokio::sync::Mutex; use tokio::sync::RwLock; use tokio::sync::oneshot; @@ -324,6 +327,83 @@ use crate::shell_snapshot::ShellSnapshot; use crate::skills_watcher::SkillsWatcher; use crate::skills_watcher::SkillsWatcherEvent; use crate::state::ActiveTurn; + +const ROOT_AGENT_PROMPT_FALLBACK: &str = include_str!("../root_agent_prompt.md"); +const ROOT_AGENT_WATCHDOG_PROMPT_FALLBACK: &str = include_str!("../root_agent_watchdog_prompt.md"); +const SUBAGENT_PROMPT_FALLBACK: &str = include_str!("../subagent_prompt.md"); +const SUBAGENT_WATCHDOG_PROMPT_FALLBACK: &str = include_str!("../subagent_watchdog_prompt.md"); +const WATCHDOG_PROMPT_FALLBACK: &str = include_str!("../watchdog_agent_prompt.md"); + +async fn load_agent_prompt_fallback( + codex_home: &Path, + fallback: &str, + override_filename: &str, +) -> String { + let override_path = codex_home.join(override_filename); + if let Ok(contents) = 
fs::read_to_string(&override_path).await + && !contents.trim().is_empty() + { + return contents; + } + + fallback.to_string() +} + +async fn maybe_load_agent_prompt_fragment( + codex_home: &Path, + fallback: &str, + override_filename: &str, + enabled: bool, +) -> Option { + if !enabled { + return None; + } + let fragment = load_agent_prompt_fallback(codex_home, fallback, override_filename).await; + if fragment.trim().is_empty() { + None + } else { + Some(fragment) + } +} + +async fn load_root_agent_prompt(codex_home: &Path, include_watchdog: bool) -> String { + let mut prompt = + load_agent_prompt_fallback(codex_home, ROOT_AGENT_PROMPT_FALLBACK, "AGENTS.root.md").await; + if let Some(fragment) = maybe_load_agent_prompt_fragment( + codex_home, + ROOT_AGENT_WATCHDOG_PROMPT_FALLBACK, + "AGENTS.root.watchdog.md", + include_watchdog, + ) + .await + { + prompt.push_str("\n\n"); + prompt.push_str(fragment.trim()); + } + prompt +} + +pub(crate) async fn load_subagent_prompt(codex_home: &Path, include_watchdog: bool) -> String { + let mut prompt = + load_agent_prompt_fallback(codex_home, SUBAGENT_PROMPT_FALLBACK, "AGENTS.subagent.md") + .await; + if let Some(fragment) = maybe_load_agent_prompt_fragment( + codex_home, + SUBAGENT_WATCHDOG_PROMPT_FALLBACK, + "AGENTS.subagent.watchdog.md", + include_watchdog, + ) + .await + { + prompt.push_str("\n\n"); + prompt.push_str(fragment.trim()); + } + prompt +} + +pub(crate) async fn load_watchdog_prompt(codex_home: &Path) -> String { + load_agent_prompt_fallback(codex_home, WATCHDOG_PROMPT_FALLBACK, "AGENTS.watchdog.md").await +} use crate::state::SessionServices; use crate::state::SessionState; use crate::state_db; @@ -556,6 +636,40 @@ impl Codex { let model = models_manager .get_default_model(&config.model, refresh_strategy) .await; + let forked_subagent = matches!(session_source, SessionSource::SubAgent(_)) + && matches!(conversation_history, InitialHistory::Forked(_)); + let role_prompt = if 
config.features.enabled(Feature::Collab) + && config.features.enabled(Feature::AgentPromptInjection) + && !forked_subagent + { + match session_source { + SessionSource::SubAgent(_) => Some( + load_subagent_prompt( + &config.codex_home, + config.features.enabled(Feature::AgentWatchdog), + ) + .await, + ), + _ => Some( + load_root_agent_prompt( + &config.codex_home, + config.features.enabled(Feature::AgentWatchdog), + ) + .await, + ), + } + } else { + None + }; + let developer_instructions = if forked_subagent { + None + } else { + match (role_prompt, config.developer_instructions.clone()) { + (Some(prompt), Some(existing)) => Some(format!("{prompt}\n\n{existing}")), + (Some(prompt), None) => Some(prompt), + (None, existing) => existing, + } + }; // Resolve base instructions for the session. Priority order: // 1. config.base_instructions override @@ -610,7 +724,7 @@ impl Codex { collaboration_mode, model_reasoning_summary: config.model_reasoning_summary, service_tier: config.service_tier, - developer_instructions: config.developer_instructions.clone(), + developer_instructions, user_instructions, personality: config.personality, base_instructions, @@ -811,6 +925,8 @@ pub(crate) struct Session { pub(crate) services: SessionServices, js_repl: Arc, next_internal_sub_id: AtomicU64, + turn_used_agent_send_input: AtomicBool, + last_completed_turn_used_agent_send_input: AtomicBool, } #[derive(Clone, Debug)] @@ -1124,6 +1240,9 @@ impl SessionConfiguration { sandbox_policy: self.sandbox_policy.get().clone(), cwd: self.cwd.to_path_buf(), ephemeral: self.original_config_do_not_use.ephemeral, + agent_use_function_call_inbox: self + .original_config_do_not_use + .agent_use_function_call_inbox, reasoning_effort: self.collaboration_mode.reasoning_effort(), personality: self.personality, session_source: self.session_source.clone(), @@ -1922,6 +2041,8 @@ impl Session { services, js_repl, next_internal_sub_id: AtomicU64::new(0), + turn_used_agent_send_input: AtomicBool::new(false), + 
last_completed_turn_used_agent_send_input: AtomicBool::new(false), }); if let Some(network_policy_decider_session) = network_policy_decider_session { let mut guard = network_policy_decider_session.write().await; @@ -2066,7 +2187,44 @@ impl Session { self.services.state_db.clone() } - /// Ensure rollout file writes are durably flushed. + pub(crate) async fn has_active_turn(&self) -> bool { + self.active_turn.lock().await.is_some() + } + + pub(crate) async fn parent_thread_id(&self) -> Option { + let state = self.state.lock().await; + match &state.session_configuration.session_source { + SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, .. + }) => Some(*parent_thread_id), + _ => None, + } + } + + pub(crate) fn mark_turn_used_agent_send_input(&self) { + self.turn_used_agent_send_input + .store(true, Ordering::Release); + } + + pub(crate) fn reset_turn_agent_send_input_flag(&self) { + self.turn_used_agent_send_input + .store(false, Ordering::Release); + } + + pub(crate) fn snapshot_agent_send_input_on_turn_complete(&self) { + let used_agent_send_input = self + .turn_used_agent_send_input + .swap(false, Ordering::AcqRel); + self.last_completed_turn_used_agent_send_input + .store(used_agent_send_input, Ordering::Release); + } + + pub(crate) fn last_completed_turn_used_agent_send_input(&self) -> bool { + self.last_completed_turn_used_agent_send_input + .load(Ordering::Acquire) + } + + /// Ensure all rollout writes are durably flushed. 
pub(crate) async fn flush_rollout(&self) { let recorder = { let guard = self.services.rollout.lock().await; @@ -4410,6 +4568,10 @@ async fn submission_loop(sess: Arc, config: Arc, rx_sub: Receiv handlers::inter_agent_communication(&sess, sub.id.clone(), communication).await; false } + Op::InjectResponseItems { items } => { + handlers::inject_response_items(&sess, sub.id.clone(), items).await; + false + } Op::ExecApproval { id: approval_id, turn_id, @@ -4610,7 +4772,10 @@ mod handlers { use codex_protocol::config_types::ModeKind; use codex_protocol::config_types::Settings; use codex_protocol::dynamic_tools::DynamicToolResponse; + use codex_protocol::items::TurnItem; use codex_protocol::mcp::RequestId as ProtocolRequestId; + use codex_protocol::models::ResponseInputItem; + use codex_protocol::models::ResponseItem; use codex_protocol::user_input::UserInput; use codex_rmcp_client::ElicitationAction; use codex_rmcp_client::ElicitationResponse; @@ -4756,6 +4921,76 @@ mod handlers { } } + pub async fn inject_response_items( + sess: &Arc, + sub_id: String, + items: Vec, + ) { + const MAX_TURN_RESTART_ATTEMPTS: usize = 3; + + let mut pending_items = items; + let mut attempts = 0usize; + loop { + match sess.inject_response_items(pending_items).await { + Ok(()) => return, + Err(items_without_active_turn) => { + pending_items = items_without_active_turn; + } + } + + if attempts >= MAX_TURN_RESTART_ATTEMPTS { + warn!( + attempts, + remaining_items = pending_items.len(), + "dropping response items after repeated turn restart failures" + ); + return; + } + attempts += 1; + + let turn_input = + pop_leading_user_message_input(&mut pending_items).unwrap_or_else(|| { + vec![UserInput::Text { + text: String::new(), + text_elements: Vec::new(), + }] + }); + let turn_sub_id = if attempts == 1 { + sub_id.clone() + } else { + format!("{sub_id}-retry-{attempts}") + }; + let current_context = sess.new_default_turn_with_sub_id(turn_sub_id).await; + // Keep injected inbox wakeups visible to 
telemetry after the TurnContext field rename. + current_context.session_telemetry.user_prompt(&turn_input); + + sess.refresh_mcp_servers_if_requested(¤t_context) + .await; + sess.spawn_task( + Arc::clone(¤t_context), + turn_input, + crate::tasks::RegularTask::new(), + ) + .await; + + if pending_items.is_empty() { + return; + } + } + } + + fn pop_leading_user_message_input( + items: &mut Vec, + ) -> Option> { + let first_item = items.first().cloned()?; + let response_item = ResponseItem::from(first_item); + let TurnItem::UserMessage(user_message) = crate::parse_turn_item(&response_item)? else { + return None; + }; + let _ = items.remove(0); + Some(user_message.content) + } + pub async fn run_user_shell_command(sess: &Arc, sub_id: String, command: String) { if let Some((turn_context, cancellation_token)) = sess.active_turn_context_and_cancellation_token().await diff --git a/codex-rs/core/src/codex_tests.rs b/codex-rs/core/src/codex_tests.rs index f1a6652770..83f3459901 100644 --- a/codex-rs/core/src/codex_tests.rs +++ b/codex-rs/core/src/codex_tests.rs @@ -2367,8 +2367,12 @@ enabled = false "custom".to_string(), crate::config::AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); crate::agent::role::apply_role_to_config(&mut child_config, Some("custom")) @@ -2751,6 +2755,8 @@ pub(crate) async fn make_session_and_context() -> (Session, TurnContext) { guardian_review_session: crate::guardian::GuardianReviewSessionManager::default(), services, js_repl, + turn_used_agent_send_input: std::sync::atomic::AtomicBool::new(false), + last_completed_turn_used_agent_send_input: std::sync::atomic::AtomicBool::new(false), next_internal_sub_id: AtomicU64::new(0), }; @@ -3591,6 +3597,8 @@ pub(crate) async fn make_session_and_context_with_dynamic_tools_and_rx( guardian_review_session: crate::guardian::GuardianReviewSessionManager::default(), services, 
js_repl, + turn_used_agent_send_input: std::sync::atomic::AtomicBool::new(false), + last_completed_turn_used_agent_send_input: std::sync::atomic::AtomicBool::new(false), next_internal_sub_id: AtomicU64::new(0), }); @@ -5284,3 +5292,27 @@ async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() pretty_assertions::assert_eq!(output, expected); } + +#[tokio::test] +async fn root_agent_prompt_only_includes_watchdog_fragment_when_enabled() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + + let without_watchdog = + load_root_agent_prompt(codex_home.path(), /*include_watchdog*/ false).await; + assert!(!without_watchdog.contains("## Watchdogs")); + + let with_watchdog = load_root_agent_prompt(codex_home.path(), /*include_watchdog*/ true).await; + assert!(with_watchdog.contains("## Watchdogs")); +} + +#[tokio::test] +async fn subagent_prompt_only_includes_watchdog_fragment_when_enabled() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + + let without_watchdog = + load_subagent_prompt(codex_home.path(), /*include_watchdog*/ false).await; + assert!(!without_watchdog.contains("## Watchdog-only Guidance")); + + let with_watchdog = load_subagent_prompt(codex_home.path(), /*include_watchdog*/ true).await; + assert!(with_watchdog.contains("## Watchdog-only Guidance")); +} diff --git a/codex-rs/core/src/codex_thread.rs b/codex-rs/core/src/codex_thread.rs index 0635718c9c..97bca0bba0 100644 --- a/codex-rs/core/src/codex_thread.rs +++ b/codex-rs/core/src/codex_thread.rs @@ -38,6 +38,7 @@ pub struct ThreadConfigSnapshot { pub sandbox_policy: SandboxPolicy, pub cwd: PathBuf, pub ephemeral: bool, + pub agent_use_function_call_inbox: bool, pub reasoning_effort: Option, pub personality: Option, pub session_source: SessionSource, @@ -130,6 +131,16 @@ impl CodexThread { self.codex.session.total_token_usage().await } + pub(crate) async fn has_active_turn(&self) -> bool { + self.codex.session.has_active_turn().await + } + + 
pub(crate) fn last_completed_turn_used_agent_send_input(&self) -> bool { + self.codex + .session + .last_completed_turn_used_agent_send_input() + } + /// Records a user-role session-prefix message without creating a new user turn boundary. pub(crate) async fn inject_user_message_without_turn(&self, message: String) { let message = ResponseItem::Message { diff --git a/codex-rs/core/src/config/agent_roles.rs b/codex-rs/core/src/config/agent_roles.rs index c527435e92..19d1484c92 100644 --- a/codex-rs/core/src/config/agent_roles.rs +++ b/codex-rs/core/src/config/agent_roles.rs @@ -152,11 +152,15 @@ fn read_declared_role( fn merge_missing_role_fields(role: &mut AgentRoleConfig, fallback: &AgentRoleConfig) { role.description = role.description.clone().or(fallback.description.clone()); + role.model = role.model.clone().or(fallback.model.clone()); role.config_file = role.config_file.clone().or(fallback.config_file.clone()); + role.spawn_mode = role.spawn_mode.or(fallback.spawn_mode); + role.watchdog_interval_s = role.watchdog_interval_s.or(fallback.watchdog_interval_s); role.nickname_candidates = role .nickname_candidates .clone() .or(fallback.nickname_candidates.clone()); + role.fork_context = role.fork_context.or(fallback.fork_context); } fn agents_toml_from_layer(layer_toml: &TomlValue) -> std::io::Result> { @@ -188,8 +192,12 @@ fn agent_role_config_from_toml( Ok(AgentRoleConfig { description, + model: role.model.clone(), config_file, + spawn_mode: role.spawn_mode, + watchdog_interval_s: role.watchdog_interval_s, nickname_candidates, + fork_context: role.fork_context, }) } @@ -478,8 +486,12 @@ fn discover_agent_roles_in_dir( role_name, AgentRoleConfig { description: parsed_file.description, + model: None, config_file: Some(agent_file), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: parsed_file.nickname_candidates, + fork_context: None, }, ); } diff --git a/codex-rs/core/src/config/config_tests.rs b/codex-rs/core/src/config/config_tests.rs index 
00227fe2b3..d706bc328b 100644 --- a/codex-rs/core/src/config/config_tests.rs +++ b/codex-rs/core/src/config/config_tests.rs @@ -1715,6 +1715,28 @@ fn feature_table_overrides_legacy_flags() -> std::io::Result<()> { Ok(()) } +#[test] +fn feature_table_enables_agent_function_call_inbox() -> std::io::Result<()> { + let codex_home = TempDir::new()?; + let mut entries = BTreeMap::new(); + entries.insert("agent_function_call_inbox".to_string(), true); + let cfg = ConfigToml { + features: Some(codex_features::FeaturesToml { entries }), + ..Default::default() + }; + + let config = Config::load_from_base_config_with_overrides( + cfg, + ConfigOverrides::default(), + codex_home.path().to_path_buf(), + )?; + + assert!(config.features.enabled(Feature::AgentFunctionCallInbox)); + assert!(config.agent_use_function_call_inbox); + + Ok(()) +} + #[test] fn legacy_toggles_map_to_features() -> std::io::Result<()> { let codex_home = TempDir::new()?; @@ -3191,12 +3213,17 @@ fn load_config_rejects_missing_agent_role_config_file() -> std::io::Result<()> { max_threads: None, max_depth: None, job_max_runtime_seconds: None, + use_function_call_inbox: false, roles: BTreeMap::from([( "researcher".to_string(), AgentRoleToml { description: Some("Research role".to_string()), + model: None, config_file: Some(missing_path.abs()), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, )]), }), @@ -4056,15 +4083,20 @@ fn load_config_normalizes_agent_role_nickname_candidates() -> std::io::Result<() max_threads: None, max_depth: None, job_max_runtime_seconds: None, + use_function_call_inbox: false, roles: BTreeMap::from([( "researcher".to_string(), AgentRoleToml { description: Some("Research role".to_string()), + model: None, config_file: None, + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: Some(vec![ " Hypatia ".to_string(), "Noether".to_string(), ]), + fork_context: None, }, )]), }), @@ -4097,12 +4129,17 @@ fn 
load_config_rejects_empty_agent_role_nickname_candidates() -> std::io::Result max_threads: None, max_depth: None, job_max_runtime_seconds: None, + use_function_call_inbox: false, roles: BTreeMap::from([( "researcher".to_string(), AgentRoleToml { description: Some("Research role".to_string()), + model: None, config_file: None, + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: Some(Vec::new()), + fork_context: None, }, )]), }), @@ -4132,12 +4169,17 @@ fn load_config_rejects_duplicate_agent_role_nickname_candidates() -> std::io::Re max_threads: None, max_depth: None, job_max_runtime_seconds: None, + use_function_call_inbox: false, roles: BTreeMap::from([( "researcher".to_string(), AgentRoleToml { description: Some("Research role".to_string()), + model: None, config_file: None, + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: Some(vec!["Hypatia".to_string(), " Hypatia ".to_string()]), + fork_context: None, }, )]), }), @@ -4167,12 +4209,17 @@ fn load_config_rejects_unsafe_agent_role_nickname_candidates() -> std::io::Resul max_threads: None, max_depth: None, job_max_runtime_seconds: None, + use_function_call_inbox: false, roles: BTreeMap::from([( "researcher".to_string(), AgentRoleToml { description: Some("Research role".to_string()), + model: None, config_file: None, + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: Some(vec!["Agent ".to_string()]), + fork_context: None, }, )]), }), @@ -4414,6 +4461,8 @@ fn test_precedence_fixture_with_o3_profile() -> std::io::Result<()> { agent_roles: BTreeMap::new(), memories: MemoriesConfig::default(), agent_job_max_runtime_seconds: DEFAULT_AGENT_JOB_MAX_RUNTIME_SECONDS, + agent_use_function_call_inbox: false, + watchdog_interval_s: DEFAULT_WATCHDOG_INTERVAL_S, codex_home: fixture.codex_home(), sqlite_home: fixture.codex_home(), log_dir: fixture.codex_home().join("log"), @@ -4556,6 +4605,8 @@ fn test_precedence_fixture_with_gpt3_profile() -> std::io::Result<()> { 
agent_roles: BTreeMap::new(), memories: MemoriesConfig::default(), agent_job_max_runtime_seconds: DEFAULT_AGENT_JOB_MAX_RUNTIME_SECONDS, + agent_use_function_call_inbox: false, + watchdog_interval_s: DEFAULT_WATCHDOG_INTERVAL_S, codex_home: fixture.codex_home(), sqlite_home: fixture.codex_home(), log_dir: fixture.codex_home().join("log"), @@ -4696,6 +4747,8 @@ fn test_precedence_fixture_with_zdr_profile() -> std::io::Result<()> { agent_roles: BTreeMap::new(), memories: MemoriesConfig::default(), agent_job_max_runtime_seconds: DEFAULT_AGENT_JOB_MAX_RUNTIME_SECONDS, + agent_use_function_call_inbox: false, + watchdog_interval_s: DEFAULT_WATCHDOG_INTERVAL_S, codex_home: fixture.codex_home(), sqlite_home: fixture.codex_home(), log_dir: fixture.codex_home().join("log"), @@ -4822,6 +4875,8 @@ fn test_precedence_fixture_with_gpt5_profile() -> std::io::Result<()> { agent_roles: BTreeMap::new(), memories: MemoriesConfig::default(), agent_job_max_runtime_seconds: DEFAULT_AGENT_JOB_MAX_RUNTIME_SECONDS, + agent_use_function_call_inbox: false, + watchdog_interval_s: DEFAULT_WATCHDOG_INTERVAL_S, codex_home: fixture.codex_home(), sqlite_home: fixture.codex_home(), log_dir: fixture.codex_home().join("log"), diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 1a0722119b..8cd59fa659 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -145,6 +145,7 @@ pub(crate) const PROJECT_DOC_MAX_BYTES: usize = 32 * 1024; // 32 KiB pub(crate) const DEFAULT_AGENT_MAX_THREADS: Option = Some(6); pub(crate) const DEFAULT_AGENT_MAX_DEPTH: i32 = 1; pub(crate) const DEFAULT_AGENT_JOB_MAX_RUNTIME_SECONDS: Option = None; +pub(crate) const DEFAULT_WATCHDOG_INTERVAL_S: i64 = 60; pub const CONFIG_TOML_FILE: &str = "config.toml"; const OPENAI_BASE_URL_ENV_VAR: &str = "OPENAI_BASE_URL"; @@ -403,6 +404,13 @@ pub struct Config { pub agent_max_threads: Option, /// Maximum runtime in seconds for agent job workers before they are failed. 
pub agent_job_max_runtime_seconds: Option, + /// When true, inbound agent messages to non-subagent threads are delivered + /// as a synthetic function_call/function_call_output pair instead of plain + /// user input. + pub agent_use_function_call_inbox: bool, + + /// Watchdog polling interval in seconds. + pub watchdog_interval_s: i64, /// Maximum nesting depth allowed for spawned agent threads. pub agent_max_depth: i32, @@ -1318,6 +1326,9 @@ pub struct ConfigToml { /// Agent-related settings (thread limits, etc.). pub agents: Option, + /// Watchdog polling interval in seconds. + pub watchdog_interval_s: Option, + /// Memories subsystem settings. pub memories: Option, @@ -1548,6 +1559,10 @@ pub struct AgentsToml { /// Default maximum runtime in seconds for agent job workers. #[schemars(range(min = 1))] pub job_max_runtime_seconds: Option, + /// Deliver inbound agent messages to non-subagent threads as a synthetic + /// function_call/function_call_output pair instead of plain user input. + #[serde(default)] + pub use_function_call_inbox: bool, /// User-defined role declarations keyed by role name. /// @@ -1562,15 +1577,31 @@ pub struct AgentsToml { pub roles: BTreeMap, } +#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum AgentRoleSpawnMode { + #[default] + Spawn, + Fork, +} + #[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct AgentRoleConfig { /// Human-facing role documentation used in spawn tool guidance. /// Required for loaded user-defined roles after deprecated/new metadata precedence resolves. pub description: Option, + /// Optional model override applied by this role. + pub model: Option, /// Path to a role-specific config layer. pub config_file: Option, + /// Optional default spawn mode when `spawn_agent` omits `spawn_mode`. + pub spawn_mode: Option, + /// Optional watchdog interval for roles that should spawn as idle-time watchdog handles. 
+ pub watchdog_interval_s: Option, /// Candidate nicknames for agents spawned with this role. pub nickname_candidates: Option>, + /// Default fork-context behavior for this role. + pub fork_context: Option, } #[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)] @@ -1580,12 +1611,25 @@ pub struct AgentRoleToml { /// Required unless supplied by the referenced agent role file. pub description: Option, + /// Optional model override applied by this role. + pub model: Option, + /// Path to a role-specific config layer. /// Relative paths are resolved relative to the `config.toml` that defines them. pub config_file: Option, + /// Optional default spawn mode when `spawn_agent` omits `spawn_mode`. + pub spawn_mode: Option, + + /// Optional watchdog interval in seconds for roles that should behave as watchdogs. + #[schemars(range(min = 1))] + pub watchdog_interval_s: Option, + /// Candidate nicknames for agents spawned with this role. pub nickname_candidates: Option>, + + /// Default fork-context behavior for this role. 
+ pub fork_context: Option, } impl From for Tools { @@ -2309,6 +2353,7 @@ impl Config { .as_ref() .and_then(|agents| agents.job_max_runtime_seconds) .or(DEFAULT_AGENT_JOB_MAX_RUNTIME_SECONDS); + let agent_use_function_call_inbox = features.enabled(Feature::AgentFunctionCallInbox); if agent_job_max_runtime_seconds == Some(0) { return Err(std::io::Error::new( std::io::ErrorKind::InvalidInput, @@ -2323,6 +2368,15 @@ impl Config { "agents.job_max_runtime_seconds must fit within a 64-bit signed integer", )); } + let watchdog_interval_s = cfg + .watchdog_interval_s + .unwrap_or(DEFAULT_WATCHDOG_INTERVAL_S); + if watchdog_interval_s <= 0 { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "watchdog_interval_s must be at least 1", + )); + } let background_terminal_max_timeout = cfg .background_terminal_max_timeout .unwrap_or(DEFAULT_MAX_BACKGROUND_TERMINAL_TIMEOUT_MS) @@ -2597,6 +2651,8 @@ impl Config { agent_roles, memories: cfg.memories.unwrap_or_default().into(), agent_job_max_runtime_seconds, + agent_use_function_call_inbox, + watchdog_interval_s, codex_home, sqlite_home, log_dir, diff --git a/codex-rs/core/src/stream_events_utils.rs b/codex-rs/core/src/stream_events_utils.rs index cd77f1d5a3..af0d109dc9 100644 --- a/codex-rs/core/src/stream_events_utils.rs +++ b/codex-rs/core/src/stream_events_utils.rs @@ -428,6 +428,17 @@ pub(crate) fn last_assistant_message_from_item( pub(crate) fn response_input_to_response_item(input: &ResponseInputItem) -> Option { match input { + ResponseInputItem::FunctionCall { + name, + arguments, + call_id, + } => Some(ResponseItem::FunctionCall { + id: None, + name: name.clone(), + namespace: None, + arguments: arguments.clone(), + call_id: call_id.clone(), + }), ResponseInputItem::FunctionCallOutput { call_id, output } => { Some(ResponseItem::FunctionCallOutput { call_id: call_id.clone(), diff --git a/codex-rs/core/src/tasks/compact.rs b/codex-rs/core/src/tasks/compact.rs index a2d94bdc0a..656fb74556 100644 --- 
a/codex-rs/core/src/tasks/compact.rs +++ b/codex-rs/core/src/tasks/compact.rs @@ -44,6 +44,11 @@ impl SessionTask for CompactTask { ); crate::compact::run_compact_task(session.clone(), ctx, input).await }; + session + .services + .agent_control + .finish_watchdog_parent_compaction(session.conversation_id) + .await; None } } diff --git a/codex-rs/core/src/tasks/mod.rs b/codex-rs/core/src/tasks/mod.rs index 601f5cb27f..42f7b77863 100644 --- a/codex-rs/core/src/tasks/mod.rs +++ b/codex-rs/core/src/tasks/mod.rs @@ -30,6 +30,7 @@ use crate::hook_runtime::record_additional_contexts; use crate::hook_runtime::record_pending_input; use crate::models_manager::manager::ModelsManager; use crate::protocol::EventMsg; +use crate::protocol::TokenUsage; use crate::protocol::TurnAbortReason; use crate::protocol::TurnAbortedEvent; use crate::protocol::TurnCompleteEvent; @@ -296,6 +297,7 @@ impl Session { turn_context .turn_metadata_state .cancel_git_enrichment_task(); + self.snapshot_agent_send_input_on_turn_complete(); let mut active = self.active_turn.lock().await; let mut pending_input = Vec::::new(); @@ -415,6 +417,20 @@ impl Session { self.send_event(turn_context.as_ref(), event).await; } + async fn register_new_active_task( + &self, + task: RunningTask, + token_usage_at_turn_start: TokenUsage, + ) { + self.reset_turn_agent_send_input_flag(); + let mut active = self.active_turn.lock().await; + let mut turn = ActiveTurn::default(); + let mut turn_state = turn.turn_state.lock().await; + turn_state.token_usage_at_turn_start = token_usage_at_turn_start; + drop(turn_state); + turn.add_task(task); + *active = Some(turn); + } async fn take_active_turn(&self) -> Option { let mut active = self.active_turn.lock().await; active.take() diff --git a/codex-rs/core/src/tools/context.rs b/codex-rs/core/src/tools/context.rs index 58d040cbe6..d7ac456a18 100644 --- a/codex-rs/core/src/tools/context.rs +++ b/codex-rs/core/src/tools/context.rs @@ -431,6 +431,16 @@ pub(crate) fn 
response_input_to_code_mode_result(response: ResponseInputItem) -> raw_arguments: String::new(), }) } + ResponseInputItem::FunctionCall { + name, + arguments, + call_id, + } => serde_json::json!({ + "type": "function_call", + "name": name, + "arguments": arguments, + "call_id": call_id, + }), } } diff --git a/codex-rs/core/src/tools/handlers/multi_agents.rs b/codex-rs/core/src/tools/handlers/multi_agents.rs index 166dbd287d..38affed9b3 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents.rs @@ -59,12 +59,16 @@ pub(crate) fn parse_agent_id_targets( } pub(crate) use close_agent::Handler as CloseAgentHandler; +pub(crate) use compact_parent_context::Handler as CompactParentContextHandler; +pub(crate) use list_agents::Handler as ListAgentsHandler; pub(crate) use resume_agent::Handler as ResumeAgentHandler; pub(crate) use send_input::Handler as SendInputHandler; pub(crate) use spawn::Handler as SpawnAgentHandler; pub(crate) use wait::Handler as WaitAgentHandler; pub mod close_agent; +mod compact_parent_context; +mod list_agents; mod resume_agent; mod send_input; mod spawn; diff --git a/codex-rs/core/src/tools/handlers/multi_agents/compact_parent_context.rs b/codex-rs/core/src/tools/handlers/multi_agents/compact_parent_context.rs new file mode 100644 index 0000000000..8468023ad8 --- /dev/null +++ b/codex-rs/core/src/tools/handlers/multi_agents/compact_parent_context.rs @@ -0,0 +1,96 @@ +use super::*; +use crate::agent::WatchdogParentCompactionResult; + +pub(crate) struct Handler; + +#[async_trait] +impl ToolHandler for Handler { + type Output = CompactParentContextResult; + + fn kind(&self) -> ToolKind { + ToolKind::Function + } + + fn matches_kind(&self, payload: &ToolPayload) -> bool { + matches!(payload, ToolPayload::Function { .. }) + } + + async fn handle(&self, invocation: ToolInvocation) -> Result { + let ToolInvocation { + session, payload, .. 
+ } = invocation; + let arguments = function_arguments(payload)?; + let _args: CompactParentContextArgs = parse_arguments(&arguments)?; + let helper_thread_id = session.conversation_id; + let result = session + .services + .agent_control + .compact_parent_for_watchdog_helper(helper_thread_id) + .await + .map_err(|err| { + FunctionCallError::RespondToModel(format!("compact_parent_context failed: {err}")) + })?; + Ok(CompactParentContextResult::from(result)) + } +} + +#[derive(Debug, Deserialize)] +struct CompactParentContextArgs { + reason: Option, + evidence: Option, +} + +#[derive(Debug, Serialize)] +pub(crate) struct CompactParentContextResult { + kind: &'static str, + parent_thread_id: Option, + submission_id: Option, +} + +impl From for CompactParentContextResult { + fn from(value: WatchdogParentCompactionResult) -> Self { + match value { + WatchdogParentCompactionResult::NotWatchdogHelper => Self { + kind: "not_watchdog_helper", + parent_thread_id: None, + submission_id: None, + }, + WatchdogParentCompactionResult::ParentBusy { parent_thread_id } => Self { + kind: "parent_busy", + parent_thread_id: Some(parent_thread_id.to_string()), + submission_id: None, + }, + WatchdogParentCompactionResult::AlreadyInProgress { parent_thread_id } => Self { + kind: "already_in_progress", + parent_thread_id: Some(parent_thread_id.to_string()), + submission_id: None, + }, + WatchdogParentCompactionResult::Submitted { + parent_thread_id, + submission_id, + } => Self { + kind: "submitted", + parent_thread_id: Some(parent_thread_id.to_string()), + submission_id: Some(submission_id), + }, + } + } +} + +impl ToolOutput for CompactParentContextResult { + fn log_preview(&self) -> String { + tool_output_json_text(self, "compact_parent_context") + } + + fn success_for_logging(&self) -> bool { + true + } + + fn to_response_item(&self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem { + tool_output_response_item(call_id, payload, self, Some(true), "compact_parent_context") + } 
+ + fn code_mode_result(&self, _payload: &ToolPayload) -> JsonValue { + tool_output_code_mode_result(self, "compact_parent_context") + } +} diff --git a/codex-rs/core/src/tools/handlers/multi_agents/list_agents.rs b/codex-rs/core/src/tools/handlers/multi_agents/list_agents.rs new file mode 100644 index 0000000000..db52733129 --- /dev/null +++ b/codex-rs/core/src/tools/handlers/multi_agents/list_agents.rs @@ -0,0 +1,109 @@ +use super::*; +use crate::agent::AgentListing; +use crate::agent::agent_resolver::resolve_agent_target; + +pub(crate) struct Handler; + +#[async_trait] +impl ToolHandler for Handler { + type Output = ListAgentsResult; + + fn kind(&self) -> ToolKind { + ToolKind::Function + } + + fn matches_kind(&self, payload: &ToolPayload) -> bool { + matches!(payload, ToolPayload::Function { .. }) + } + + async fn handle(&self, invocation: ToolInvocation) -> Result { + let ToolInvocation { + session, + turn, + payload, + .. + } = invocation; + let arguments = function_arguments(payload)?; + let args: ListAgentsArgs = parse_arguments(&arguments)?; + let owner_thread_id = if args.all { + session.conversation_id + } else if let Some(target) = args + .id + .as_deref() + .map(str::trim) + .filter(|id| !id.is_empty()) + { + resolve_agent_target(&session, &turn, target).await? 
+ } else { + session.conversation_id + }; + let agents = session + .services + .agent_control + .list_agents(owner_thread_id, args.recursive, args.all) + .await + .map_err(|err| { + FunctionCallError::RespondToModel(format!("list_agents failed: {err}")) + })?; + Ok(ListAgentsResult { + agents: agents.into_iter().map(ListedAgent::from_listing).collect(), + }) + } +} + +#[derive(Debug, Deserialize)] +struct ListAgentsArgs { + id: Option, + #[serde(default = "default_recursive")] + recursive: bool, + #[serde(default)] + all: bool, +} + +fn default_recursive() -> bool { + true +} + +#[derive(Debug, Serialize)] +pub(crate) struct ListAgentsResult { + agents: Vec, +} + +#[derive(Debug, Serialize)] +struct ListedAgent { + thread_id: String, + parent_thread_id: Option, + status: AgentStatus, + depth: usize, +} + +impl ListedAgent { + fn from_listing(value: AgentListing) -> Self { + Self { + thread_id: value.thread_id.to_string(), + parent_thread_id: value + .parent_thread_id + .map(|thread_id| thread_id.to_string()), + status: value.status, + depth: value.depth, + } + } +} + +impl ToolOutput for ListAgentsResult { + fn log_preview(&self) -> String { + tool_output_json_text(self, "list_agents") + } + + fn success_for_logging(&self) -> bool { + true + } + + fn to_response_item(&self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem { + tool_output_response_item(call_id, payload, self, Some(true), "list_agents") + } + + fn code_mode_result(&self, _payload: &ToolPayload) -> JsonValue { + tool_output_code_mode_result(self, "list_agents") + } +} diff --git a/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs b/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs index 3c8527712b..d16d60ad92 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs @@ -1,4 +1,5 @@ use super::*; +use crate::agent::agent_resolver::resolve_agent_target; use 
crate::agent::control::render_input_preview; pub(crate) struct Handler; @@ -25,8 +26,8 @@ impl ToolHandler for Handler { } = invocation; let arguments = function_arguments(payload)?; let args: SendInputArgs = parse_arguments(&arguments)?; - let receiver_thread_id = parse_agent_id_target(&args.target)?; - let input_items = parse_collab_input(args.message, args.items)?; + let receiver_thread_id = resolve_agent_target(&session, &turn, &args.target).await?; + let input_items = parse_collab_input(args.message.clone(), args.items.clone())?; let prompt = render_input_preview(&input_items); let receiver_agent = session .services @@ -53,9 +54,15 @@ impl ToolHandler for Handler { .into(), ) .await; - let agent_control = session.services.agent_control.clone(); - let result = agent_control - .send_input(receiver_thread_id, input_items) + let result = session + .services + .agent_control + .send_agent_message_or_input( + receiver_thread_id, + session.conversation_id, + args.message, + args.items, + ) .await .map_err(|err| collab_agent_error(receiver_thread_id, err)); let status = session diff --git a/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs b/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs index 3e9360d9e7..2194af2b4c 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs @@ -1,11 +1,19 @@ use super::*; +use crate::agent::RemovedWatchdog; +use crate::agent::WatchdogRegistration; +use crate::agent::control::LiveAgent; use crate::agent::control::SpawnAgentOptions; use crate::agent::control::render_input_preview; +use crate::agent::next_thread_spawn_depth; use crate::agent::role::DEFAULT_ROLE_NAME; use crate::agent::role::apply_role_to_config; - -use crate::agent::exceeds_thread_spawn_depth_limit; -use crate::agent::next_thread_spawn_depth; +use crate::agent::role::default_fork_context_for_role; +use crate::agent::role::watchdog_interval_for_role; +use crate::config::Config; +use 
codex_features::Feature; +use codex_protocol::protocol::AgentSpawnMode; +use codex_protocol::protocol::SessionSource; +use std::collections::HashSet; pub(crate) struct Handler; @@ -41,11 +49,28 @@ impl ToolHandler for Handler { let session_source = turn.session_source.clone(); let child_depth = next_thread_spawn_depth(&session_source); let max_depth = turn.config.agent_max_depth; + let watchdog_interval_s = watchdog_interval_for_role(&turn.config, role_name); + let is_watchdog = watchdog_interval_s.is_some(); + + if is_watchdog && !turn.config.features.enabled(Feature::AgentWatchdog) { + return Err(FunctionCallError::RespondToModel( + "watchdogs are disabled".to_string(), + )); + } + if is_watchdog && matches!(session_source, SessionSource::SubAgent(_)) { + return Err(FunctionCallError::RespondToModel( + "watchdogs can only be spawned by root agents".to_string(), + )); + } if exceeds_thread_spawn_depth_limit(child_depth, max_depth) { return Err(FunctionCallError::RespondToModel( "Agent depth limit reached. 
Solve the task yourself.".to_string(), )); } + let fork_context = args + .fork_context + .unwrap_or_else(|| default_fork_context_for_role(&turn.config, role_name)); + session .send_event( &turn, @@ -53,71 +78,111 @@ impl ToolHandler for Handler { call_id: call_id.clone(), sender_thread_id: session.conversation_id, prompt: prompt.clone(), - model: args - .model_fallback_list - .as_ref() - .and_then(|list| list.first()) - .map(|candidate| candidate.model.clone()) - .unwrap_or_else(|| args.model.clone().unwrap_or_default()), - reasoning_effort: args - .model_fallback_list - .as_ref() - .and_then(|list| list.first()) - .and_then(|candidate| candidate.reasoning_effort) - .unwrap_or_else(|| args.reasoning_effort.unwrap_or_default()), + model: if fork_context { + String::new() + } else { + args.model_fallback_list + .as_ref() + .and_then(|list| list.first()) + .map(|candidate| candidate.model.clone()) + .unwrap_or_else(|| args.model.clone().unwrap_or_default()) + }, + reasoning_effort: if fork_context { + ReasoningEffort::default() + } else { + args.model_fallback_list + .as_ref() + .and_then(|list| list.first()) + .and_then(|candidate| candidate.reasoning_effort) + .unwrap_or_else(|| args.reasoning_effort.unwrap_or_default()) + }, } .into(), ) .await; let config = build_agent_spawn_config(&session.get_base_instructions().await, turn.as_ref())?; - - let mut candidates_to_try = collect_spawn_agent_model_candidates( - args.model_fallback_list.as_ref(), - args.model.as_deref(), - args.reasoning_effort, - ); - if candidates_to_try.is_empty() { - candidates_to_try.push(SpawnAgentModelCandidate { + let spawn_source = thread_spawn_source( + session.conversation_id, + &turn.session_source, + child_depth, + role_name, + /*task_name*/ None, + )?; + let mut candidates_to_try = if fork_context { + vec![SpawnAgentModelCandidate { model: None, reasoning_effort: None, - }); - } + }] + } else { + let mut candidates = collect_spawn_agent_model_candidates( + 
args.model_fallback_list.as_ref(), + args.model.as_deref(), + args.reasoning_effort, + ); + if candidates.is_empty() { + candidates.push(SpawnAgentModelCandidate { + model: None, + reasoning_effort: None, + }); + } + candidates + }; let mut spawn_result = None; for (idx, candidate) in candidates_to_try.iter().enumerate() { let mut candidate_config = config.clone(); - apply_requested_spawn_agent_model_overrides( - &session, - turn.as_ref(), - &mut candidate_config, - candidate.model.as_deref(), - candidate.reasoning_effort, - ) - .await?; + if !fork_context { + apply_requested_spawn_agent_model_overrides( + &session, + turn.as_ref(), + &mut candidate_config, + candidate.model.as_deref(), + candidate.reasoning_effort, + ) + .await?; + } apply_role_to_config(&mut candidate_config, role_name) .await .map_err(FunctionCallError::RespondToModel)?; apply_spawn_agent_runtime_overrides(&mut candidate_config, turn.as_ref())?; apply_spawn_agent_overrides(&mut candidate_config, child_depth); - let attempt_result = session - .services - .agent_control - .spawn_agent_with_metadata( + let attempt_result = if let Some(watchdog_interval_s) = watchdog_interval_s { + let thread_id = spawn_watchdog( + &session.services.agent_control, candidate_config, - input_items.clone(), - Some(thread_spawn_source( - session.conversation_id, - &turn.session_source, - child_depth, - role_name, - /*task_name*/ None, - )?), - SpawnAgentOptions { - fork_parent_spawn_call_id: args.fork_context.then(|| call_id.clone()), - }, + prompt.clone(), + session.conversation_id, + child_depth, + watchdog_interval_s, + spawn_source.clone(), ) - .await; + .await + .map_err(collab_spawn_error)?; + Ok(LiveAgent { + thread_id, + metadata: session + .services + .agent_control + .get_agent_metadata(thread_id) + .unwrap_or_default(), + status: session.services.agent_control.get_status(thread_id).await, + }) + } else { + session + .services + .agent_control + .spawn_agent_with_metadata( + candidate_config, + 
input_items.clone(), + Some(spawn_source.clone()), + SpawnAgentOptions { + fork_parent_spawn_call_id: fork_context.then(|| call_id.clone()), + ..Default::default() + }, + ) + .await + }; match attempt_result { Ok(spawned_agent) => { spawn_result = Some(spawned_agent); @@ -186,6 +251,13 @@ impl ToolHandler for Handler { prompt, model: effective_model, reasoning_effort: effective_reasoning_effort, + spawn_mode: if is_watchdog { + AgentSpawnMode::Watchdog + } else if fork_context { + AgentSpawnMode::Fork + } else { + AgentSpawnMode::Spawn + }, status, } .into(), @@ -200,7 +272,8 @@ impl ToolHandler for Handler { ); Ok(SpawnAgentResult { - agent_id: new_thread_id.to_string(), + agent_id: Some(new_thread_id.to_string()), + task_name: None, nickname, }) } @@ -210,17 +283,18 @@ impl ToolHandler for Handler { struct SpawnAgentArgs { message: Option, items: Option>, + task_name: Option, agent_type: Option, model: Option, model_fallback_list: Option>, reasoning_effort: Option, - #[serde(default)] - fork_context: bool, + fork_context: Option, } #[derive(Debug, Serialize)] pub(crate) struct SpawnAgentResult { - agent_id: String, + agent_id: Option, + task_name: Option, nickname: Option, } @@ -241,3 +315,56 @@ impl ToolOutput for SpawnAgentResult { tool_output_code_mode_result(self, "spawn_agent") } } + +async fn spawn_watchdog( + agent_control: &crate::agent::AgentControl, + config: Config, + prompt: String, + owner_thread_id: ThreadId, + child_depth: i32, + interval_s: i64, + spawn_source: SessionSource, +) -> crate::error::Result { + let target_thread_id = agent_control + .spawn_agent_handle(config.clone(), Some(spawn_source)) + .await?; + let superseded_before_register = agent_control + .unregister_watchdogs_for_owner(owner_thread_id) + .await; + shutdown_removed_watchdogs(agent_control, superseded_before_register).await; + let registration = WatchdogRegistration { + owner_thread_id, + target_thread_id, + child_depth, + interval_s, + prompt, + config, + }; + let 
superseded_after_register = match agent_control.register_watchdog(registration).await { + Ok(removed) => removed, + Err(err) => { + let _ = agent_control.close_agent(target_thread_id).await; + return Err(err); + } + }; + shutdown_removed_watchdogs(agent_control, superseded_after_register).await; + Ok(target_thread_id) +} + +async fn shutdown_removed_watchdogs( + agent_control: &crate::agent::AgentControl, + removed_watchdogs: Vec, +) { + let mut thread_ids = HashSet::new(); + for removed in removed_watchdogs { + thread_ids.insert(removed.target_thread_id); + if let Some(helper_id) = removed.active_helper_id { + thread_ids.insert(helper_id); + } + } + let mut thread_ids = thread_ids.into_iter().collect::>(); + thread_ids.sort_by_key(ToString::to_string); + for thread_id in thread_ids { + let _ = agent_control.close_agent(thread_id).await; + } +} diff --git a/codex-rs/core/src/tools/handlers/multi_agents/wait.rs b/codex-rs/core/src/tools/handlers/multi_agents/wait.rs index 2fe33f1edd..a8e728659b 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/wait.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/wait.rs @@ -1,15 +1,16 @@ use super::*; +use crate::agent::agent_resolver::resolve_agent_targets; use crate::agent::status::is_final; use crate::error::CodexErr; use futures::FutureExt; use futures::StreamExt; use futures::stream::FuturesUnordered; use std::collections::HashMap; +use std::collections::HashSet; use std::sync::Arc; use std::time::Duration; use tokio::sync::watch::Receiver; use tokio::time::Instant; - use tokio::time::timeout_at; pub(crate) struct Handler; @@ -36,7 +37,19 @@ impl ToolHandler for Handler { } = invocation; let arguments = function_arguments(payload)?; let args: WaitArgs = parse_arguments(&arguments)?; - let receiver_thread_ids = parse_agent_id_targets(args.targets)?; + + if let Some(owner_thread_id) = session + .services + .agent_control + .watchdog_owner_for_active_helper(session.conversation_id) + .await + { + return 
Err(FunctionCallError::RespondToModel(format!( + "wait_agent is not available to watchdog check-in agents. This thread is a one-shot watchdog check-in for owner {owner_thread_id}. Send the result to the parent/root agent with `send_input`. If you finish without `send_input`, runtime will forward your conclusory message to the owner as the mandatory fallback wake-up path. Exiting without either `send_input` or a final message is a bug; every watchdog check-in must wake the owner thread." + ))); + } + + let receiver_thread_ids = resolve_agent_targets(&session, &turn, args.targets).await?; let mut receiver_agents = Vec::with_capacity(receiver_thread_ids.len()); let mut target_by_thread_id = HashMap::with_capacity(receiver_thread_ids.len()); for receiver_thread_id in &receiver_thread_ids { @@ -60,6 +73,22 @@ impl ToolHandler for Handler { }); } + let watchdog_target_ids = session + .services + .agent_control + .watchdog_targets(&receiver_thread_ids) + .await; + let mut waited_thread_ids = Vec::new(); + let mut watchdog_statuses = Vec::new(); + split_wait_ids( + &session, + receiver_thread_ids, + &watchdog_target_ids, + &mut waited_thread_ids, + &mut watchdog_statuses, + ) + .await; + let timeout_ms = args.timeout_ms.unwrap_or(DEFAULT_WAIT_TIMEOUT_MS); let timeout_ms = match timeout_ms { ms if ms <= 0 => { @@ -75,7 +104,7 @@ impl ToolHandler for Handler { &turn, CollabWaitingBeginEvent { sender_thread_id: session.conversation_id, - receiver_thread_ids: receiver_thread_ids.clone(), + receiver_thread_ids: waited_thread_ids.clone(), receiver_agents: receiver_agents.clone(), call_id: call_id.clone(), } @@ -83,9 +112,31 @@ impl ToolHandler for Handler { ) .await; - let mut status_rxs = Vec::with_capacity(receiver_thread_ids.len()); + if waited_thread_ids.is_empty() { + let statuses_map = watchdog_statuses.iter().cloned().collect::>(); + let content = serde_json::to_string(&statuses_map).map_err(|err| { + FunctionCallError::Fatal(format!("failed to serialize wait_agent 
status: {err}")) + })?; + session + .send_event( + &turn, + CollabWaitingEndEvent { + sender_thread_id: session.conversation_id, + call_id, + agent_statuses: Vec::new(), + statuses: statuses_map, + } + .into(), + ) + .await; + return Err(FunctionCallError::RespondToModel(format!( + "wait_agent cannot be used to wait for watchdog check-ins. You passed only watchdog handle ids. Watchdog check-ins only happen after the current turn ends and the owner thread is idle for at least watchdog_interval_s. `wait_agent` on a watchdog handle is status-only and cannot confirm a new check-in. Do not poll with `wait_agent`, `list_agents`, or shell `sleep`: the owner thread is still active during this turn, so those calls cannot make the watchdog fire. Current watchdog handle statuses: {content}" + ))); + } + + let mut status_rxs = Vec::with_capacity(waited_thread_ids.len()); let mut initial_final_statuses = Vec::new(); - for id in &receiver_thread_ids { + for id in &waited_thread_ids { match session.services.agent_control.subscribe_status(*id).await { Ok(rx) => { let status = rx.borrow().clone(); @@ -98,8 +149,9 @@ impl ToolHandler for Handler { initial_final_statuses.push((*id, AgentStatus::NotFound)); } Err(err) => { - let mut statuses = HashMap::with_capacity(1); + let mut statuses = HashMap::with_capacity(1 + watchdog_statuses.len()); statuses.insert(*id, session.services.agent_control.get_status(*id).await); + statuses.extend(watchdog_statuses.iter().cloned()); session .send_event( &turn, @@ -124,7 +176,7 @@ impl ToolHandler for Handler { initial_final_statuses } else { let mut futures = FuturesUnordered::new(); - for (id, rx) in status_rxs.into_iter() { + for (id, rx) in status_rxs { let session = session.clone(); futures.push(wait_for_final_status(session, id, rx)); } @@ -153,16 +205,17 @@ impl ToolHandler for Handler { }; let timed_out = statuses.is_empty(); - let statuses_by_id = statuses.clone().into_iter().collect::>(); + let mut statuses_by_id = 
statuses.clone().into_iter().collect::>(); + statuses_by_id.extend(watchdog_statuses); let agent_statuses = build_wait_agent_statuses(&statuses_by_id, &receiver_agents); let result = WaitAgentResult { - status: statuses - .into_iter() + status: statuses_by_id + .iter() .filter_map(|(thread_id, status)| { target_by_thread_id - .get(&thread_id) + .get(thread_id) .cloned() - .map(|target| (target, status)) + .map(|target| (target, status.clone())) }) .collect(), timed_out, @@ -237,3 +290,20 @@ async fn wait_for_final_status( } } } + +async fn split_wait_ids( + session: &Arc, + requested_thread_ids: Vec, + watchdog_target_ids: &HashSet, + waited_thread_ids: &mut Vec, + watchdog_statuses: &mut Vec<(ThreadId, AgentStatus)>, +) { + for thread_id in requested_thread_ids { + if watchdog_target_ids.contains(&thread_id) { + let status = session.services.agent_control.get_status(thread_id).await; + watchdog_statuses.push((thread_id, status)); + } else { + waited_thread_ids.push(thread_id); + } + } +} diff --git a/codex-rs/core/src/tools/handlers/multi_agents_tests.rs b/codex-rs/core/src/tools/handlers/multi_agents_tests.rs index 8006ee2204..603484e5f0 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents_tests.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents_tests.rs @@ -268,7 +268,8 @@ async fn spawn_agent_uses_explorer_role_and_preserves_approval_policy() { "spawn_agent", function_payload(json!({ "message": "inspect this repo", - "agent_type": "explorer" + "agent_type": "explorer", + "fork_context": false })), ); let output = SpawnAgentHandler @@ -295,6 +296,150 @@ async fn spawn_agent_uses_explorer_role_and_preserves_approval_policy() { assert_eq!(snapshot.model_provider_id, "ollama"); } +#[tokio::test] +async fn spawn_agent_fork_context_ignores_child_model_overrides() { + let (mut session, turn) = make_session_and_context().await; + let manager = thread_manager(); + let root = manager + .start_thread((*turn.config).clone()) + .await + .expect("root thread should 
start"); + session.services.agent_control = manager.agent_control(); + session.conversation_id = root.thread_id; + let expected_model = turn.model_info.slug.clone(); + let expected_reasoning_effort = turn.reasoning_effort; + + let output = SpawnAgentHandler + .handle(invocation( + Arc::new(session), + Arc::new(turn), + "spawn_agent", + function_payload(json!({ + "message": "inspect this repo", + "model": "not-a-real-model", + "reasoning_effort": "low", + "fork_context": true + })), + )) + .await + .expect("spawn_agent should succeed"); + let (content, _) = expect_text_output(output); + let result: serde_json::Value = + serde_json::from_str(&content).expect("spawn_agent result should be json"); + let agent_id = parse_agent_id( + result["agent_id"] + .as_str() + .expect("spawn_agent result should include agent_id"), + ); + let snapshot = manager + .get_thread(agent_id) + .await + .expect("spawned agent thread should exist") + .config_snapshot() + .await; + + assert_eq!(snapshot.model, expected_model); + assert_eq!(snapshot.reasoning_effort, expected_reasoning_effort); +} + +#[tokio::test] +async fn multi_agent_v2_spawn_fork_context_ignores_child_model_overrides() { + let (mut session, turn) = make_session_and_context().await; + let manager = thread_manager(); + let root = manager + .start_thread((*turn.config).clone()) + .await + .expect("root thread should start"); + session.services.agent_control = manager.agent_control(); + session.conversation_id = root.thread_id; + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::MultiAgentV2) + .expect("test config should allow feature update"); + let turn = TurnContext { + config: Arc::new(config), + ..turn + }; + let expected_model = turn.model_info.slug.clone(); + let expected_reasoning_effort = turn.reasoning_effort; + + let output = SpawnAgentHandlerV2 + .handle(invocation( + Arc::new(session), + Arc::new(turn), + "spawn_agent", + function_payload(json!({ + "message": "inspect this repo", + 
"model": "not-a-real-model", + "reasoning_effort": "low", + "fork_context": true, + "task_name": "fork_context_v2" + })), + )) + .await + .expect("spawn_agent should succeed"); + let (content, _) = expect_text_output(output); + let result: serde_json::Value = + serde_json::from_str(&content).expect("spawn_agent result should be json"); + assert_eq!(result["task_name"], "/root/fork_context_v2"); + let agent_id = manager + .captured_ops() + .into_iter() + .map(|(thread_id, _)| thread_id) + .find(|thread_id| *thread_id != root.thread_id) + .expect("spawned agent should receive an op"); + let snapshot = manager + .get_thread(agent_id) + .await + .expect("spawned agent thread should exist") + .config_snapshot() + .await; + + assert_eq!(snapshot.model, expected_model); + assert_eq!(snapshot.reasoning_effort, expected_reasoning_effort); +} + +#[tokio::test] +async fn spawn_agent_watchdog_role_returns_handle_without_spawn_mode() { + let (mut session, mut turn) = make_session_and_context().await; + let manager = thread_manager(); + session.services.agent_control = manager.agent_control(); + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::AgentWatchdog) + .expect("test config should allow feature update"); + turn.config = Arc::new(config); + + let output = SpawnAgentHandler + .handle(invocation( + Arc::new(session), + Arc::new(turn), + "spawn_agent", + function_payload(json!({ + "message": "check in periodically", + "agent_type": "watchdog" + })), + )) + .await + .expect("spawn_agent should succeed"); + let (content, success) = expect_text_output(output); + let result: serde_json::Value = + serde_json::from_str(&content).expect("spawn_agent result should be json"); + let agent_id = parse_agent_id( + result["agent_id"] + .as_str() + .expect("spawn_agent result should include agent_id"), + ); + + assert_eq!(success, Some(true)); + assert_eq!( + manager.agent_control().get_status(agent_id).await, + AgentStatus::PendingInit + ); +} + 
#[tokio::test] async fn spawn_agent_returns_agent_id_without_task_name() { let (mut session, turn) = make_session_and_context().await; @@ -307,7 +452,8 @@ async fn spawn_agent_returns_agent_id_without_task_name() { Arc::new(turn), "spawn_agent", function_payload(json!({ - "message": "inspect this repo" + "message": "inspect this repo", + "fork_context": false })), )) .await @@ -317,7 +463,7 @@ async fn spawn_agent_returns_agent_id_without_task_name() { serde_json::from_str(&content).expect("spawn_agent result should be json"); assert!(result["agent_id"].is_string()); - assert!(result.get("task_name").is_none()); + assert_eq!(result["task_name"], serde_json::Value::Null); assert!(result.get("nickname").is_some()); assert_eq!(success, Some(true)); } @@ -344,7 +490,8 @@ async fn multi_agent_v2_spawn_requires_task_name() { Arc::new(turn), "spawn_agent", function_payload(json!({ - "message": "inspect this repo" + "message": "inspect this repo", + "fork_context": false })), ); let Err(err) = SpawnAgentHandlerV2.handle(invocation).await else { @@ -363,7 +510,7 @@ async fn spawn_agent_errors_when_manager_dropped() { Arc::new(session), Arc::new(turn), "spawn_agent", - function_payload(json!({"message": "hello"})), + function_payload(json!({"message": "hello", "fork_context": false})), ); let Err(err) = SpawnAgentHandler.handle(invocation).await else { panic!("spawn should fail without a manager"); @@ -1253,7 +1400,8 @@ async fn spawn_agent_reapplies_runtime_sandbox_after_role_config() { "spawn_agent", function_payload(json!({ "message": "await this command", - "agent_type": "explorer" + "agent_type": "explorer", + "fork_context": false })), ); let output = SpawnAgentHandler @@ -1313,7 +1461,7 @@ async fn spawn_agent_rejects_when_depth_limit_exceeded() { Arc::new(session), Arc::new(turn), "spawn_agent", - function_payload(json!({"message": "hello"})), + function_payload(json!({"message": "hello", "fork_context": false})), ); let Err(err) = 
SpawnAgentHandler.handle(invocation).await else { panic!("spawn should fail when depth limit exceeded"); @@ -1353,7 +1501,7 @@ async fn spawn_agent_allows_depth_up_to_configured_max_depth() { Arc::new(session), Arc::new(turn), "spawn_agent", - function_payload(json!({"message": "hello"})), + function_payload(json!({"message": "hello", "fork_context": false})), ); let output = SpawnAgentHandler .handle(invocation) @@ -1429,7 +1577,10 @@ async fn send_input_rejects_invalid_id() { let FunctionCallError::RespondToModel(msg) = err else { panic!("expected respond-to-model error"); }; - assert!(msg.starts_with("invalid agent id not-a-uuid:")); + assert_eq!( + msg, + "agent_name must use only lowercase letters, digits, and underscores" + ); } #[tokio::test] @@ -1766,7 +1917,7 @@ async fn wait_agent_rejects_invalid_target() { let FunctionCallError::RespondToModel(msg) = err else { panic!("expected respond-to-model error"); }; - assert!(msg.starts_with("invalid agent id invalid:")); + assert!(msg.contains("invalid")); } #[tokio::test] @@ -1783,7 +1934,7 @@ async fn wait_agent_rejects_empty_targets() { }; assert_eq!( err, - FunctionCallError::RespondToModel("agent ids must be non-empty".to_string()) + FunctionCallError::RespondToModel("agent targets must be non-empty".to_string()) ); } @@ -2507,6 +2658,7 @@ async fn build_agent_spawn_config_uses_turn_context_values() { let config = build_agent_spawn_config(&base_instructions, &turn).expect("spawn config"); let mut expected = (*turn.config).clone(); + expected.features = config.features.clone(); expected.base_instructions = Some(base_instructions.text); expected.model = Some(turn.model_info.slug.clone()); expected.model_provider = turn.provider.clone(); diff --git a/codex-rs/core/src/tools/handlers/multi_agents_v2/list_agents.rs b/codex-rs/core/src/tools/handlers/multi_agents_v2/list_agents.rs index e18547db92..bca926791a 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents_v2/list_agents.rs +++ 
b/codex-rs/core/src/tools/handlers/multi_agents_v2/list_agents.rs @@ -31,7 +31,7 @@ impl ToolHandler for Handler { let agents = session .services .agent_control - .list_agents(&turn.session_source, args.path_prefix.as_deref()) + .list_agents_by_path(&turn.session_source, args.path_prefix.as_deref()) .await .map_err(collab_spawn_error)?; diff --git a/codex-rs/core/src/tools/handlers/multi_agents_v2/spawn.rs b/codex-rs/core/src/tools/handlers/multi_agents_v2/spawn.rs index eff49c0447..03e8fd8d10 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents_v2/spawn.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents_v2/spawn.rs @@ -4,7 +4,9 @@ use crate::agent::control::render_input_preview; use crate::agent::next_thread_spawn_depth; use crate::agent::role::DEFAULT_ROLE_NAME; use crate::agent::role::apply_role_to_config; +use crate::agent::role::default_fork_context_for_role; use codex_protocol::AgentPath; +use codex_protocol::protocol::AgentSpawnMode; use codex_protocol::protocol::InterAgentCommunication; use codex_protocol::protocol::Op; @@ -49,6 +51,10 @@ impl ToolHandler for Handler { "Agent depth limit reached. 
Solve the task yourself.".to_string(), )); } + let fork_context = args + .fork_context + .unwrap_or_else(|| default_fork_context_for_role(&turn.config, role_name)); + session .send_event( &turn, @@ -56,18 +62,24 @@ impl ToolHandler for Handler { call_id: call_id.clone(), sender_thread_id: session.conversation_id, prompt: prompt.clone(), - model: args - .model_fallback_list - .as_ref() - .and_then(|list| list.first()) - .map(|candidate| candidate.model.clone()) - .unwrap_or_else(|| args.model.clone().unwrap_or_default()), - reasoning_effort: args - .model_fallback_list - .as_ref() - .and_then(|list| list.first()) - .and_then(|candidate| candidate.reasoning_effort) - .unwrap_or_else(|| args.reasoning_effort.unwrap_or_default()), + model: if fork_context { + String::new() + } else { + args.model_fallback_list + .as_ref() + .and_then(|list| list.first()) + .map(|candidate| candidate.model.clone()) + .unwrap_or_else(|| args.model.clone().unwrap_or_default()) + }, + reasoning_effort: if fork_context { + ReasoningEffort::default() + } else { + args.model_fallback_list + .as_ref() + .and_then(|list| list.first()) + .and_then(|candidate| candidate.reasoning_effort) + .unwrap_or_else(|| args.reasoning_effort.unwrap_or_default()) + }, } .into(), ) @@ -102,29 +114,39 @@ impl ToolHandler for Handler { } (_, initial_operation) => initial_operation, }; - let mut candidates_to_try = collect_spawn_agent_model_candidates( - args.model_fallback_list.as_ref(), - args.model.as_deref(), - args.reasoning_effort, - ); - if candidates_to_try.is_empty() { - candidates_to_try.push(SpawnAgentModelCandidate { + let mut candidates_to_try = if fork_context { + vec![SpawnAgentModelCandidate { model: None, reasoning_effort: None, - }); - } + }] + } else { + let mut candidates = collect_spawn_agent_model_candidates( + args.model_fallback_list.as_ref(), + args.model.as_deref(), + args.reasoning_effort, + ); + if candidates.is_empty() { + candidates.push(SpawnAgentModelCandidate { + model: None, + 
reasoning_effort: None, + }); + } + candidates + }; let mut spawn_result = None; for (idx, candidate) in candidates_to_try.iter().enumerate() { let mut candidate_config = config.clone(); - apply_requested_spawn_agent_model_overrides( - &session, - turn.as_ref(), - &mut candidate_config, - candidate.model.as_deref(), - candidate.reasoning_effort, - ) - .await?; + if !fork_context { + apply_requested_spawn_agent_model_overrides( + &session, + turn.as_ref(), + &mut candidate_config, + candidate.model.as_deref(), + candidate.reasoning_effort, + ) + .await?; + } apply_role_to_config(&mut candidate_config, role_name) .await .map_err(FunctionCallError::RespondToModel)?; @@ -138,7 +160,8 @@ impl ToolHandler for Handler { initial_agent_op.clone(), Some(spawn_source.clone()), SpawnAgentOptions { - fork_parent_spawn_call_id: args.fork_context.then(|| call_id.clone()), + fork_parent_spawn_call_id: fork_context.then(|| call_id.clone()), + ..Default::default() }, ) .await; @@ -210,6 +233,11 @@ impl ToolHandler for Handler { prompt, model: effective_model, reasoning_effort: effective_reasoning_effort, + spawn_mode: if fork_context { + AgentSpawnMode::Fork + } else { + AgentSpawnMode::Spawn + }, status, } .into(), @@ -244,8 +272,7 @@ struct SpawnAgentArgs { model: Option, model_fallback_list: Option>, reasoning_effort: Option, - #[serde(default)] - fork_context: bool, + fork_context: Option, } #[derive(Debug, Serialize)] diff --git a/codex-rs/core/src/tools/js_repl/mod.rs b/codex-rs/core/src/tools/js_repl/mod.rs index beb2930b22..d3e66b0ee1 100644 --- a/codex-rs/core/src/tools/js_repl/mod.rs +++ b/codex-rs/core/src/tools/js_repl/mod.rs @@ -739,6 +739,11 @@ impl JsReplManager { fn summarize_tool_call_response(response: &ResponseInputItem) -> JsReplToolCallResponseSummary { match response { ResponseInputItem::Message { content, .. } => Self::summarize_message_payload(content), + ResponseInputItem::FunctionCall { arguments, .. 
} => Self::summarize_text_payload( + Some("function_call"), + JsReplToolCallPayloadKind::FunctionText, + arguments, + ), ResponseInputItem::FunctionCallOutput { output, .. } => { let payload_kind = if output.content_items().is_some() { JsReplToolCallPayloadKind::FunctionContentItems diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index bca1dfc075..c366b89e68 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -58,7 +58,7 @@ use codex_tools::create_code_mode_tool; use codex_tools::create_exec_command_tool; use codex_tools::create_js_repl_reset_tool; use codex_tools::create_js_repl_tool; -use codex_tools::create_list_agents_tool; +use codex_tools::create_list_agents_tool as create_list_agents_tool_v2; use codex_tools::create_list_dir_tool; use codex_tools::create_list_mcp_resource_templates_tool; use codex_tools::create_list_mcp_resources_tool; @@ -188,6 +188,7 @@ pub(crate) struct ToolsConfig { pub can_request_original_image_detail: bool, pub collab_tools: bool, pub multi_agent_v2: bool, + pub agent_watchdog: bool, pub request_user_input: bool, pub default_mode_request_user_input: bool, pub experimental_supported_tools: Vec, @@ -237,6 +238,8 @@ impl ToolsConfig { include_js_repl && features.enabled(Feature::JsReplToolsOnly); let include_collab_tools = features.enabled(Feature::Collab); let include_multi_agent_v2 = features.enabled(Feature::MultiAgentV2); + let include_agent_watchdog = + include_collab_tools && features.enabled(Feature::AgentWatchdog); let include_agent_jobs = features.enabled(Feature::SpawnCsv); let include_request_user_input = !matches!(session_source, SessionSource::SubAgent(_)); let include_default_mode_request_user_input = @@ -322,6 +325,7 @@ impl ToolsConfig { can_request_original_image_detail: include_original_image_detail, collab_tools: include_collab_tools, multi_agent_v2: include_multi_agent_v2, + agent_watchdog: include_agent_watchdog, request_user_input: 
include_request_user_input, default_mode_request_user_input: include_default_mode_request_user_input, experimental_supported_tools: model_info.experimental_supported_tools.clone(), @@ -637,6 +641,90 @@ fn create_agent_tools_namespace(tools: Vec) -> ToolSpec { }) } +fn create_compact_parent_context_tool() -> ToolSpec { + let properties = BTreeMap::from([ + ( + "reason".to_string(), + JsonSchema::String { + description: Some( + "Optional short reason describing why the parent appears stuck.".to_string(), + ), + }, + ), + ( + "evidence".to_string(), + JsonSchema::String { + description: Some( + "Optional concrete evidence of non-progress, such as repeated identical replies with no tool or file actions.".to_string(), + ), + }, + ), + ]); + + ToolSpec::Function(ResponsesApiTool { + name: "compact_parent_context".to_string(), + description: "Watchdog-only: request compaction for the watchdog helper's parent thread when it is idle and appears stuck." + .to_string(), + strict: false, + defer_loading: Some(true), + parameters: JsonSchema::Object { + properties, + required: None, + additional_properties: Some(false.into()), + }, + output_schema: None, + }) +} + +fn create_list_agents_tool(agent_watchdog: bool) -> ToolSpec { + let description = if agent_watchdog { + "List agents spawned by an agent, optionally recursively. This is a status view; polling it will not make a watchdog fire." + } else { + "List agents spawned by an agent, optionally recursively." + }; + let properties = BTreeMap::from([ + ( + "id".to_string(), + JsonSchema::String { + description: Some( + "Identifier of the parent agent whose spawned agents to list. Defaults to the current agent." + .to_string(), + ), + }, + ), + ( + "recursive".to_string(), + JsonSchema::Boolean { + description: Some( + "When true (default), include all descendants recursively. When false, include only direct children." 
+ .to_string(), + ), + }, + ), + ( + "all".to_string(), + JsonSchema::Boolean { + description: Some( + "When true, include completed/failed/canceled agents in addition to live agents." + .to_string(), + ), + }, + ), + ]); + + ToolSpec::Function(ResponsesApiTool { + name: "list_agents".to_string(), + description: description.to_string(), + strict: false, + defer_loading: None, + parameters: JsonSchema::Object { + properties, + required: None, + additional_properties: Some(false.into()), + }, + output_schema: None, + }) +} fn register_agent_tool_handler(builder: &mut ToolRegistryBuilder, name: &str, handler: Arc) where H: crate::tools::registry::ToolHandler + 'static, @@ -689,6 +777,8 @@ pub(crate) fn build_specs_with_discoverable_tools( use crate::tools::handlers::UnifiedExecHandler; use crate::tools::handlers::ViewImageHandler; use crate::tools::handlers::multi_agents::CloseAgentHandler; + use crate::tools::handlers::multi_agents::CompactParentContextHandler; + use crate::tools::handlers::multi_agents::ListAgentsHandler; use crate::tools::handlers::multi_agents::ResumeAgentHandler; use crate::tools::handlers::multi_agents::SendInputHandler; use crate::tools::handlers::multi_agents::SpawnAgentHandler; @@ -1032,7 +1122,7 @@ pub(crate) fn build_specs_with_discoverable_tools( if config.collab_tools { if config.multi_agent_v2 { - let agent_tools = vec![ + let mut agent_tools = vec![ create_spawn_agent_tool_v2(SpawnAgentToolOptions { available_models: &config.available_models, agent_type_description: crate::agent::role::spawn_tool_spec::build( @@ -1047,8 +1137,11 @@ pub(crate) fn build_specs_with_discoverable_tools( max_timeout_ms: MAX_WAIT_TIMEOUT_MS, }), create_close_agent_tool_v2(), - create_list_agents_tool(), + create_list_agents_tool_v2(), ]; + if config.agent_watchdog { + agent_tools.push(create_compact_parent_context_tool()); + } push_tool_spec( &mut builder, create_agent_tools_namespace(agent_tools), @@ -1066,7 +1159,7 @@ pub(crate) fn 
build_specs_with_discoverable_tools( register_agent_tool_handler(&mut builder, "close_agent", Arc::new(CloseAgentHandlerV2)); register_agent_tool_handler(&mut builder, "list_agents", Arc::new(ListAgentsHandlerV2)); } else { - let agent_tools = vec![ + let mut agent_tools = vec![ create_spawn_agent_tool_v1(SpawnAgentToolOptions { available_models: &config.available_models, agent_type_description: crate::agent::role::spawn_tool_spec::build( @@ -1082,6 +1175,10 @@ pub(crate) fn build_specs_with_discoverable_tools( }), create_close_agent_tool_v1(), ]; + if config.agent_watchdog { + agent_tools.push(create_list_agents_tool(config.agent_watchdog)); + agent_tools.push(create_compact_parent_context_tool()); + } push_tool_spec( &mut builder, create_agent_tools_namespace(agent_tools), @@ -1093,6 +1190,12 @@ pub(crate) fn build_specs_with_discoverable_tools( register_agent_tool_handler(&mut builder, "resume_agent", Arc::new(ResumeAgentHandler)); register_agent_tool_handler(&mut builder, "wait_agent", Arc::new(WaitAgentHandler)); register_agent_tool_handler(&mut builder, "close_agent", Arc::new(CloseAgentHandler)); + register_agent_tool_handler(&mut builder, "list_agents", Arc::new(ListAgentsHandler)); + register_agent_tool_handler( + &mut builder, + "compact_parent_context", + Arc::new(CompactParentContextHandler), + ); } } diff --git a/codex-rs/core/src/tools/spec_tests.rs b/codex-rs/core/src/tools/spec_tests.rs index 91fd16a325..c6492de912 100644 --- a/codex-rs/core/src/tools/spec_tests.rs +++ b/codex-rs/core/src/tools/spec_tests.rs @@ -128,7 +128,7 @@ fn assert_contains_tool_names(tools: &[ConfiguredToolSpec], expected_subset: &[& use std::collections::HashSet; let mut names = HashSet::new(); let mut duplicates = Vec::new(); - for name in tools.iter().map(ConfiguredToolSpec::name) { + for name in tools.iter().flat_map(configured_tool_spec_names) { if !names.insert(name) { duplicates.push(name); } @@ -145,10 +145,23 @@ fn assert_contains_tool_names(tools: 
&[ConfiguredToolSpec], expected_subset: &[& } } +fn configured_tool_spec_names(tool: &ConfiguredToolSpec) -> Vec<&str> { + match &tool.spec { + ToolSpec::Namespace(namespace) => namespace + .tools + .iter() + .map(|tool| match tool { + codex_tools::ResponsesApiNamespaceTool::Function(tool) => tool.name.as_str(), + }) + .collect(), + _ => vec![tool.name()], + } +} + fn assert_lacks_tool_name(tools: &[ConfiguredToolSpec], expected_absent: &str) { let names = tools .iter() - .map(ConfiguredToolSpec::name) + .flat_map(configured_tool_spec_names) .collect::>(); assert!( !names.contains(&expected_absent), @@ -188,10 +201,28 @@ fn wait_agent_timeout_options() -> WaitAgentTimeoutOptions { } fn find_tool<'a>(tools: &'a [ConfiguredToolSpec], expected_name: &str) -> &'a ConfiguredToolSpec { - tools - .iter() - .find(|tool| tool.name() == expected_name) - .unwrap_or_else(|| panic!("expected tool {expected_name}")) + if let Some(tool) = tools.iter().find(|tool| tool.name() == expected_name) { + return tool; + } + for tool in tools { + let ToolSpec::Namespace(namespace) = &tool.spec else { + continue; + }; + if let Some(tool) = namespace.tools.iter().find_map(|tool| match tool { + codex_tools::ResponsesApiNamespaceTool::Function(tool) + if tool.name == expected_name => + { + Some(tool.clone()) + } + _ => None, + }) { + return Box::leak(Box::new(ConfiguredToolSpec::new( + ToolSpec::Function(tool), + /*supports_parallel_tool_calls*/ false, + ))); + } + } + panic!("expected tool {expected_name}") } fn strip_descriptions_schema(schema: &mut JsonSchema) { @@ -357,28 +388,30 @@ fn test_full_toolset_specs_for_gpt5_codex_unified_exec_web_search() { ] { expected.insert(spec.name().to_string(), spec); } - let collab_specs = if config.multi_agent_v2 { + let mut collab_specs = if config.multi_agent_v2 { vec![ create_spawn_agent_tool_v2(spawn_agent_tool_options(&config)), create_send_message_tool(), create_wait_agent_tool_v2(wait_agent_timeout_options()), create_close_agent_tool_v2(), + 
create_list_agents_tool_v2(), ] } else { - vec![ + let mut collab_specs = vec![ create_spawn_agent_tool_v1(spawn_agent_tool_options(&config)), create_send_input_tool_v1(), + create_resume_agent_tool(), create_wait_agent_tool_v1(wait_agent_timeout_options()), create_close_agent_tool_v1(), - ] + ]; + if config.agent_watchdog { + collab_specs.push(create_list_agents_tool(config.agent_watchdog)); + collab_specs.push(create_compact_parent_context_tool()); + } + collab_specs }; - for spec in collab_specs { - expected.insert(spec.name().to_string(), spec); - } - if !config.multi_agent_v2 { - let spec = create_resume_agent_tool(); - expected.insert(spec.name().to_string(), spec); - } + let spec = create_agent_tools_namespace(collab_specs.split_off(0)); + expected.insert(spec.name().to_string(), spec); if config.exec_permission_approvals_enabled { let spec = create_request_permissions_tool(request_permissions_tool_description()); @@ -1080,7 +1113,16 @@ fn assert_model_tools( let model_visible_specs = router.model_visible_specs(); let tool_names = model_visible_specs .iter() - .map(ToolSpec::name) + .flat_map(|tool| match tool { + ToolSpec::Namespace(namespace) => namespace + .tools + .iter() + .map(|tool| match tool { + codex_tools::ResponsesApiNamespaceTool::Function(tool) => tool.name.as_str(), + }) + .collect::>(), + _ => vec![tool.name()], + }) .collect::>(); assert_eq!(&tool_names, &expected_tools,); } diff --git a/codex-rs/core/subagent_prompt.md b/codex-rs/core/subagent_prompt.md new file mode 100644 index 0000000000..1ca5e2d845 --- /dev/null +++ b/codex-rs/core/subagent_prompt.md @@ -0,0 +1,39 @@ +# You are a Subagent + +You are a **subagent** in a multi-agent Codex session. Your role is no longer root. Your goal is the task given by the parent/root agent. + +Term definitions in this file: +- **parent thread**: the thread that spawned this subagent. +- **root thread**: the top-level user-facing thread. 
+- **`send_input`**: send a message to an existing agent thread; it does not spawn agents. Delivery is asynchronous. +- **durable state**: thread-level task state needed across later turns/check-ins (not disk/database persistence). +- In this runtime, when a subagent calls `send_input`, `id = "parent"` and `id = "root"` both route to the immediate parent thread. + +## Subagent Responsibilities + +- Stay within parent/root scope (listed files/questions/constraints). Use additional files/tools only when needed to complete or verify the task. +- Blocking question = one clarification to parent via `send_input`. Ask only if missing information would change user-visible output, tool/action choice (multi-agent tool, target thread, or ask-vs-continue), file edits, control flow, or durable-state decisions; otherwise state one assumption and continue. +- Prefer concrete progress: edit files, run commands, and validate outcomes. +- Your responses go to the root/parent agent, not the end user. + +## Multi-Agent Guidance (Upstream Surface) + +The only multi-agent tools available in this environment are `spawn_agent`, `send_input`, `wait`, `close_agent`, and `list_agents`. + +Important: to coordinate with parent/root, use `send_input`. A plain assistant message in your own thread does not reliably notify the parent. + +You can call `send_input` without an `id` (or with `id = "parent"` / `id = "root"`); these forms target the immediate parent thread in this runtime. + +## Reporting Expectations + +When you make meaningful progress or complete a task, report back with: + +- The key outcome. +- Files changed (with paths). +- Commands run. +- Validation performed (tests, checks, or observed outputs). +- Risks, follow-ups, or open questions. + +Be specific enough that the root agent can integrate your work safely. + +Do not reference multi-agent tools that do not exist in the upstream surface. 
diff --git a/codex-rs/core/subagent_watchdog_prompt.md b/codex-rs/core/subagent_watchdog_prompt.md new file mode 100644 index 0000000000..1d8b9196cb --- /dev/null +++ b/codex-rs/core/subagent_watchdog_prompt.md @@ -0,0 +1,6 @@ +## Watchdog-only Guidance + +If you are acting as a watchdog check-in agent, `compact_parent_context` may be available. + +- Use `compact_parent_context` only when the parent thread is idle and appears stuck. +- `compact_parent_context` is not part of the general subagent tool surface; do not mention or rely on it unless you are explicitly operating as a watchdog check-in agent. diff --git a/codex-rs/core/tests/suite/agent_jobs.rs b/codex-rs/core/tests/suite/agent_jobs.rs index b275b1878a..9810853764 100644 --- a/codex-rs/core/tests/suite/agent_jobs.rs +++ b/codex-rs/core/tests/suite/agent_jobs.rs @@ -205,11 +205,16 @@ fn message_input_texts(body: &Value) -> Vec { }; items .iter() - .filter(|item| item.get("type").and_then(Value::as_str) == Some("message")) + .filter(|item| { + item.get("role").and_then(Value::as_str).is_some() + || item.get("type").and_then(Value::as_str) == Some("message") + }) .filter_map(|item| item.get("content").and_then(Value::as_array)) .flatten() - .filter(|span| span.get("type").and_then(Value::as_str) == Some("input_text")) - .filter_map(|span| span.get("text").and_then(Value::as_str)) + .filter_map(|span| match span.get("type").and_then(Value::as_str) { + Some("input_text") | None => span.get("text").and_then(Value::as_str), + _ => None, + }) .map(str::to_string) .collect() } @@ -320,9 +325,31 @@ async fn spawn_agents_on_csv_runs_and_exports() -> Result<()> { test.submit_turn("run batch job").await?; let output = fs::read_to_string(&output_path)?; - assert!(output.contains("result_json")); - assert!(output.contains("item_id")); - assert!(output.contains("\"item_id\"")); + let mut lines = output.lines(); + let headers = lines.next().expect("csv headers"); + let header_cols = parse_simple_csv_line(headers); + let 
status_index = header_cols + .iter() + .position(|header| header == "status") + .expect("status column"); + let result_json_index = header_cols + .iter() + .position(|header| header == "result_json") + .expect("result_json column"); + assert!(header_cols.iter().any(|header| header == "result_json")); + assert!(header_cols.iter().any(|header| header == "item_id")); + let rows: Vec> = lines.map(parse_simple_csv_line).collect(); + assert_eq!(rows.len(), 2); + assert_eq!( + rows.iter() + .map(|cols| cols[status_index].as_str()) + .collect::>(), + vec!["completed", "completed"] + ); + assert!( + rows.iter() + .all(|cols| !cols[result_json_index].trim().is_empty()) + ); Ok(()) } @@ -423,21 +450,28 @@ async fn spawn_agents_on_csv_stop_halts_future_items() -> Result<()> { test.submit_turn("run job").await?; let output = fs::read_to_string(&output_path)?; - let rows: Vec<&str> = output.lines().skip(1).collect(); + let mut lines = output.lines(); + let headers = lines.next().expect("csv headers"); + let header_cols = parse_simple_csv_line(headers); + let job_id_index = header_cols + .iter() + .position(|header| header == "job_id") + .expect("job_id column"); + let rows: Vec<&str> = lines.collect(); assert_eq!(rows.len(), 3); - let job_id = rows + let job_id: String = rows .first() - .and_then(|line| { - parse_simple_csv_line(line) - .iter() - .find(|value| value.len() == 36) - .cloned() - }) + .map(|line| parse_simple_csv_line(line)) + .and_then(|cols| cols.get(job_id_index).cloned()) .expect("job_id from csv"); let db = test.codex.state_db().expect("state db"); let job = db.get_agent_job(job_id.as_str()).await?.expect("job"); - assert_eq!(job.status, codex_state::AgentJobStatus::Cancelled); let progress = db.get_agent_job_progress(job_id.as_str()).await?; + assert_eq!( + job.status, + codex_state::AgentJobStatus::Cancelled, + "unexpected final job state: job={job:?} progress={progress:?} output={output}" + ); assert_eq!(progress.total_items, 3); 
assert_eq!(progress.completed_items, 1); assert_eq!(progress.failed_items, 0); diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs index 9be02cf97b..f8c9492e4d 100644 --- a/codex-rs/core/tests/suite/client.rs +++ b/codex-rs/core/tests/suite/client.rs @@ -1746,12 +1746,18 @@ async fn includes_developer_instructions_message_in_request() { .iter() .filter(|item| item.get("role").and_then(|role| role.as_str()) == Some("developer")) .collect(); + let developer_contents: Vec<&str> = developer_messages + .iter() + .filter_map(|item| item.get("content").and_then(serde_json::Value::as_array)) + .flat_map(|content| content.iter()) + .filter(|span| span.get("type").and_then(serde_json::Value::as_str) == Some("input_text")) + .filter_map(|span| span.get("text").and_then(serde_json::Value::as_str)) + .collect(); assert!( - developer_messages + developer_contents .iter() - .any(|item| message_input_texts(item).contains(&"be useful")), - "expected developer instructions in a developer message, got {:?}", - request_body["input"] + .any(|content| content.contains("be useful")), + "expected developer instructions in a developer message, got {developer_contents:?}", ); assert_message_role(&request_body["input"][1], "user"); diff --git a/codex-rs/core/tests/suite/prompt_caching.rs b/codex-rs/core/tests/suite/prompt_caching.rs index 1bcd068614..38abc8cca6 100644 --- a/codex-rs/core/tests/suite/prompt_caching.rs +++ b/codex-rs/core/tests/suite/prompt_caching.rs @@ -180,7 +180,8 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> { "spawn_agent", "send_input", "resume_agent", - "wait_agent", + "list_agents", + "wait", "close_agent", ]); let body0 = req1.single_request().body_json(); diff --git a/codex-rs/core/tests/suite/subagent_notifications.rs b/codex-rs/core/tests/suite/subagent_notifications.rs index a1a8c29b9c..12e7b10219 100644 --- a/codex-rs/core/tests/suite/subagent_notifications.rs +++ 
b/codex-rs/core/tests/suite/subagent_notifications.rs @@ -328,7 +328,7 @@ async fn spawned_child_receives_forked_parent_context() -> Result<()> { ) .await; - let _child_request_log = mount_sse_once_match( + let child_request_log = mount_sse_once_match( &server, |req: &wiremock::Request| body_contains(req, CHILD_PROMPT), sse(vec![ @@ -362,7 +362,9 @@ async fn spawned_child_receives_forked_parent_context() -> Result<()> { let _ = seed_turn.single_request(); test.submit_turn(TURN_1_PROMPT).await?; - let _ = spawn_turn.single_request(); + let parent_spawn_request = spawn_turn.single_request(); + let parent_spawn_body = parent_spawn_request.body_json().clone(); + let _ = wait_for_requests(&child_request_log).await?; let deadline = Instant::now() + Duration::from_secs(2); let child_request = loop { @@ -389,6 +391,23 @@ async fn spawned_child_receives_forked_parent_context() -> Result<()> { let child_body = child_request .body_json::() .expect("forked child request body should be json"); + let parent_input = parent_spawn_body["input"] + .as_array() + .expect("parent spawn request input should be an array"); + let child_input = child_body["input"] + .as_array() + .expect("forked child request input should be an array"); + assert_eq!( + &child_input[..parent_input.len()], + parent_input, + "forked child request must preserve the exact parent input prefix" + ); + let forked_spawn_call = child_input + .get(parent_input.len()) + .unwrap_or_else(|| panic!("expected forked child request to include spawn_agent call")); + assert_eq!(forked_spawn_call["type"].as_str(), Some("function_call")); + assert_eq!(forked_spawn_call["name"].as_str(), Some("spawn_agent")); + assert_eq!(forked_spawn_call["call_id"].as_str(), Some(SPAWN_CALL_ID)); let function_call_output = child_body["input"] .as_array() .and_then(|items| { @@ -465,8 +484,12 @@ async fn spawn_agent_role_overrides_requested_model_and_reasoning_settings() -> "custom".to_string(), AgentRoleConfig { description: Some("Custom 
role".to_string()), + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); }) @@ -513,8 +536,12 @@ async fn spawn_agent_tool_description_mentions_role_locked_settings() -> Result< "custom".to_string(), AgentRoleConfig { description: Some("Custom role".to_string()), + model: None, config_file: Some(role_path), + spawn_mode: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); }); diff --git a/codex-rs/core/watchdog_agent_prompt.md b/codex-rs/core/watchdog_agent_prompt.md new file mode 100644 index 0000000000..6ff46d661d --- /dev/null +++ b/codex-rs/core/watchdog_agent_prompt.md @@ -0,0 +1,87 @@ +# You are a Subagent + +More importantly, you are a **watchdog check-in agent**. Keep the root agent unblocked, on-task, and executing real work toward the user’s goal. You have full conversation context; messages that appear to be from “you” may have been written by the root agent. +You are one check-in run created by a persistent watchdog timer attached to an owner thread. The timer reuses this prompt on each check-in, but each check-in is a fresh one-shot run (one execution + one report). + +You will be given the target agent id and the original prompt/goal. + +Terms in this file: +- **watchdog**: persistent idle-timer registration. +- **watchdog check-in agent**: this short-lived run instance. +- **owner thread**: the thread that the watchdog monitors and reports to. +- **parent thread**: this watchdog check-in agent’s direct parent; for watchdog check-ins this is the owner thread. +- **`send_input`**: primary way to deliver watchdog guidance to an existing thread; it does not spawn agents. Delivery is asynchronous. +- **durable state**: thread-level task state that must still be available in later turns/check-ins (counters, plans, final decisions), not disk/database persistence. 
+- **exact-only format**: parent constraint that says to return only specific fields/content. + +## Principles + +- Be concise, directive, and specific: name the file, command, or decision needed now. +- Detect drift or looping immediately. If the root agent is acknowledging without acting, tell it exactly what to do next. +- Break loops by changing framing: propose a shorter plan, identify the blocker, or name the missing command. +- Preserve alignment: restate the user’s goal and the next concrete step. +- Safety and correctness: call out missing tests, skipped checks, or unclear acceptance criteria. +- Output precedence is: system/developer/policy rules first, then parent-task output constraints. If the parent requires exact-only format (for example "only"), return exactly the requested fields/content unless higher-priority rules require extra content. +- If exact-only format is not required, include all requested fields/content and you may add 1-2 short non-conflicting guidance sentences. + +## Operating Procedure (Every Time You Run) + +1. Re-evaluate the user’s latest request and the current status. Independently verify status when needed by reading files, running commands, and checking plan files against recent changes. +2. Identify the single highest-impact next action (or a very short ordered list). +3. Direct the root agent to execute it now (include paths and commands). +4. If blocked, propose one or two crisp unblockers. +5. If the goal appears complete, say so and direct the root agent to close unneeded agents. + +Tone: direct, actionable, minimally polite. Optimize for progress over narration. + +## Detect Looping and Reward Hacking + +The root agent may slip into patterns that look like progress but are not. Interrupt those patterns. + +Watch for: + +- Tests that always pass (tautologies, `assert!(true)`, mocks that cannot fail). +- Marking items complete with only stub implementations. 
+- "Fixes" that comment out failing tests or code without addressing root causes. +- Claiming success without running required format/lint/tests. +- Ignoring explicit user requirements in favor of quicker but incomplete shortcuts. + +When you detect these, prescribe the corrective action explicitly. + +## Multi-Agent Tools (Upstream Surface) + +Use only the multi-agent tools that exist here: + +- `spawn_agent` (prefer `spawn_mode = "fork"` when shared context matters). +- `send_input`. +- `compact_parent_context` (watchdog-only recovery tool; see below). +- `wait`. +- `close_agent`. + +There is no cancel tool. Use `close_agent` to stop agents that are done or no longer needed. + +When recommending watchdogs to the root agent, keep `agent_type` at the default. + +Important: send watchdog check-in output with `send_input` to the owner/parent thread. A plain assistant message in your own watchdog check-in thread is not a reliable delivery path to the owner. + +Each watchdog check-in runs in a fresh one-shot watchdog check-in agent with no guaranteed continuity across check-ins. Do not keep durable state in watchdog-check-in-agent local memory/files; treat local state as run-local only. Ask the parent to track durable state, and use `send_input` (without `id`, or `id = "parent"`/`"root"`) to report results. + +`send_input` is the primary path for watchdog delivery to parent/owner. If a watchdog check-in agent finishes without `send_input`, runtime forwards one final multi-agent inbox message as the mandatory fallback wake-up path for the owner. Exiting without either `send_input` or a final message is a bug. + +For token protocols (for example `ping N` / `pong N`), treat those as literal text counters, not shell commands. Do not call command-execution tools unless the prompt explicitly asks you to execute commands. 
+ +## Parent Recovery via Context Compaction + +`compact_parent_context` asks the system to abbreviate/compact redundant parent-thread context so the parent can recover from loops. + +Use it only as a last resort: + +- The parent has been repeatedly non-responsive across multiple watchdog check-ins. +- The parent is taking no meaningful actions (no concrete commands/edits/tests) and making no progress. +- You already sent at least one direct corrective instruction with `send_input`, and it was ignored. + +Do not call `compact_parent_context` for routine nudges or normal delays. Prefer precise `send_input` guidance first. + +## Style + +Be explicit when precision matters. Your job is to drive real progress toward the user’s goal. diff --git a/codex-rs/features/src/lib.rs b/codex-rs/features/src/lib.rs index 7f0d326a06..7d7915e3ae 100644 --- a/codex-rs/features/src/lib.rs +++ b/codex-rs/features/src/lib.rs @@ -140,6 +140,12 @@ pub enum Feature { MultiAgentV2, /// Enable CSV-backed agent job tools. SpawnCsv, + /// Deliver inbound agent messages via a synthetic function-call inbox envelope. + AgentFunctionCallInbox, + /// Enable prepending agent-specific developer instructions for agent sessions. + AgentPromptInjection, + /// Enable watchdog spawning and watchdog-only agent tools. + AgentWatchdog, /// Enable apps. Apps, /// Enable the tool_search tool for apps. 
@@ -711,12 +717,30 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::UnderDevelopment, default_enabled: false, }, + FeatureSpec { + id: Feature::AgentFunctionCallInbox, + key: "agent_function_call_inbox", + stage: Stage::UnderDevelopment, + default_enabled: false, + }, FeatureSpec { id: Feature::SpawnCsv, key: "enable_fanout", stage: Stage::UnderDevelopment, default_enabled: false, }, + FeatureSpec { + id: Feature::AgentPromptInjection, + key: "agent_prompt_injection", + stage: Stage::UnderDevelopment, + default_enabled: false, + }, + FeatureSpec { + id: Feature::AgentWatchdog, + key: "agent_watchdog", + stage: Stage::UnderDevelopment, + default_enabled: false, + }, FeatureSpec { id: Feature::Apps, key: "apps", @@ -899,3 +923,137 @@ pub fn unstable_features_warning_event( #[cfg(test)] mod tests; +#[cfg(test)] +mod inbox_feature_tests { + use super::*; + + use pretty_assertions::assert_eq; + + #[test] + fn under_development_features_are_disabled_by_default() { + for spec in FEATURES { + if matches!(spec.stage, Stage::UnderDevelopment) { + assert_eq!( + spec.default_enabled, false, + "feature `{}` is under development and must be disabled by default", + spec.key + ); + } + } + } + + #[test] + fn default_enabled_features_are_stable() { + for spec in FEATURES { + if spec.default_enabled { + assert!( + matches!(spec.stage, Stage::Stable | Stage::Removed), + "feature `{}` is enabled by default but is not stable/removed ({:?})", + spec.key, + spec.stage + ); + } + } + } + + #[test] + fn use_linux_sandbox_bwrap_is_removed() { + assert_eq!(Feature::UseLinuxSandboxBwrap.stage(), Stage::Removed); + assert_eq!(Feature::UseLinuxSandboxBwrap.default_enabled(), false); + } + + #[test] + fn js_repl_is_experimental_and_user_toggleable() { + let spec = Feature::JsRepl.info(); + let stage = spec.stage; + let expected_node_version = include_str!("../../node-version.txt").trim_end(); + + assert!(matches!(stage, Stage::Experimental { .. 
})); + assert_eq!(stage.experimental_menu_name(), Some("JavaScript REPL")); + assert_eq!( + stage.experimental_menu_description().map(str::to_owned), + Some(format!( + "Enable a persistent Node-backed JavaScript REPL for interactive website debugging and other inline JavaScript execution capabilities. Requires Node >= v{expected_node_version} installed." + )) + ); + assert_eq!(Feature::JsRepl.default_enabled(), false); + } + + #[test] + fn guardian_approval_is_experimental_and_user_toggleable() { + let spec = Feature::GuardianApproval.info(); + let stage = spec.stage; + + assert!(matches!(stage, Stage::Experimental { .. })); + assert_eq!(stage.experimental_menu_name(), Some("Smart Approvals")); + assert_eq!( + stage.experimental_menu_description().map(str::to_owned), + Some( + "When Codex needs approval for higher-risk actions (e.g. sandbox escapes or blocked network access), route eligible approval requests to a carefully-prompted security reviewer subagent rather than blocking the agent on your input. 
This can consume significantly more tokens because it runs a subagent on every approval request.".to_string() + ) + ); + assert_eq!(stage.experimental_announcement(), None); + assert_eq!(Feature::GuardianApproval.default_enabled(), false); + } + + #[test] + fn request_permissions_tool_is_under_development() { + assert_eq!( + Feature::RequestPermissionsTool.stage(), + Stage::UnderDevelopment + ); + assert_eq!(Feature::RequestPermissionsTool.default_enabled(), false); + } + + #[test] + fn image_generation_is_under_development() { + assert_eq!(Feature::ImageGeneration.stage(), Stage::UnderDevelopment); + assert_eq!(Feature::ImageGeneration.default_enabled(), false); + } + + #[test] + fn collab_is_legacy_alias_for_multi_agent() { + assert_eq!(feature_for_key("multi_agent"), Some(Feature::Collab)); + assert_eq!(feature_for_key("collab"), Some(Feature::Collab)); + } + + #[test] + fn apps_require_feature_flag_and_chatgpt_auth() { + let mut features = Features::with_defaults(); + assert!(!features.apps_enabled_for_auth(/*auth*/ None)); + + features.enable(Feature::Apps); + assert!(!features.apps_enabled_for_auth(/*auth*/ None)); + + let api_key_auth = CodexAuth::from_api_key("test-api-key"); + assert!(!features.apps_enabled_for_auth(Some(&api_key_auth))); + + let chatgpt_auth = CodexAuth::create_dummy_chatgpt_auth_for_testing(); + assert!(features.apps_enabled_for_auth(Some(&chatgpt_auth))); + } + + #[test] + fn agent_function_call_inbox_is_under_development() { + assert_eq!( + Feature::AgentFunctionCallInbox.stage(), + Stage::UnderDevelopment + ); + assert_eq!(Feature::AgentFunctionCallInbox.default_enabled(), false); + assert_eq!( + feature_for_key("agent_function_call_inbox"), + Some(Feature::AgentFunctionCallInbox) + ); + } + + #[test] + fn agent_prompt_and_watchdog_features_use_canonical_keys() { + assert_eq!( + feature_for_key("agent_prompt_injection"), + Some(Feature::AgentPromptInjection) + ); + assert_eq!( + feature_for_key("agent_watchdog"), + 
Some(Feature::AgentWatchdog) + ); + } +} diff --git a/codex-rs/mcp-server/tests/suite/codex_tool.rs b/codex-rs/mcp-server/tests/suite/codex_tool.rs index 04763b884c..8b4247006f 100644 --- a/codex-rs/mcp-server/tests/suite/codex_tool.rs +++ b/codex-rs/mcp-server/tests/suite/codex_tool.rs @@ -426,7 +426,9 @@ async fn codex_tool_passes_base_instructions() -> anyhow::Result<()> { "expected permissions developer message, got {developer_contents:?}" ); assert!( - developer_contents.contains(&"Foreshadow upcoming tool calls."), + developer_contents + .iter() + .any(|content| content.contains("Foreshadow upcoming tool calls.")), "expected developer instructions in developer messages, got {developer_contents:?}" ); diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index d4e568fea9..ab0e40d6f0 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -122,6 +122,11 @@ pub enum ResponseInputItem { role: String, content: Vec, }, + FunctionCall { + name: String, + arguments: String, + call_id: String, + }, FunctionCallOutput { call_id: String, #[ts(as = "FunctionCallOutputBody")] @@ -915,6 +920,17 @@ impl From for ResponseItem { end_turn: None, phase: None, }, + ResponseInputItem::FunctionCall { + name, + arguments, + call_id, + } => Self::FunctionCall { + id: None, + name, + namespace: None, + arguments, + call_id, + }, ResponseInputItem::FunctionCallOutput { call_id, output } => { Self::FunctionCallOutput { call_id, output } } diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 38a405a668..bff1eadedc 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -98,6 +98,26 @@ pub const COLLABORATION_MODE_CLOSE_TAG: &str = ""; pub const REALTIME_CONVERSATION_OPEN_TAG: &str = ""; pub const REALTIME_CONVERSATION_CLOSE_TAG: &str = ""; pub const USER_MESSAGE_BEGIN: &str = "## My request for Codex:"; +pub const AGENT_INBOX_KIND: &str = "agent_inbox"; + 
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema)] +pub struct AgentInboxPayload { + pub injected: bool, + pub kind: String, + pub sender_thread_id: ThreadId, + pub message: String, +} + +impl AgentInboxPayload { + pub fn new(sender_thread_id: ThreadId, message: String) -> Self { + Self { + injected: true, + kind: AGENT_INBOX_KIND.to_string(), + sender_thread_id, + message, + } + } +} /// Submission Queue Entry - requests from user #[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)] @@ -242,6 +262,9 @@ pub enum Op { final_output_json_schema: Option, }, + /// Inject non-user response items into an existing turn, or start a turn if needed. + InjectResponseItems { items: Vec }, + /// Similar to [`Op::UserInput`], but contains additional context required /// for a turn of a [`crate::codex_thread::CodexThread`]. UserTurn { @@ -586,6 +609,7 @@ impl Op { Self::UserInputAnswer { .. } => "user_input_answer", Self::RequestPermissionsResponse { .. } => "request_permissions_response", Self::DynamicToolResponse { .. } => "dynamic_tool_response", + Self::InjectResponseItems { .. } => "inject_response_items", Self::AddToHistory { .. } => "add_to_history", Self::GetHistoryEntryRequest { .. } => "get_history_entry_request", Self::ListMcpTools => "list_mcp_tools", @@ -3439,6 +3463,16 @@ pub struct CollabAgentSpawnBeginEvent { pub reasoning_effort: ReasoningEffortConfig, } +#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS, Default)] +#[serde(rename_all = "snake_case")] +#[ts(rename_all = "snake_case")] +pub enum AgentSpawnMode { + #[default] + Spawn, + Fork, + Watchdog, +} + #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)] pub struct CollabAgentRef { /// Thread ID of the receiver/new agent. @@ -3486,6 +3520,9 @@ pub struct CollabAgentSpawnEndEvent { pub model: String, /// Effective reasoning effort used by the spawned agent after inheritance and role overrides. 
pub reasoning_effort: ReasoningEffortConfig, + /// Spawn mode used for this agent. + #[serde(default)] + pub spawn_mode: AgentSpawnMode, /// Last known status of the new agent reported to the sender agent. pub status: AgentStatus, } diff --git a/codex-rs/rollout/src/recorder.rs b/codex-rs/rollout/src/recorder.rs index 275fcb0121..bb9a71812a 100644 --- a/codex-rs/rollout/src/recorder.rs +++ b/codex-rs/rollout/src/recorder.rs @@ -596,10 +596,10 @@ impl RolloutRecorder { Ok((items, thread_id, parse_errors)) } - /// Load a rollout for resume semantics. + /// Load a rollout for resuming the same thread. /// - /// This preserves the rollout's existing conversation id and rollout path, so callers must - /// not use it for true forking semantics. + /// This preserves the rollout's existing conversation id and rollout path, so callers + /// must not use it for true forking semantics. pub async fn get_rollout_history(path: &Path) -> std::io::Result { let (items, thread_id, _parse_errors) = Self::load_rollout_items(path).await?; let conversation_id = thread_id @@ -617,10 +617,11 @@ impl RolloutRecorder { })) } - /// Load a rollout for true fork semantics. + /// Load a rollout for forking into a distinct thread. /// /// Unlike `get_rollout_history`, this intentionally discards the source rollout's - /// conversation id so the child thread gets a fresh id and preserves `forked_from_id`. + /// conversation id so `Codex::spawn` allocates a fresh thread id and rollout path for + /// the child. 
pub async fn get_fork_history(path: &Path) -> std::io::Result { let (items, _thread_id, _parse_errors) = Self::load_rollout_items(path).await?; diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 1267e6b732..9009174b08 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -142,6 +142,7 @@ use codex_protocol::protocol::AgentReasoningEvent; use codex_protocol::protocol::AgentReasoningRawContentDeltaEvent; #[cfg(test)] use codex_protocol::protocol::AgentReasoningRawContentEvent; +use codex_protocol::protocol::AgentSpawnMode; use codex_protocol::protocol::AgentStatus; use codex_protocol::protocol::ApplyPatchApprovalRequestEvent; #[cfg(test)] @@ -3851,6 +3852,10 @@ impl ChatWidget { prompt: prompt.unwrap_or_default(), model: String::new(), reasoning_effort: ReasoningEffortConfig::Medium, + // Thread history items do not carry spawn_mode yet, so the + // replay path must choose an explicit fallback for reconstructed + // spawn rows. Plain spawn is the least surprising default. 
+ spawn_mode: AgentSpawnMode::Spawn, status: first_receiver .as_ref() .and_then(|thread_id| agents_states.get(&thread_id.to_string())) diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index 1c6addfa68..b29519dfd3 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -2348,6 +2348,7 @@ async fn collab_spawn_end_shows_requested_model_and_effort() { new_agent_nickname: Some("Robie".to_string()), new_agent_role: Some("explorer".to_string()), prompt: "Explore the repo".to_string(), + spawn_mode: codex_protocol::protocol::AgentSpawnMode::Spawn, model: "gpt-5".to_string(), reasoning_effort: ReasoningEffortConfig::High, status: AgentStatus::PendingInit, diff --git a/codex-rs/tui/src/multi_agents.rs b/codex-rs/tui/src/multi_agents.rs index 293c80fcf0..cb889b7243 100644 --- a/codex-rs/tui/src/multi_agents.rs +++ b/codex-rs/tui/src/multi_agents.rs @@ -583,6 +583,7 @@ fn status_summary_spans(status: &AgentStatus) -> Vec> { mod tests { use super::*; use crate::history_cell::HistoryCell; + use codex_protocol::protocol::AgentSpawnMode; #[cfg(target_os = "macos")] use crossterm::event::KeyEvent; #[cfg(target_os = "macos")] @@ -609,6 +610,7 @@ mod tests { new_agent_nickname: Some("Robie".to_string()), new_agent_role: Some("explorer".to_string()), prompt: "Compute 11! 
and reply with just the integer result.".to_string(), + spawn_mode: AgentSpawnMode::Spawn, model: "gpt-5".to_string(), reasoning_effort: ReasoningEffortConfig::High, status: AgentStatus::PendingInit, @@ -747,6 +749,7 @@ mod tests { new_agent_nickname: Some("Robie".to_string()), new_agent_role: Some("explorer".to_string()), prompt: String::new(), + spawn_mode: AgentSpawnMode::Spawn, model: "gpt-5".to_string(), reasoning_effort: ReasoningEffortConfig::High, status: AgentStatus::PendingInit, diff --git a/docs/config.md b/docs/config.md index 71f3548deb..47d381b80e 100644 --- a/docs/config.md +++ b/docs/config.md @@ -36,6 +36,33 @@ Codex can run a notification hook when the agent finishes a turn. See the config When Codex knows which client started the turn, the legacy notify JSON payload also includes a top-level `client` field. The TUI reports `codex-tui`, and the app server reports the `clientInfo.name` value from `initialize`. +## Agent Inbox Delivery + +By default, inbound messages from other agents are delivered to non-subagent threads as normal +user input. If you want those handoffs to appear as explicit non-user transcript activity, you can +opt into a synthetic function-call/function-call-output envelope: + +```toml +[features] +agent_function_call_inbox = true +``` + +When enabled, Codex injects inbound agent messages into non-subagent threads as an `agent_inbox` +function-call/function-call-output pair. This is primarily a model-behavior workaround for cases +where you want a subagent handoff to start a valid turn while still being clearly marked as +non-user activity in the transcript. + +Messages sent to subagents continue to arrive as normal user input. + +## Watchdog Interval + +Watchdog agents use the top-level `watchdog_interval_s` setting to decide how long the owner thread +must be idle before a check-in helper is spawned when the `agent_watchdog` feature is enabled. 
+ +```toml +watchdog_interval_s = 60 +``` + ## JSON Schema The generated JSON Schema for `config.toml` lives at `codex-rs/core/config.schema.json`.