mirror of
https://github.com/openai/codex.git
synced 2026-02-01 22:47:52 +00:00
fix: attempt to reduce high cpu usage when using collab (#9776)
Reproduce with a prompt like this, with collab enabled:

```
Examine the code at <some subdirectory with a deeply nested project>. Find the most urgent issue to resolve and describe it to me.
```

The existing behavior causes the top-level agent to busy-wait on subagents.
This commit is contained in:
@@ -28,6 +28,8 @@ use serde::Serialize;
|
||||
|
||||
pub struct CollabHandler;
|
||||
|
||||
/// Minimum wait timeout to prevent tight polling loops from burning CPU.
|
||||
pub(crate) const MIN_WAIT_TIMEOUT_MS: i64 = 10_000;
|
||||
pub(crate) const DEFAULT_WAIT_TIMEOUT_MS: i64 = 30_000;
|
||||
pub(crate) const MAX_WAIT_TIMEOUT_MS: i64 = 300_000;
|
||||
|
||||
@@ -323,6 +325,8 @@ mod wait {
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
|
||||
// Validate timeout.
|
||||
// Very short timeouts encourage busy-polling loops in the orchestrator prompt and can
|
||||
// cause high CPU usage even with a single active worker, so clamp to a minimum.
|
||||
let timeout_ms = args.timeout_ms.unwrap_or(DEFAULT_WAIT_TIMEOUT_MS);
|
||||
let timeout_ms = match timeout_ms {
|
||||
ms if ms <= 0 => {
|
||||
@@ -330,7 +334,7 @@ mod wait {
|
||||
"timeout_ms must be greater than zero".to_owned(),
|
||||
));
|
||||
}
|
||||
ms => ms.min(MAX_WAIT_TIMEOUT_MS),
|
||||
ms => ms.clamp(MIN_WAIT_TIMEOUT_MS, MAX_WAIT_TIMEOUT_MS),
|
||||
};
|
||||
|
||||
session
|
||||
@@ -1012,7 +1016,7 @@ mod tests {
|
||||
"wait",
|
||||
function_payload(json!({
|
||||
"ids": [agent_id.to_string()],
|
||||
"timeout_ms": 10
|
||||
"timeout_ms": MIN_WAIT_TIMEOUT_MS
|
||||
})),
|
||||
);
|
||||
let output = CollabHandler
|
||||
@@ -1043,6 +1047,37 @@ mod tests {
|
||||
.expect("shutdown should submit");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn wait_clamps_short_timeouts_to_minimum() {
|
||||
let (mut session, turn) = make_session_and_context().await;
|
||||
let manager = thread_manager();
|
||||
session.services.agent_control = manager.agent_control();
|
||||
let config = turn.client.config().as_ref().clone();
|
||||
let thread = manager.start_thread(config).await.expect("start thread");
|
||||
let agent_id = thread.thread_id;
|
||||
let invocation = invocation(
|
||||
Arc::new(session),
|
||||
Arc::new(turn),
|
||||
"wait",
|
||||
function_payload(json!({
|
||||
"ids": [agent_id.to_string()],
|
||||
"timeout_ms": 10
|
||||
})),
|
||||
);
|
||||
|
||||
let early = timeout(Duration::from_millis(50), CollabHandler.handle(invocation)).await;
|
||||
assert!(
|
||||
early.is_err(),
|
||||
"wait should not return before the minimum timeout clamp"
|
||||
);
|
||||
|
||||
let _ = thread
|
||||
.thread
|
||||
.submit(Op::Shutdown {})
|
||||
.await
|
||||
.expect("shutdown should submit");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn wait_returns_final_status_without_timeout() {
|
||||
let (mut session, turn) = make_session_and_context().await;
|
||||
|
||||
@@ -8,6 +8,7 @@ use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool;
|
||||
use crate::tools::handlers::apply_patch::create_apply_patch_json_tool;
|
||||
use crate::tools::handlers::collab::DEFAULT_WAIT_TIMEOUT_MS;
|
||||
use crate::tools::handlers::collab::MAX_WAIT_TIMEOUT_MS;
|
||||
use crate::tools::handlers::collab::MIN_WAIT_TIMEOUT_MS;
|
||||
use crate::tools::registry::ToolRegistryBuilder;
|
||||
use codex_protocol::config_types::WebSearchMode;
|
||||
use codex_protocol::dynamic_tools::DynamicToolSpec;
|
||||
@@ -517,7 +518,7 @@ fn create_wait_tool() -> ToolSpec {
|
||||
"timeout_ms".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some(format!(
|
||||
"Optional timeout in milliseconds. Defaults to {DEFAULT_WAIT_TIMEOUT_MS} and max {MAX_WAIT_TIMEOUT_MS}."
|
||||
"Optional timeout in milliseconds. Defaults to {DEFAULT_WAIT_TIMEOUT_MS}, min {MIN_WAIT_TIMEOUT_MS}, and max {MAX_WAIT_TIMEOUT_MS}. Avoid tight polling loops; prefer longer waits (seconds to minutes)."
|
||||
)),
|
||||
},
|
||||
);
|
||||
|
||||
@@ -52,6 +52,7 @@ You are Codex Orchestrator, based on GPT-5. You are running as an orchestration
|
||||
* Workers must not revert, overwrite, or conflict with others’ work.
|
||||
* By default, workers must not spawn sub-agents unless explicitly allowed.
|
||||
* When multiple workers are active, you may pass multiple IDs to `wait` to react to the first completion and keep the workflow event-driven. Use a long timeout (e.g. 5 minutes).
|
||||
* Do not busy-poll `wait` with very short timeouts. Prefer waits measured in seconds (or minutes) so the system is idle while workers run.
|
||||
|
||||
## Collab tools
|
||||
|
||||
|
||||
Reference in New Issue
Block a user