mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
app-server tests: reduce intermittent nextest LEAK via graceful child shutdown (#12266)
## Why `cargo nextest` was intermittently reporting `LEAK` for `codex-app-server` tests even when assertions passed. This adds noise and flakiness to local/CI signals. Sample output used as the basis of this investigation: ```text LEAK [ 7.578s] ( 149/3663) codex-app-server::all suite::output_schema::send_user_turn_output_schema_is_per_turn_v1 LEAK [ 7.383s] ( 210/3663) codex-app-server::all suite::v2::dynamic_tools::dynamic_tool_call_round_trip_sends_text_content_items_to_model LEAK [ 7.768s] ( 213/3663) codex-app-server::all suite::v2::dynamic_tools::thread_start_injects_dynamic_tools_into_model_requests LEAK [ 8.841s] ( 224/3663) codex-app-server::all suite::v2::output_schema::turn_start_accepts_output_schema_v2 LEAK [ 8.151s] ( 225/3663) codex-app-server::all suite::v2::plan_item::plan_mode_uses_proposed_plan_block_for_plan_item LEAK [ 8.230s] ( 232/3663) codex-app-server::all suite::v2::safety_check_downgrade::openai_model_header_mismatch_emits_model_rerouted_notification_v2 LEAK [ 6.472s] ( 273/3663) codex-app-server::all suite::v2::turn_start::turn_start_accepts_collaboration_mode_override_v2 LEAK [ 6.107s] ( 275/3663) codex-app-server::all suite::v2::turn_start::turn_start_accepts_personality_override_v2 ``` ## How I Reproduced I focused on the suspect tests and ran them under `nextest` stress mode with leak reporting enabled. 
```bash cargo nextest run -p codex-app-server -j 2 --no-fail-fast --stress-count 25 --status-level leak --final-status-level fail -E 'test(suite::output_schema::send_user_turn_output_schema_is_per_turn_v1) | test(suite::v2::dynamic_tools::dynamic_tool_call_round_trip_sends_text_content_items_to_model) | test(suite::v2::dynamic_tools::thread_start_injects_dynamic_tools_into_model_requests) | test(suite::v2::output_schema::turn_start_accepts_output_schema_v2) | test(suite::v2::plan_item::plan_mode_uses_proposed_plan_block_for_plan_item) | test(suite::v2::safety_check_downgrade::openai_model_header_mismatch_emits_model_rerouted_notification_v2) | test(suite::v2::turn_start::turn_start_accepts_collaboration_mode_override_v2) | test(suite::v2::turn_start::turn_start_accepts_personality_override_v2)' ``` This reproduced intermittent `LEAK` statuses while tests still passed. ## What Changed In `codex-rs/app-server/tests/common/mcp_process.rs`: - Changed `stdin: ChildStdin` to `stdin: Option<ChildStdin>` so teardown can explicitly close stdin. - In `Drop`, close stdin first to trigger EOF-based graceful shutdown. - Wait briefly for graceful exit. - If still running, fall back to `start_kill()` and the existing bounded `try_wait()` loop. - Updated send-path handling to bail if stdin is already closed. ## Why This Is the Right Fix The leak signal was caused by child-process teardown timing, not test-logic assertion failure. The helper previously relied mostly on force-kill timing in `Drop`; that can race with nextest leak detection. Closing stdin first gives `codex-app-server` a deterministic, graceful shutdown path before force-kill. Keeping the force-kill fallback preserves robustness if graceful shutdown does not complete in time. ## Verification - `cargo test -p codex-app-server` - Re-ran the stress repro above after this change: no `LEAK` statuses observed. 
- Additional high-signal stress run also showed no leaks: ```bash cargo nextest run -p codex-app-server -j 2 --no-fail-fast --stress-count 100 --status-level leak --final-status-level fail -E 'test(suite::output_schema::send_user_turn_output_schema_is_per_turn_v1) | test(suite::v2::dynamic_tools::dynamic_tool_call_round_trip_sends_text_content_items_to_model)' ```
This commit is contained in:
@@ -75,7 +75,7 @@ pub struct McpProcess {
     /// not a guarantee. See the `kill_on_drop` documentation for details.
     #[allow(dead_code)]
     process: Child,
-    stdin: ChildStdin,
+    stdin: Option<ChildStdin>,
     stdout: BufReader<ChildStdout>,
     pending_messages: VecDeque<JSONRPCMessage>,
 }
@@ -145,7 +145,7 @@ impl McpProcess {
         Ok(Self {
             next_request_id: AtomicI64::new(0),
             process,
-            stdin,
+            stdin: Some(stdin),
            stdout,
            pending_messages: VecDeque::new(),
        })
@@ -811,10 +811,13 @@ impl McpProcess {

     async fn send_jsonrpc_message(&mut self, message: JSONRPCMessage) -> anyhow::Result<()> {
         eprintln!("writing message to stdin: {message:?}");
+        let Some(stdin) = self.stdin.as_mut() else {
+            anyhow::bail!("mcp stdin closed");
+        };
         let payload = serde_json::to_string(&message)?;
-        self.stdin.write_all(payload.as_bytes()).await?;
-        self.stdin.write_all(b"\n").await?;
-        self.stdin.flush().await?;
+        stdin.write_all(payload.as_bytes()).await?;
+        stdin.write_all(b"\n").await?;
+        stdin.flush().await?;
         Ok(())
     }

@@ -961,8 +964,22 @@ impl Drop for McpProcess {
         //
         // Drop can't be async, so we do a bounded synchronous cleanup:
         //
-        // 1. Request termination with `start_kill()`.
-        // 2. Poll `try_wait()` until the OS reports the child exited, with a short timeout.
+        // 1. Close stdin to request a graceful shutdown via EOF.
+        // 2. Poll briefly for graceful exit.
+        // 3. If still alive, request termination with `start_kill()`.
+        // 4. Poll `try_wait()` until the OS reports the child exited, with a short timeout.
+        drop(self.stdin.take());
+
+        let graceful_start = std::time::Instant::now();
+        let graceful_timeout = std::time::Duration::from_millis(200);
+        while graceful_start.elapsed() < graceful_timeout {
+            match self.process.try_wait() {
+                Ok(Some(_)) => return,
+                Ok(None) => std::thread::sleep(std::time::Duration::from_millis(5)),
+                Err(_) => return,
+            }
+        }
+
         let _ = self.process.start_kill();

         let start = std::time::Instant::now();
Reference in New Issue
Block a user