mirror of
https://github.com/openai/codex.git
synced 2026-05-02 18:37:01 +00:00
fix(exec-server): retain output until streams close (#18946)
## Why A Mac Bazel run hit a flake in `server::handler::tests::output_and_exit_are_retained_after_notification_receiver_closes` where the read path observed process exit but lost the expected buffered stdout (`first\nsecond\n`). See the [GitHub Actions job](https://github.com/openai/codex/actions/runs/24758468552/job/72436716505) and [BuildBuddy invocation](https://app.buildbuddy.io/invocation/37475a12-4ef2-45fb-ab8a-e49a2aba1d59). The underlying race is that process exit is not the same thing as stdout/stderr closure. If a child or grandchild inherits the pipe write end, or a process duplicates it with `dup2`, the watched process can exit while the stream is still open and more output can still arrive. The exec-server was starting exited-process retention cleanup from the exit event, so the process entry could be removed before the output streams had actually closed. While stress-testing the exec-server unit suite, `server::handler::tests::long_poll_read_fails_after_session_resume` exposed a separate test race: it started a short-lived command that could exit and wake the pending long-poll read before the session-resume assertion observed the resumed-session error. That test is intended to cover resume eviction, not process-exit delivery, so this change keeps the process alive and quiet while the second connection resumes the session. ## What changed - Keep exec-server process entries retained until stdout/stderr streams close, then start the post-exit retention timer from the closed event. - Wake long-poll readers when the closed event is emitted. - Add focused `local_process` unit coverage that proves late output is still retained after the short test retention interval has elapsed, and that closed process entries are eventually evicted. - Add a local and remote regression test where a parent exits while a child keeps inherited stdout open. The child waits on an explicit release file, so the test deterministically observes exit first, releases the child, then requires a nonzero-wait read from the exit sequence to receive the late output. - In `codex-rs/exec-server/src/server/handler/tests.rs`, make `long_poll_read_fails_after_session_resume` run a long-lived silent command instead of a short command that prints and exits. This isolates the test to session-resume behavior and prevents a normal process exit from satisfying the pending long-poll read first. ## Testing - `cargo test -p codex-exec-server exec_process_retains_output_after_exit_until_streams_close` - `cargo test -p codex-exec-server local_process::tests` - `cargo test -p codex-exec-server` - `just fix -p codex-exec-server` - `bazel test //codex-rs/exec-server:exec-server-unit-tests //codex-rs/exec-server:exec-server-exec_process-test //codex-rs/exec-server:exec-server-file_system-test //codex-rs/exec-server:exec-server-http_client-test //codex-rs/exec-server:exec-server-initialize-test //codex-rs/exec-server:exec-server-process-test //codex-rs/exec-server:exec-server-websocket-test` - `bazel test --runs_per_test=25 //codex-rs/exec-server:exec-server-unit-tests` ## Documentation No docs update needed; this is an internal exec-server correctness fix.
This commit is contained in:
@@ -1,5 +1,10 @@
|
||||
use std::env;
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use std::process::Stdio;
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_exec_server::CODEX_FS_HELPER_ARG1;
|
||||
use codex_exec_server::ExecServerRuntimePaths;
|
||||
@@ -11,6 +16,11 @@ use ctor::ctor;
|
||||
|
||||
pub(crate) mod exec_server;
|
||||
|
||||
pub(crate) const DELAYED_OUTPUT_AFTER_EXIT_PARENT_ARG: &str =
|
||||
"--codex-test-delayed-output-after-exit-parent";
|
||||
|
||||
const DELAYED_OUTPUT_AFTER_EXIT_CHILD_ARG: &str = "--codex-test-delayed-output-after-exit-child";
|
||||
|
||||
#[ctor]
|
||||
pub static TEST_BINARY_DISPATCH_GUARD: Option<TestBinaryDispatchGuard> = {
|
||||
let guard = configure_test_binary_dispatch("codex-exec-server-tests", |exe_name, argv1| {
|
||||
@@ -22,6 +32,7 @@ pub static TEST_BINARY_DISPATCH_GUARD: Option<TestBinaryDispatchGuard> = {
|
||||
}
|
||||
TestBinaryDispatchMode::InstallAliases
|
||||
});
|
||||
maybe_run_delayed_output_after_exit_from_test_binary();
|
||||
maybe_run_exec_server_from_test_binary(guard.as_ref());
|
||||
guard
|
||||
};
|
||||
@@ -39,6 +50,82 @@ pub(crate) fn current_test_binary_helper_paths() -> anyhow::Result<(PathBuf, Opt
|
||||
Ok((current_exe, codex_linux_sandbox_exe))
|
||||
}
|
||||
|
||||
fn maybe_run_delayed_output_after_exit_from_test_binary() {
|
||||
let mut args = env::args();
|
||||
let _program = args.next();
|
||||
let Some(command) = args.next() else {
|
||||
return;
|
||||
};
|
||||
match command.as_str() {
|
||||
DELAYED_OUTPUT_AFTER_EXIT_PARENT_ARG => {
|
||||
let release_path = next_release_path_arg(args);
|
||||
run_delayed_output_after_exit_parent(&release_path);
|
||||
}
|
||||
DELAYED_OUTPUT_AFTER_EXIT_CHILD_ARG => {
|
||||
let release_path = next_release_path_arg(args);
|
||||
run_delayed_output_after_exit_child(&release_path);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn next_release_path_arg(mut args: impl Iterator<Item = String>) -> PathBuf {
|
||||
let Some(release_path) = args.next() else {
|
||||
eprintln!("expected release path");
|
||||
std::process::exit(1);
|
||||
};
|
||||
if args.next().is_some() {
|
||||
eprintln!("unexpected extra arguments");
|
||||
std::process::exit(1);
|
||||
}
|
||||
PathBuf::from(release_path)
|
||||
}
|
||||
|
||||
fn run_delayed_output_after_exit_parent(release_path: &Path) {
|
||||
let current_exe = match env::current_exe() {
|
||||
Ok(current_exe) => current_exe,
|
||||
Err(error) => {
|
||||
eprintln!("failed to resolve current test binary: {error}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
match Command::new(current_exe)
|
||||
.arg(DELAYED_OUTPUT_AFTER_EXIT_CHILD_ARG)
|
||||
.arg(release_path)
|
||||
.stdin(Stdio::null())
|
||||
.spawn()
|
||||
{
|
||||
Ok(_) => std::process::exit(0),
|
||||
Err(error) => {
|
||||
eprintln!("failed to spawn delayed output child: {error}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn run_delayed_output_after_exit_child(release_path: &Path) {
|
||||
for _ in 0..1_000 {
|
||||
if release_path.exists() {
|
||||
let mut stdout = std::io::stdout().lock();
|
||||
if let Err(error) = writeln!(stdout, "late output after exit") {
|
||||
eprintln!("failed to write delayed output: {error}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
if let Err(error) = stdout.flush() {
|
||||
eprintln!("failed to flush delayed output: {error}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
std::process::exit(0);
|
||||
}
|
||||
std::thread::sleep(Duration::from_millis(10));
|
||||
}
|
||||
eprintln!(
|
||||
"timed out waiting for release path {}",
|
||||
release_path.display()
|
||||
);
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
fn maybe_run_exec_server_from_test_binary(guard: Option<&TestBinaryDispatchGuard>) {
|
||||
let mut args = env::args();
|
||||
let _program = args.next();
|
||||
|
||||
Reference in New Issue
Block a user