mirror of
https://github.com/openai/codex.git
synced 2026-04-29 00:55:38 +00:00
114 lines
4.1 KiB
Markdown
114 lines
4.1 KiB
Markdown
# PR #1852: exec: timeout on grandchildren
|
|
|
|
- URL: https://github.com/openai/codex/pull/1852
|
|
- Author: md-oai
|
|
- Created: 2025-08-05 18:04:58 UTC
|
|
- Updated: 2025-08-05 21:18:43 UTC
|
|
- Changes: +43/-2, Files changed: 1, Commits: 3
|
|
|
|
## Description
|
|
|
|
We were enforcing the 10 s wall-clock limit only on the direct child process. If that child (bash) spawns grandchildren and we kill it on timeout, those grandchildren still hold the original stdout/stderr pipes open, so the background tasks that drain the pipes block forever.
|
|
|
|
## Full Diff
|
|
|
|
```diff
|
|
diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs
|
|
index dce02cc5e2..a8bbd0f52c 100644
|
|
--- a/codex-rs/core/src/exec.rs
|
|
+++ b/codex-rs/core/src/exec.rs
|
|
@@ -344,8 +344,49 @@ pub(crate) async fn consume_truncated_output(
|
|
}
|
|
};
|
|
|
|
- let stdout = stdout_handle.await??;
|
|
- let stderr = stderr_handle.await??;
|
|
+ // Wait for the stdout/stderr collection tasks but guard against them
|
|
+ // hanging forever. In the normal case both pipes are closed once the child
|
|
+ // terminates so the tasks exit quickly. However, if the child process
|
|
+ // spawned grandchildren that inherited its stdout/stderr file descriptors
|
|
+ // those pipes may stay open after we `kill` the direct child on timeout.
|
|
+ // That would cause the `read_capped` tasks to block on `read()`
|
|
+ // indefinitely, effectively hanging the whole agent.
|
|
+
|
|
+ const IO_DRAIN_TIMEOUT_MS: u64 = 2_000; // 2 s should be plenty for local pipes
|
|
+
|
|
+ // We need mutable bindings so we can `abort()` them on timeout.
|
|
+ use tokio::task::JoinHandle;
|
|
+
|
|
+ async fn await_with_timeout(
|
|
+ handle: &mut JoinHandle<std::io::Result<Vec<u8>>>,
|
|
+ timeout: Duration,
|
|
+ ) -> std::io::Result<Vec<u8>> {
|
|
+ match tokio::time::timeout(timeout, &mut *handle).await {
|
|
+ Ok(join_res) => match join_res {
|
|
+ Ok(io_res) => io_res,
|
|
+ Err(join_err) => Err(std::io::Error::other(join_err)),
|
|
+ },
|
|
+ Err(_elapsed) => {
|
|
+ // Timeout: abort the task to avoid hanging on open pipes.
|
|
+ handle.abort();
|
|
+ Ok(Vec::new())
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ let mut stdout_handle = stdout_handle;
|
|
+ let mut stderr_handle = stderr_handle;
|
|
+
|
|
+ let stdout = await_with_timeout(
|
|
+ &mut stdout_handle,
|
|
+ Duration::from_millis(IO_DRAIN_TIMEOUT_MS),
|
|
+ )
|
|
+ .await?;
|
|
+ let stderr = await_with_timeout(
|
|
+ &mut stderr_handle,
|
|
+ Duration::from_millis(IO_DRAIN_TIMEOUT_MS),
|
|
+ )
|
|
+ .await?;
|
|
|
|
Ok(RawExecToolCallOutput {
|
|
exit_status,
|
|
```
|
|
|
|
## Review Comments
|
|
|
|
### codex-rs/core/src/exec.rs
|
|
|
|
- Created: 2025-08-05 18:42:23 UTC | Link: https://github.com/openai/codex/pull/1852#discussion_r2255069686
|
|
|
|
```diff
|
|
@@ -344,8 +344,50 @@ pub(crate) async fn consume_truncated_output(
|
|
}
|
|
};
|
|
|
|
- let stdout = stdout_handle.await??;
|
|
- let stderr = stderr_handle.await??;
|
|
+ // Wait for the stdout/stderr collection tasks but guard against them
|
|
+ // hanging forever. In the normal case both pipes are closed once the child
|
|
+ // terminates so the tasks exit quickly. However, if the child process
|
|
+ // spawned grandchildren that inherited its stdout/stderr file descriptors
|
|
+ // those pipes may stay open after we `kill` the direct child on timeout.
|
|
+ // That would cause the `read_capped` tasks to block on `read()`
|
|
+ // indefinitely, effectively hanging the whole agent.
|
|
+
|
|
+ const IO_DRAIN_TIMEOUT_MS: u64 = 2_000; // 2 s should be plenty for local pipes
|
|
+
|
|
+ // We need mutable bindings so we can `abort()` them on timeout.
|
|
+ use tokio::task::JoinHandle;
|
|
+
|
|
+ async fn await_with_timeout(
|
|
+ handle: &mut JoinHandle<std::io::Result<Vec<u8>>>,
|
|
+ timeout: Duration,
|
|
+ ) -> std::io::Result<Vec<u8>> {
|
|
+ tokio::select! {
|
|
+ join_res = &mut *handle => {
|
|
+ match join_res {
|
|
+ Ok(io_res) => io_res,
|
|
+ Err(join_err) => Err(std::io::Error::other(join_err)),
|
|
+ }
|
|
+ },
|
|
+ _ = tokio::time::sleep(timeout) => {
|
|
```
|
|
|
|
> Can/should we use `tokio::time::timeout` instead?