feat: exec-server prep for unified exec (#15691)

This PR partially rebases `unified_exec` onto the `exec-server` and adapts
the `exec-server` accordingly.

## What changed in `exec-server`

1. Replaced the old "broadcast-driven, process-global" event model with
process-scoped session events. The goal is to be able to have a dedicated
handler for each process.
2. Extended the protocol contract to support explicit lifecycle status and
stream ordering:
- `WriteResponse` now returns `WriteStatus` (Accepted, UnknownProcess,
StdinClosed, Starting) instead of a bool.
  - Added seq fields to output/exited notifications.
  - Added terminal process/closed notification.
3. Demultiplexed remote notifications into per-process channels, mirroring
the session-scoped event model described above.
4. Local and remote backends now both implement ExecBackend.
5. Local backend wraps internal process ID/operations into per-process
ExecProcess objects.
6. Remote backend registers a session channel before launch and
unregisters on failed launch.

## What changed in `unified_exec`

1. Added a unified process-state model and a backend-neutral process
wrapper. This will probably disappear in the future, but it makes it
easier to keep the work flowing on both sides.
- `UnifiedExecProcess` now handles both local PTY sessions and remote
exec-server processes through a shared `ProcessHandle`.
- Added `ProcessState` to track has_exited, exit_code, and terminal
failure message consistently across backends.
2. Routed write and lifecycle handling through process-level methods.

## Rationale

1. The change centralizes execution transport in exec-server while
preserving policy and orchestration ownership in core, avoiding
duplicated launch approval logic. This comes from internal discussion.
2. Session-scoped events remove coupling/cross-talk between processes
and make stream ordering and terminal state explicit (seq, closed,
failed).
3. The failure-path surfacing (remote launch failures, write failures,
transport disconnects) makes command tool output and cleanup behavior
deterministic.

## Follow-ups:
* Unify the concept of thread ID behind an obfuscated struct
* FD handling
* Full zsh-fork compatibility
* Full network sandboxing compatibility
* Handle ws disconnection
This commit is contained in:
jif-oai
2026-03-26 14:22:34 +00:00
committed by GitHub
parent 4a5635b5a0
commit 7dac332c93
24 changed files with 1933 additions and 325 deletions

View File

@@ -6,19 +6,23 @@ use std::sync::Arc;
use anyhow::Result;
use codex_exec_server::Environment;
use codex_exec_server::ExecBackend;
use codex_exec_server::ExecParams;
use codex_exec_server::ExecProcess;
use codex_exec_server::ExecResponse;
use codex_exec_server::ReadParams;
use codex_exec_server::ReadResponse;
use codex_exec_server::StartedExecProcess;
use pretty_assertions::assert_eq;
use test_case::test_case;
use tokio::sync::watch;
use tokio::time::Duration;
use tokio::time::timeout;
use common::exec_server::ExecServerHarness;
use common::exec_server::exec_server;
/// Shared test fixture: the backend under test plus, for remote runs, the
/// exec-server harness that must outlive the processes it serves.
///
/// NOTE(review): the diff rendering had left the pre-change fields
/// (`process`, `_server`) interleaved here; only `backend`/`server` are
/// initialized by `create_process_context`, so the stale fields are removed.
struct ProcessContext {
    backend: Arc<dyn ExecBackend>,
    // `None` for the local backend; `Some` keeps the remote harness alive and
    // lets tests shut it down explicitly (see the transport-disconnect test).
    server: Option<ExecServerHarness>,
}
async fn create_process_context(use_remote: bool) -> Result<ProcessContext> {
@@ -26,22 +30,22 @@ async fn create_process_context(use_remote: bool) -> Result<ProcessContext> {
let server = exec_server().await?;
let environment = Environment::create(Some(server.websocket_url().to_string())).await?;
Ok(ProcessContext {
process: environment.get_executor(),
_server: Some(server),
backend: environment.get_exec_backend(),
server: Some(server),
})
} else {
let environment = Environment::create(/*exec_server_url*/ None).await?;
Ok(ProcessContext {
process: environment.get_executor(),
_server: None,
backend: environment.get_exec_backend(),
server: None,
})
}
}
async fn assert_exec_process_starts_and_exits(use_remote: bool) -> Result<()> {
let context = create_process_context(use_remote).await?;
let response = context
.process
let session = context
.backend
.start(ExecParams {
process_id: "proc-1".to_string(),
argv: vec!["true".to_string()],
@@ -51,30 +55,197 @@ async fn assert_exec_process_starts_and_exits(use_remote: bool) -> Result<()> {
arg0: None,
})
.await?;
assert_eq!(
response,
ExecResponse {
process_id: "proc-1".to_string(),
}
);
assert_eq!(session.process.process_id().as_str(), "proc-1");
let wake_rx = session.process.subscribe_wake();
let (_, exit_code, closed) =
collect_process_output_from_reads(session.process, wake_rx).await?;
let mut next_seq = 0;
assert_eq!(exit_code, Some(0));
assert!(closed);
Ok(())
}
/// Performs one non-blocking read of the session; if nothing new has arrived
/// (no chunks, not closed, no failure), waits up to 2 seconds on the wake
/// channel and reads once more.
async fn read_process_until_change(
    session: Arc<dyn ExecProcess>,
    wake_rx: &mut watch::Receiver<u64>,
    after_seq: Option<u64>,
) -> Result<ReadResponse> {
    let first = session
        .read(after_seq, /*max_bytes*/ None, /*wait_ms*/ Some(0))
        .await?;
    let has_news = !first.chunks.is_empty() || first.closed || first.failure.is_some();
    if has_news {
        return Ok(first);
    }
    // Nothing yet: block on the wake signal, bounded so a stuck process fails
    // the test instead of hanging it.
    timeout(Duration::from_secs(2), wake_rx.changed()).await??;
    let second = session
        .read(after_seq, /*max_bytes*/ None, /*wait_ms*/ Some(0))
        .await?;
    Ok(second)
}
async fn collect_process_output_from_reads(
session: Arc<dyn ExecProcess>,
mut wake_rx: watch::Receiver<u64>,
) -> Result<(String, Option<i32>, bool)> {
let mut output = String::new();
let mut exit_code = None;
let mut after_seq = None;
loop {
let read = context
.process
.read(ReadParams {
process_id: "proc-1".to_string(),
after_seq: Some(next_seq),
max_bytes: None,
wait_ms: Some(100),
})
.await?;
next_seq = read.next_seq;
if read.exited {
assert_eq!(read.exit_code, Some(0));
let response =
read_process_until_change(Arc::clone(&session), &mut wake_rx, after_seq).await?;
if let Some(message) = response.failure {
anyhow::bail!("process failed before closed state: {message}");
}
for chunk in response.chunks {
output.push_str(&String::from_utf8_lossy(&chunk.chunk.into_inner()));
after_seq = Some(chunk.seq);
}
if response.exited {
exit_code = response.exit_code;
}
if response.closed {
break;
}
after_seq = response.next_seq.checked_sub(1).or(after_seq);
}
drop(session);
Ok((output, exit_code, true))
}
/// Runs a short shell pipeline and checks that its stdout is streamed back
/// through session reads, with a clean exit and a closed session.
async fn assert_exec_process_streams_output(use_remote: bool) -> Result<()> {
    let context = create_process_context(use_remote).await?;
    let process_id = "proc-stream".to_string();
    let params = ExecParams {
        process_id: process_id.clone(),
        argv: ["/bin/sh", "-c", "sleep 0.05; printf 'session output\\n'"]
            .iter()
            .map(|arg| arg.to_string())
            .collect(),
        cwd: std::env::current_dir()?,
        env: Default::default(),
        tty: false,
        arg0: None,
    };
    let session = context.backend.start(params).await?;
    assert_eq!(session.process.process_id().as_str(), process_id);
    let StartedExecProcess { process } = session;
    let wake = process.subscribe_wake();
    let (output, exit_code, closed) = collect_process_output_from_reads(process, wake).await?;
    assert_eq!(output, "session output\n");
    assert_eq!(exit_code, Some(0));
    assert!(closed);
    Ok(())
}
/// Launches an interactive python process on a TTY, feeds it a line over
/// stdin, and verifies the echoed `from-stdin:` output comes back via reads.
async fn assert_exec_process_write_then_read(use_remote: bool) -> Result<()> {
    let context = create_process_context(use_remote).await?;
    let process_id = "proc-stdin".to_string();
    let params = ExecParams {
        process_id: process_id.clone(),
        argv: vec![
            "/usr/bin/python3".to_string(),
            "-c".to_string(),
            "import sys; line = sys.stdin.readline(); sys.stdout.write(f'from-stdin:{line}'); sys.stdout.flush()".to_string(),
        ],
        cwd: std::env::current_dir()?,
        env: Default::default(),
        tty: true,
        arg0: None,
    };
    let session = context.backend.start(params).await?;
    assert_eq!(session.process.process_id().as_str(), process_id);
    // Give the interpreter a moment to reach the blocking readline before we
    // write the stdin payload.
    tokio::time::sleep(Duration::from_millis(200)).await;
    session.process.write(b"hello\n".to_vec()).await?;
    let StartedExecProcess { process } = session;
    let wake = process.subscribe_wake();
    let (output, exit_code, closed) = collect_process_output_from_reads(process, wake).await?;
    assert!(
        output.contains("from-stdin:hello"),
        "unexpected output: {output:?}"
    );
    assert_eq!(exit_code, Some(0));
    assert!(closed);
    Ok(())
}
/// Starts a fast-exiting process and only subscribes to its wake channel
/// after a delay, verifying output produced before the subscription is
/// preserved rather than dropped.
async fn assert_exec_process_preserves_queued_events_before_subscribe(
    use_remote: bool,
) -> Result<()> {
    let context = create_process_context(use_remote).await?;
    let params = ExecParams {
        process_id: "proc-queued".to_string(),
        argv: ["/bin/sh", "-c", "printf 'queued output\\n'"]
            .iter()
            .map(|arg| arg.to_string())
            .collect(),
        cwd: std::env::current_dir()?,
        env: Default::default(),
        tty: false,
        arg0: None,
    };
    let session = context.backend.start(params).await?;
    // Let the process finish (and its events queue up) before subscribing.
    tokio::time::sleep(Duration::from_millis(200)).await;
    let StartedExecProcess { process } = session;
    let wake = process.subscribe_wake();
    let (output, exit_code, closed) = collect_process_output_from_reads(process, wake).await?;
    assert_eq!(output, "queued output\n");
    assert_eq!(exit_code, Some(0));
    assert!(closed);
    Ok(())
}
/// Shuts down the exec-server while a remote process is still running and
/// asserts the orphaned session surfaces a transport-disconnect failure and
/// transitions to the closed state.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_exec_process_reports_transport_disconnect() -> Result<()> {
    let mut context = create_process_context(/*use_remote*/ true).await?;
    let session = context
        .backend
        .start(ExecParams {
            process_id: "proc-disconnect".to_string(),
            argv: ["/bin/sh", "-c", "sleep 10"]
                .iter()
                .map(|arg| arg.to_string())
                .collect(),
            cwd: std::env::current_dir()?,
            env: Default::default(),
            tty: false,
            arg0: None,
        })
        .await?;
    // Tear down the transport while the remote process is still sleeping.
    context
        .server
        .as_mut()
        .expect("remote context should include exec-server harness")
        .shutdown()
        .await?;
    let mut wake = session.process.subscribe_wake();
    let response = read_process_until_change(session.process, &mut wake, None).await?;
    let message = response
        .failure
        .expect("disconnect should surface as a failure");
    assert!(
        message.starts_with("exec-server transport disconnected"),
        "unexpected failure message: {message}"
    );
    assert!(
        response.closed,
        "disconnect should close the process session"
    );
    Ok(())
}
@@ -85,3 +256,24 @@ async fn assert_exec_process_starts_and_exits(use_remote: bool) -> Result<()> {
// Parameterized wrapper (its `#[test_case]`/`#[tokio::test]` attributes sit
// just above this hunk): runs the start-and-exit scenario against both the
// local and the remote backend.
async fn exec_process_starts_and_exits(use_remote: bool) -> Result<()> {
assert_exec_process_starts_and_exits(use_remote).await
}
// Runs the output-streaming scenario against both backends.
#[test_case(false ; "local")]
#[test_case(true ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_process_streams_output(use_remote: bool) -> Result<()> {
assert_exec_process_streams_output(use_remote).await
}
// Runs the stdin-write-then-read scenario against both backends.
#[test_case(false ; "local")]
#[test_case(true ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_process_write_then_read(use_remote: bool) -> Result<()> {
assert_exec_process_write_then_read(use_remote).await
}
// Runs the queued-events-before-subscribe scenario against both backends.
#[test_case(false ; "local")]
#[test_case(true ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_process_preserves_queued_events_before_subscribe(use_remote: bool) -> Result<()> {
assert_exec_process_preserves_queued_events_before_subscribe(use_remote).await
}