feat: exec-server prep for unified exec (#15691)

This PR partially rebases `unified_exec` onto the `exec-server` and adapts
the `exec-server` accordingly.

## What changed in `exec-server`

1. Replaced the old "broadcast-driven, process-global" event model with
process-scoped session events. The goal is to be able to have a dedicated
handler for each process.
2. Extended the protocol contract to support explicit lifecycle status and
stream ordering:
- `WriteResponse` now returns `WriteStatus` (Accepted, UnknownProcess,
StdinClosed, Starting) instead of a bool.
  - Added seq fields to output/exited notifications.
  - Added terminal process/closed notification.
3. Demultiplexed remote notifications into per-process channels, mirroring
the session-scoped event model described above.
4. Local and remote backends now both implement ExecBackend.
5. Local backend wraps internal process ID/operations into per-process
ExecProcess objects.
6. Remote backend registers a session channel before launch and
unregisters on failed launch.

## What changed in `unified_exec`

1. Added a unified process-state model and a backend-neutral process
wrapper. This will probably disappear in the future, but it makes it
easier to keep the work flowing on both sides.
- `UnifiedExecProcess` now handles both local PTY sessions and remote
exec-server processes through a shared `ProcessHandle`.
- Added `ProcessState` to track has_exited, exit_code, and terminal
failure message consistently across backends.
2. Routed write and lifecycle handling through process-level methods.

## Rationale

1. The change centralizes execution transport in exec-server while
preserving policy and orchestration ownership in core, avoiding
duplicated launch approval logic. This comes from internal discussion.
2. Session-scoped events remove coupling/cross-talk between processes
and make stream ordering and terminal state explicit (seq, closed,
failed).
3. The failure-path surfacing (remote launch failures, write failures,
transport disconnects) makes command tool output and cleanup behavior
deterministic.

## Follow-ups:
* Unify the concept of thread ID behind an obfuscated struct
* FD handling
* Full zsh-fork compatibility
* Full network sandboxing compatibility
* Handle ws disconnection
This commit is contained in:
jif-oai
2026-03-26 14:22:34 +00:00
committed by GitHub
parent 4a5635b5a0
commit 7dac332c93
24 changed files with 1933 additions and 325 deletions

View File

@@ -6,19 +6,23 @@ use std::sync::Arc;
use anyhow::Result;
use codex_exec_server::Environment;
use codex_exec_server::ExecBackend;
use codex_exec_server::ExecParams;
use codex_exec_server::ExecProcess;
use codex_exec_server::ExecResponse;
use codex_exec_server::ReadParams;
use codex_exec_server::ReadResponse;
use codex_exec_server::StartedExecProcess;
use pretty_assertions::assert_eq;
use test_case::test_case;
use tokio::sync::watch;
use tokio::time::Duration;
use tokio::time::timeout;
use common::exec_server::ExecServerHarness;
use common::exec_server::exec_server;
/// Shared test fixture: the backend under test plus, for remote runs, the
/// exec-server harness that must outlive the processes it serves.
///
/// NOTE(review): the diff rendering had left the pre-change fields
/// (`process`, `_server`) interleaved here; only `backend`/`server` are
/// initialized by `create_process_context`, so the stale fields are removed.
struct ProcessContext {
    backend: Arc<dyn ExecBackend>,
    // `None` for the local backend; `Some` keeps the remote harness alive and
    // lets tests shut it down explicitly (see the transport-disconnect test).
    server: Option<ExecServerHarness>,
}
async fn create_process_context(use_remote: bool) -> Result<ProcessContext> {
@@ -26,22 +30,22 @@ async fn create_process_context(use_remote: bool) -> Result<ProcessContext> {
let server = exec_server().await?;
let environment = Environment::create(Some(server.websocket_url().to_string())).await?;
Ok(ProcessContext {
process: environment.get_executor(),
_server: Some(server),
backend: environment.get_exec_backend(),
server: Some(server),
})
} else {
let environment = Environment::create(/*exec_server_url*/ None).await?;
Ok(ProcessContext {
process: environment.get_executor(),
_server: None,
backend: environment.get_exec_backend(),
server: None,
})
}
}
async fn assert_exec_process_starts_and_exits(use_remote: bool) -> Result<()> {
let context = create_process_context(use_remote).await?;
let response = context
.process
let session = context
.backend
.start(ExecParams {
process_id: "proc-1".to_string(),
argv: vec!["true".to_string()],
@@ -51,30 +55,197 @@ async fn assert_exec_process_starts_and_exits(use_remote: bool) -> Result<()> {
arg0: None,
})
.await?;
assert_eq!(
response,
ExecResponse {
process_id: "proc-1".to_string(),
}
);
assert_eq!(session.process.process_id().as_str(), "proc-1");
let wake_rx = session.process.subscribe_wake();
let (_, exit_code, closed) =
collect_process_output_from_reads(session.process, wake_rx).await?;
let mut next_seq = 0;
assert_eq!(exit_code, Some(0));
assert!(closed);
Ok(())
}
/// Performs one non-blocking read of the session; if nothing new has arrived
/// (no chunks, not closed, no failure), waits up to 2 seconds on the wake
/// channel and reads once more.
async fn read_process_until_change(
    session: Arc<dyn ExecProcess>,
    wake_rx: &mut watch::Receiver<u64>,
    after_seq: Option<u64>,
) -> Result<ReadResponse> {
    let first = session
        .read(after_seq, /*max_bytes*/ None, /*wait_ms*/ Some(0))
        .await?;
    let has_news = !first.chunks.is_empty() || first.closed || first.failure.is_some();
    if has_news {
        return Ok(first);
    }
    // Nothing yet: block on the wake signal, bounded so a stuck process fails
    // the test instead of hanging it.
    timeout(Duration::from_secs(2), wake_rx.changed()).await??;
    let second = session
        .read(after_seq, /*max_bytes*/ None, /*wait_ms*/ Some(0))
        .await?;
    Ok(second)
}
async fn collect_process_output_from_reads(
session: Arc<dyn ExecProcess>,
mut wake_rx: watch::Receiver<u64>,
) -> Result<(String, Option<i32>, bool)> {
let mut output = String::new();
let mut exit_code = None;
let mut after_seq = None;
loop {
let read = context
.process
.read(ReadParams {
process_id: "proc-1".to_string(),
after_seq: Some(next_seq),
max_bytes: None,
wait_ms: Some(100),
})
.await?;
next_seq = read.next_seq;
if read.exited {
assert_eq!(read.exit_code, Some(0));
let response =
read_process_until_change(Arc::clone(&session), &mut wake_rx, after_seq).await?;
if let Some(message) = response.failure {
anyhow::bail!("process failed before closed state: {message}");
}
for chunk in response.chunks {
output.push_str(&String::from_utf8_lossy(&chunk.chunk.into_inner()));
after_seq = Some(chunk.seq);
}
if response.exited {
exit_code = response.exit_code;
}
if response.closed {
break;
}
after_seq = response.next_seq.checked_sub(1).or(after_seq);
}
drop(session);
Ok((output, exit_code, true))
}
/// Runs a short shell pipeline and checks that its stdout is streamed back
/// through session reads, with a clean exit and a closed session.
async fn assert_exec_process_streams_output(use_remote: bool) -> Result<()> {
    let context = create_process_context(use_remote).await?;
    let process_id = "proc-stream".to_string();
    let params = ExecParams {
        process_id: process_id.clone(),
        argv: ["/bin/sh", "-c", "sleep 0.05; printf 'session output\\n'"]
            .iter()
            .map(|arg| arg.to_string())
            .collect(),
        cwd: std::env::current_dir()?,
        env: Default::default(),
        tty: false,
        arg0: None,
    };
    let session = context.backend.start(params).await?;
    assert_eq!(session.process.process_id().as_str(), process_id);
    let StartedExecProcess { process } = session;
    let wake = process.subscribe_wake();
    let (output, exit_code, closed) = collect_process_output_from_reads(process, wake).await?;
    assert_eq!(output, "session output\n");
    assert_eq!(exit_code, Some(0));
    assert!(closed);
    Ok(())
}
/// Launches an interactive python process on a TTY, feeds it a line over
/// stdin, and verifies the echoed `from-stdin:` output comes back via reads.
async fn assert_exec_process_write_then_read(use_remote: bool) -> Result<()> {
    let context = create_process_context(use_remote).await?;
    let process_id = "proc-stdin".to_string();
    let params = ExecParams {
        process_id: process_id.clone(),
        argv: vec![
            "/usr/bin/python3".to_string(),
            "-c".to_string(),
            "import sys; line = sys.stdin.readline(); sys.stdout.write(f'from-stdin:{line}'); sys.stdout.flush()".to_string(),
        ],
        cwd: std::env::current_dir()?,
        env: Default::default(),
        tty: true,
        arg0: None,
    };
    let session = context.backend.start(params).await?;
    assert_eq!(session.process.process_id().as_str(), process_id);
    // Give the interpreter a moment to reach the blocking readline before we
    // write the stdin payload.
    tokio::time::sleep(Duration::from_millis(200)).await;
    session.process.write(b"hello\n".to_vec()).await?;
    let StartedExecProcess { process } = session;
    let wake = process.subscribe_wake();
    let (output, exit_code, closed) = collect_process_output_from_reads(process, wake).await?;
    assert!(
        output.contains("from-stdin:hello"),
        "unexpected output: {output:?}"
    );
    assert_eq!(exit_code, Some(0));
    assert!(closed);
    Ok(())
}
/// Starts a fast-exiting process and only subscribes to its wake channel
/// after a delay, verifying output produced before the subscription is
/// preserved rather than dropped.
async fn assert_exec_process_preserves_queued_events_before_subscribe(
    use_remote: bool,
) -> Result<()> {
    let context = create_process_context(use_remote).await?;
    let params = ExecParams {
        process_id: "proc-queued".to_string(),
        argv: ["/bin/sh", "-c", "printf 'queued output\\n'"]
            .iter()
            .map(|arg| arg.to_string())
            .collect(),
        cwd: std::env::current_dir()?,
        env: Default::default(),
        tty: false,
        arg0: None,
    };
    let session = context.backend.start(params).await?;
    // Let the process finish (and its events queue up) before subscribing.
    tokio::time::sleep(Duration::from_millis(200)).await;
    let StartedExecProcess { process } = session;
    let wake = process.subscribe_wake();
    let (output, exit_code, closed) = collect_process_output_from_reads(process, wake).await?;
    assert_eq!(output, "queued output\n");
    assert_eq!(exit_code, Some(0));
    assert!(closed);
    Ok(())
}
/// Shuts down the exec-server while a remote process is still running and
/// asserts the orphaned session surfaces a transport-disconnect failure and
/// transitions to the closed state.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_exec_process_reports_transport_disconnect() -> Result<()> {
    let mut context = create_process_context(/*use_remote*/ true).await?;
    let session = context
        .backend
        .start(ExecParams {
            process_id: "proc-disconnect".to_string(),
            argv: ["/bin/sh", "-c", "sleep 10"]
                .iter()
                .map(|arg| arg.to_string())
                .collect(),
            cwd: std::env::current_dir()?,
            env: Default::default(),
            tty: false,
            arg0: None,
        })
        .await?;
    // Tear down the transport while the remote process is still sleeping.
    context
        .server
        .as_mut()
        .expect("remote context should include exec-server harness")
        .shutdown()
        .await?;
    let mut wake = session.process.subscribe_wake();
    let response = read_process_until_change(session.process, &mut wake, None).await?;
    let message = response
        .failure
        .expect("disconnect should surface as a failure");
    assert!(
        message.starts_with("exec-server transport disconnected"),
        "unexpected failure message: {message}"
    );
    assert!(
        response.closed,
        "disconnect should close the process session"
    );
    Ok(())
}
@@ -85,3 +256,24 @@ async fn assert_exec_process_starts_and_exits(use_remote: bool) -> Result<()> {
// Parameterized wrapper (its `#[test_case]`/`#[tokio::test]` attributes sit
// just above this hunk): runs the start-and-exit scenario against both the
// local and the remote backend.
async fn exec_process_starts_and_exits(use_remote: bool) -> Result<()> {
assert_exec_process_starts_and_exits(use_remote).await
}
// Runs the output-streaming scenario against both backends.
#[test_case(false ; "local")]
#[test_case(true ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_process_streams_output(use_remote: bool) -> Result<()> {
assert_exec_process_streams_output(use_remote).await
}
// Runs the stdin-write-then-read scenario against both backends.
#[test_case(false ; "local")]
#[test_case(true ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_process_write_then_read(use_remote: bool) -> Result<()> {
assert_exec_process_write_then_read(use_remote).await
}
// Runs the queued-events-before-subscribe scenario against both backends.
#[test_case(false ; "local")]
#[test_case(true ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_process_preserves_queued_events_before_subscribe(use_remote: bool) -> Result<()> {
assert_exec_process_preserves_queued_events_before_subscribe(use_remote).await
}