mirror of
https://github.com/openai/codex.git
synced 2026-04-26 07:35:29 +00:00
feat: exec-server prep for unified exec (#15691)
This PR partially rebases `unified_exec` on the `exec-server` and adapts the `exec-server` accordingly. ## What changed in `exec-server` 1. Replaced the old "broadcast-driven; process-global" event model with process-scoped session events. The goal is to be able to have a dedicated handler for each process. 2. Extended the protocol contract to support explicit lifecycle status and stream ordering: - `WriteResponse` now returns `WriteStatus` (Accepted, UnknownProcess, StdinClosed, Starting) instead of a bool. - Added seq fields to output/exited notifications. - Added a terminal process/closed notification. 3. Demultiplexed remote notifications into per-process channels. Same as for the event system. 4. Local and remote backends now both implement ExecBackend. 5. Local backend wraps internal process ID/operations into per-process ExecProcess objects. 6. Remote backend registers a session channel before launch and unregisters on failed launch. ## What changed in `unified_exec` 1. Added a unified process-state model and a backend-neutral process wrapper. This will probably disappear in the future, but it makes it easier to keep the work flowing on both sides. - `UnifiedExecProcess` now handles both local PTY sessions and remote exec-server processes through a shared `ProcessHandle`. - Added `ProcessState` to track has_exited, exit_code, and terminal failure message consistently across backends. 2. Routed write and lifecycle handling through process-level methods. ## Some rationale 1. The change centralizes execution transport in exec-server while preserving policy and orchestration ownership in core, avoiding duplicated launch approval logic. This comes from internal discussion. 2. Session-scoped events remove coupling/cross-talk between processes and make stream ordering and terminal state explicit (seq, closed, failed). 3. 
The failure-path surfacing (remote launch failures, write failures, transport disconnects) makes command tool output and cleanup behavior deterministic. ## Follow-ups: * Unify the concept of thread ID behind an obfuscated struct * FD handling * Full zsh-fork compatibility * Full network sandboxing compatibility * Handle WebSocket (ws) disconnection
This commit is contained in:
@@ -6,19 +6,23 @@ use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_exec_server::Environment;
|
||||
use codex_exec_server::ExecBackend;
|
||||
use codex_exec_server::ExecParams;
|
||||
use codex_exec_server::ExecProcess;
|
||||
use codex_exec_server::ExecResponse;
|
||||
use codex_exec_server::ReadParams;
|
||||
use codex_exec_server::ReadResponse;
|
||||
use codex_exec_server::StartedExecProcess;
|
||||
use pretty_assertions::assert_eq;
|
||||
use test_case::test_case;
|
||||
use tokio::sync::watch;
|
||||
use tokio::time::Duration;
|
||||
use tokio::time::timeout;
|
||||
|
||||
use common::exec_server::ExecServerHarness;
|
||||
use common::exec_server::exec_server;
|
||||
|
||||
struct ProcessContext {
|
||||
process: Arc<dyn ExecProcess>,
|
||||
_server: Option<ExecServerHarness>,
|
||||
backend: Arc<dyn ExecBackend>,
|
||||
server: Option<ExecServerHarness>,
|
||||
}
|
||||
|
||||
async fn create_process_context(use_remote: bool) -> Result<ProcessContext> {
|
||||
@@ -26,22 +30,22 @@ async fn create_process_context(use_remote: bool) -> Result<ProcessContext> {
|
||||
let server = exec_server().await?;
|
||||
let environment = Environment::create(Some(server.websocket_url().to_string())).await?;
|
||||
Ok(ProcessContext {
|
||||
process: environment.get_executor(),
|
||||
_server: Some(server),
|
||||
backend: environment.get_exec_backend(),
|
||||
server: Some(server),
|
||||
})
|
||||
} else {
|
||||
let environment = Environment::create(/*exec_server_url*/ None).await?;
|
||||
Ok(ProcessContext {
|
||||
process: environment.get_executor(),
|
||||
_server: None,
|
||||
backend: environment.get_exec_backend(),
|
||||
server: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
async fn assert_exec_process_starts_and_exits(use_remote: bool) -> Result<()> {
|
||||
let context = create_process_context(use_remote).await?;
|
||||
let response = context
|
||||
.process
|
||||
let session = context
|
||||
.backend
|
||||
.start(ExecParams {
|
||||
process_id: "proc-1".to_string(),
|
||||
argv: vec!["true".to_string()],
|
||||
@@ -51,30 +55,197 @@ async fn assert_exec_process_starts_and_exits(use_remote: bool) -> Result<()> {
|
||||
arg0: None,
|
||||
})
|
||||
.await?;
|
||||
assert_eq!(
|
||||
response,
|
||||
ExecResponse {
|
||||
process_id: "proc-1".to_string(),
|
||||
}
|
||||
);
|
||||
assert_eq!(session.process.process_id().as_str(), "proc-1");
|
||||
let wake_rx = session.process.subscribe_wake();
|
||||
let (_, exit_code, closed) =
|
||||
collect_process_output_from_reads(session.process, wake_rx).await?;
|
||||
|
||||
let mut next_seq = 0;
|
||||
assert_eq!(exit_code, Some(0));
|
||||
assert!(closed);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn read_process_until_change(
|
||||
session: Arc<dyn ExecProcess>,
|
||||
wake_rx: &mut watch::Receiver<u64>,
|
||||
after_seq: Option<u64>,
|
||||
) -> Result<ReadResponse> {
|
||||
let response = session
|
||||
.read(after_seq, /*max_bytes*/ None, /*wait_ms*/ Some(0))
|
||||
.await?;
|
||||
if !response.chunks.is_empty() || response.closed || response.failure.is_some() {
|
||||
return Ok(response);
|
||||
}
|
||||
|
||||
timeout(Duration::from_secs(2), wake_rx.changed()).await??;
|
||||
session
|
||||
.read(after_seq, /*max_bytes*/ None, /*wait_ms*/ Some(0))
|
||||
.await
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
async fn collect_process_output_from_reads(
|
||||
session: Arc<dyn ExecProcess>,
|
||||
mut wake_rx: watch::Receiver<u64>,
|
||||
) -> Result<(String, Option<i32>, bool)> {
|
||||
let mut output = String::new();
|
||||
let mut exit_code = None;
|
||||
let mut after_seq = None;
|
||||
loop {
|
||||
let read = context
|
||||
.process
|
||||
.read(ReadParams {
|
||||
process_id: "proc-1".to_string(),
|
||||
after_seq: Some(next_seq),
|
||||
max_bytes: None,
|
||||
wait_ms: Some(100),
|
||||
})
|
||||
.await?;
|
||||
next_seq = read.next_seq;
|
||||
if read.exited {
|
||||
assert_eq!(read.exit_code, Some(0));
|
||||
let response =
|
||||
read_process_until_change(Arc::clone(&session), &mut wake_rx, after_seq).await?;
|
||||
if let Some(message) = response.failure {
|
||||
anyhow::bail!("process failed before closed state: {message}");
|
||||
}
|
||||
for chunk in response.chunks {
|
||||
output.push_str(&String::from_utf8_lossy(&chunk.chunk.into_inner()));
|
||||
after_seq = Some(chunk.seq);
|
||||
}
|
||||
if response.exited {
|
||||
exit_code = response.exit_code;
|
||||
}
|
||||
if response.closed {
|
||||
break;
|
||||
}
|
||||
after_seq = response.next_seq.checked_sub(1).or(after_seq);
|
||||
}
|
||||
drop(session);
|
||||
Ok((output, exit_code, true))
|
||||
}
|
||||
|
||||
async fn assert_exec_process_streams_output(use_remote: bool) -> Result<()> {
|
||||
let context = create_process_context(use_remote).await?;
|
||||
let process_id = "proc-stream".to_string();
|
||||
let session = context
|
||||
.backend
|
||||
.start(ExecParams {
|
||||
process_id: process_id.clone(),
|
||||
argv: vec![
|
||||
"/bin/sh".to_string(),
|
||||
"-c".to_string(),
|
||||
"sleep 0.05; printf 'session output\\n'".to_string(),
|
||||
],
|
||||
cwd: std::env::current_dir()?,
|
||||
env: Default::default(),
|
||||
tty: false,
|
||||
arg0: None,
|
||||
})
|
||||
.await?;
|
||||
assert_eq!(session.process.process_id().as_str(), process_id);
|
||||
|
||||
let StartedExecProcess { process } = session;
|
||||
let wake_rx = process.subscribe_wake();
|
||||
let (output, exit_code, closed) = collect_process_output_from_reads(process, wake_rx).await?;
|
||||
assert_eq!(output, "session output\n");
|
||||
assert_eq!(exit_code, Some(0));
|
||||
assert!(closed);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn assert_exec_process_write_then_read(use_remote: bool) -> Result<()> {
|
||||
let context = create_process_context(use_remote).await?;
|
||||
let process_id = "proc-stdin".to_string();
|
||||
let session = context
|
||||
.backend
|
||||
.start(ExecParams {
|
||||
process_id: process_id.clone(),
|
||||
argv: vec![
|
||||
"/usr/bin/python3".to_string(),
|
||||
"-c".to_string(),
|
||||
"import sys; line = sys.stdin.readline(); sys.stdout.write(f'from-stdin:{line}'); sys.stdout.flush()".to_string(),
|
||||
],
|
||||
cwd: std::env::current_dir()?,
|
||||
env: Default::default(),
|
||||
tty: true,
|
||||
arg0: None,
|
||||
})
|
||||
.await?;
|
||||
assert_eq!(session.process.process_id().as_str(), process_id);
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||
session.process.write(b"hello\n".to_vec()).await?;
|
||||
let StartedExecProcess { process } = session;
|
||||
let wake_rx = process.subscribe_wake();
|
||||
let (output, exit_code, closed) = collect_process_output_from_reads(process, wake_rx).await?;
|
||||
|
||||
assert!(
|
||||
output.contains("from-stdin:hello"),
|
||||
"unexpected output: {output:?}"
|
||||
);
|
||||
assert_eq!(exit_code, Some(0));
|
||||
assert!(closed);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn assert_exec_process_preserves_queued_events_before_subscribe(
|
||||
use_remote: bool,
|
||||
) -> Result<()> {
|
||||
let context = create_process_context(use_remote).await?;
|
||||
let session = context
|
||||
.backend
|
||||
.start(ExecParams {
|
||||
process_id: "proc-queued".to_string(),
|
||||
argv: vec![
|
||||
"/bin/sh".to_string(),
|
||||
"-c".to_string(),
|
||||
"printf 'queued output\\n'".to_string(),
|
||||
],
|
||||
cwd: std::env::current_dir()?,
|
||||
env: Default::default(),
|
||||
tty: false,
|
||||
arg0: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||
|
||||
let StartedExecProcess { process } = session;
|
||||
let wake_rx = process.subscribe_wake();
|
||||
let (output, exit_code, closed) = collect_process_output_from_reads(process, wake_rx).await?;
|
||||
assert_eq!(output, "queued output\n");
|
||||
assert_eq!(exit_code, Some(0));
|
||||
assert!(closed);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn remote_exec_process_reports_transport_disconnect() -> Result<()> {
|
||||
let mut context = create_process_context(/*use_remote*/ true).await?;
|
||||
let session = context
|
||||
.backend
|
||||
.start(ExecParams {
|
||||
process_id: "proc-disconnect".to_string(),
|
||||
argv: vec![
|
||||
"/bin/sh".to_string(),
|
||||
"-c".to_string(),
|
||||
"sleep 10".to_string(),
|
||||
],
|
||||
cwd: std::env::current_dir()?,
|
||||
env: Default::default(),
|
||||
tty: false,
|
||||
arg0: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let server = context
|
||||
.server
|
||||
.as_mut()
|
||||
.expect("remote context should include exec-server harness");
|
||||
server.shutdown().await?;
|
||||
|
||||
let mut wake_rx = session.process.subscribe_wake();
|
||||
let response = read_process_until_change(session.process, &mut wake_rx, None).await?;
|
||||
let message = response
|
||||
.failure
|
||||
.expect("disconnect should surface as a failure");
|
||||
assert!(
|
||||
message.starts_with("exec-server transport disconnected"),
|
||||
"unexpected failure message: {message}"
|
||||
);
|
||||
assert!(
|
||||
response.closed,
|
||||
"disconnect should close the process session"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -85,3 +256,24 @@ async fn assert_exec_process_starts_and_exits(use_remote: bool) -> Result<()> {
|
||||
async fn exec_process_starts_and_exits(use_remote: bool) -> Result<()> {
|
||||
assert_exec_process_starts_and_exits(use_remote).await
|
||||
}
|
||||
|
||||
#[test_case(false ; "local")]
|
||||
#[test_case(true ; "remote")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exec_process_streams_output(use_remote: bool) -> Result<()> {
|
||||
assert_exec_process_streams_output(use_remote).await
|
||||
}
|
||||
|
||||
#[test_case(false ; "local")]
|
||||
#[test_case(true ; "remote")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exec_process_write_then_read(use_remote: bool) -> Result<()> {
|
||||
assert_exec_process_write_then_read(use_remote).await
|
||||
}
|
||||
|
||||
#[test_case(false ; "local")]
|
||||
#[test_case(true ; "remote")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exec_process_preserves_queued_events_before_subscribe(use_remote: bool) -> Result<()> {
|
||||
assert_exec_process_preserves_queued_events_before_subscribe(use_remote).await
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user