Add pushed exec process events

Add a retained event stream for exec processes so subscribers can consume output and lifecycle events either live or after the process has already closed.

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-04-15 15:44:53 -07:00
parent 086ae0abe5
commit 53ea2a8dda
6 changed files with 368 additions and 10 deletions

View File

@@ -16,6 +16,9 @@ use crate::ProcessId;
use crate::client_api::ExecServerClientConnectOptions;
use crate::client_api::RemoteExecServerConnectArgs;
use crate::connection::JsonRpcConnection;
use crate::process::ExecProcessEvent;
use crate::process::ExecProcessEventLog;
use crate::process::ExecProcessEventReceiver;
use crate::protocol::EXEC_CLOSED_METHOD;
use crate::protocol::EXEC_EXITED_METHOD;
use crate::protocol::EXEC_METHOD;
@@ -53,6 +56,7 @@ use crate::protocol::INITIALIZE_METHOD;
use crate::protocol::INITIALIZED_METHOD;
use crate::protocol::InitializeParams;
use crate::protocol::InitializeResponse;
use crate::protocol::ProcessOutputChunk;
use crate::protocol::ReadParams;
use crate::protocol::ReadResponse;
use crate::protocol::TerminateParams;
@@ -65,6 +69,7 @@ use crate::rpc::RpcClientEvent;
const CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
const INITIALIZE_TIMEOUT: Duration = Duration::from_secs(10);
const PROCESS_EVENT_CHANNEL_CAPACITY: usize = 256;
impl Default for ExecServerClientConnectOptions {
fn default() -> Self {
@@ -100,6 +105,7 @@ impl RemoteExecServerConnectArgs {
/// Shared state behind a session: a wake counter, a retained event log, and
/// the recorded failure message, if one has been set.
pub(crate) struct SessionState {
/// Watch channel carrying a `u64`; `note_change` sends sequence numbers through it.
wake_tx: watch::Sender<u64>,
/// Event log that `publish_event` appends to and `subscribe_events` reads from.
events: ExecProcessEventLog,
/// Failure message stored by `set_failure`; `None` until a failure is recorded.
failure: Mutex<Option<String>>,
}
@@ -452,6 +458,7 @@ impl SessionState {
let (wake_tx, _wake_rx) = watch::channel(0);
Self {
wake_tx,
events: ExecProcessEventLog::new(PROCESS_EVENT_CHANNEL_CAPACITY),
failure: Mutex::new(None),
}
}
@@ -460,19 +467,31 @@ impl SessionState {
self.wake_tx.subscribe()
}
/// Returns a new receiver obtained from this session's event log via
/// `ExecProcessEventLog::subscribe`.
pub(crate) fn subscribe_events(&self) -> ExecProcessEventReceiver {
self.events.subscribe()
}
/// Sends the larger of the wake channel's current value and `seq` to wake
/// subscribers, so the published value never moves backwards here.
fn note_change(&self, seq: u64) {
    // Copy the current value out so the borrow is released before sending.
    let current = *self.wake_tx.borrow();
    // A send error only means nobody is subscribed; that is not a failure.
    let _ = self.wake_tx.send(current.max(seq));
}
/// Forwards `event` to this session's event log via `ExecProcessEventLog::publish`.
fn publish_event(&self, event: ExecProcessEvent) {
self.events.publish(event);
}
async fn set_failure(&self, message: String) {
let mut failure = self.failure.lock().await;
if failure.is_none() {
*failure = Some(message);
let should_publish = failure.is_none();
if should_publish {
*failure = Some(message.clone());
}
drop(failure);
let next = (*self.wake_tx.borrow()).saturating_add(1);
let _ = self.wake_tx.send(next);
if should_publish {
self.publish_event(ExecProcessEvent::Failed(message));
}
}
async fn failed_response(&self) -> Option<ReadResponse> {
@@ -505,6 +524,10 @@ impl Session {
self.state.subscribe()
}
/// Returns an event receiver for this session by delegating to the session
/// state's `subscribe_events`.
pub(crate) fn subscribe_events(&self) -> ExecProcessEventReceiver {
self.state.subscribe_events()
}
pub(crate) async fn read(
&self,
after_seq: Option<u64>,
@@ -628,6 +651,11 @@ async fn handle_server_notification(
serde_json::from_value(notification.params.unwrap_or(Value::Null))?;
if let Some(session) = inner.get_session(&params.process_id) {
session.note_change(params.seq);
session.publish_event(ExecProcessEvent::Output(ProcessOutputChunk {
seq: params.seq,
stream: params.stream,
chunk: params.chunk,
}));
}
}
EXEC_EXITED_METHOD => {
@@ -635,6 +663,10 @@ async fn handle_server_notification(
serde_json::from_value(notification.params.unwrap_or(Value::Null))?;
if let Some(session) = inner.get_session(&params.process_id) {
session.note_change(params.seq);
session.publish_event(ExecProcessEvent::Exited {
seq: params.seq,
exit_code: params.exit_code,
});
}
}
EXEC_CLOSED_METHOD => {
@@ -645,6 +677,7 @@ async fn handle_server_notification(
let session = inner.remove_session(&params.process_id).await;
if let Some(session) = session {
session.note_change(params.seq);
session.publish_event(ExecProcessEvent::Closed { seq: params.seq });
}
}
other => {

View File

@@ -39,6 +39,8 @@ pub use local_file_system::LOCAL_FS;
pub use local_file_system::LocalFileSystem;
pub use process::ExecBackend;
pub use process::ExecProcess;
pub use process::ExecProcessEvent;
pub use process::ExecProcessEventReceiver;
pub use process::StartedExecProcess;
pub use process_id::ProcessId;
pub use protocol::ExecClosedNotification;
@@ -65,6 +67,7 @@ pub use protocol::FsWriteFileParams;
pub use protocol::FsWriteFileResponse;
pub use protocol::InitializeParams;
pub use protocol::InitializeResponse;
pub use protocol::ProcessOutputChunk;
pub use protocol::ReadParams;
pub use protocol::ReadResponse;
pub use protocol::TerminateParams;

View File

@@ -17,9 +17,12 @@ use tokio::sync::watch;
use crate::ExecBackend;
use crate::ExecProcess;
use crate::ExecProcessEvent;
use crate::ExecProcessEventReceiver;
use crate::ExecServerError;
use crate::ProcessId;
use crate::StartedExecProcess;
use crate::process::ExecProcessEventLog;
use crate::protocol::EXEC_CLOSED_METHOD;
use crate::protocol::ExecClosedNotification;
use crate::protocol::ExecEnvPolicy;
@@ -44,6 +47,7 @@ use crate::rpc::invalid_request;
const RETAINED_OUTPUT_BYTES_PER_PROCESS: usize = 1024 * 1024;
const NOTIFICATION_CHANNEL_CAPACITY: usize = 256;
const PROCESS_EVENT_CHANNEL_CAPACITY: usize = 256;
#[cfg(test)]
const EXITED_PROCESS_RETENTION: Duration = Duration::from_millis(25);
#[cfg(not(test))]
@@ -65,6 +69,7 @@ struct RunningProcess {
next_seq: u64,
exit_code: Option<i32>,
wake_tx: watch::Sender<u64>,
events: ExecProcessEventLog,
output_notify: Arc<Notify>,
open_streams: usize,
closed: bool,
@@ -89,6 +94,7 @@ struct LocalExecProcess {
process_id: ProcessId,
backend: LocalProcess,
wake_tx: watch::Sender<u64>,
events: ExecProcessEventLog,
}
impl Default for LocalProcess {
@@ -138,7 +144,7 @@ impl LocalProcess {
async fn start_process(
&self,
params: ExecParams,
) -> Result<(ExecResponse, watch::Sender<u64>), JSONRPCErrorError> {
) -> Result<(ExecResponse, watch::Sender<u64>, ExecProcessEventLog), JSONRPCErrorError> {
let process_id = params.process_id.clone();
let (program, args) = params
.argv
@@ -198,6 +204,7 @@ impl LocalProcess {
let output_notify = Arc::new(Notify::new());
let (wake_tx, _wake_rx) = watch::channel(0);
let events = ExecProcessEventLog::new(PROCESS_EVENT_CHANNEL_CAPACITY);
{
let mut process_map = self.inner.processes.lock().await;
process_map.insert(
@@ -211,6 +218,7 @@ impl LocalProcess {
next_seq: 1,
exit_code: None,
wake_tx: wake_tx.clone(),
events: events.clone(),
output_notify: Arc::clone(&output_notify),
open_streams: 2,
closed: false,
@@ -247,13 +255,13 @@ impl LocalProcess {
output_notify,
));
Ok((ExecResponse { process_id }, wake_tx))
Ok((ExecResponse { process_id }, wake_tx, events))
}
pub(crate) async fn exec(&self, params: ExecParams) -> Result<ExecResponse, JSONRPCErrorError> {
self.start_process(params)
.await
.map(|(response, _)| response)
.map(|(response, _, _)| response)
}
pub(crate) async fn exec_read(
@@ -424,7 +432,7 @@ fn shell_environment_policy(env_policy: &ExecEnvPolicy) -> ShellEnvironmentPolic
#[async_trait]
impl ExecBackend for LocalProcess {
async fn start(&self, params: ExecParams) -> Result<StartedExecProcess, ExecServerError> {
let (response, wake_tx) = self
let (response, wake_tx, events) = self
.start_process(params)
.await
.map_err(map_handler_error)?;
@@ -433,6 +441,7 @@ impl ExecBackend for LocalProcess {
process_id: response.process_id,
backend: self.clone(),
wake_tx,
events,
}),
})
}
@@ -448,6 +457,10 @@ impl ExecProcess for LocalExecProcess {
self.wake_tx.subscribe()
}
/// Returns a new receiver obtained from the event log held by this process handle.
fn subscribe_events(&self) -> ExecProcessEventReceiver {
self.events.subscribe()
}
async fn read(
&self,
after_seq: Option<u64>,
@@ -548,11 +561,19 @@ async fn stream_output(
process.retained_bytes = process.retained_bytes.saturating_sub(evicted.chunk.len());
}
let _ = process.wake_tx.send(seq);
let output = ProcessOutputChunk {
seq,
stream,
chunk: chunk.into(),
};
process
.events
.publish(ExecProcessEvent::Output(output.clone()));
ExecOutputDeltaNotification {
process_id: process_id.clone(),
seq,
stream,
chunk: chunk.into(),
chunk: output.chunk,
}
};
output_notify.notify_waiters();
@@ -580,6 +601,9 @@ async fn watch_exit(
process.next_seq += 1;
process.exit_code = Some(exit_code);
let _ = process.wake_tx.send(seq);
process
.events
.publish(ExecProcessEvent::Exited { seq, exit_code });
Some(ExecExitedNotification {
process_id: process_id.clone(),
seq,
@@ -640,6 +664,7 @@ async fn maybe_emit_closed(process_id: ProcessId, inner: Arc<Inner>) {
let seq = process.next_seq;
process.next_seq += 1;
let _ = process.wake_tx.send(seq);
process.events.publish(ExecProcessEvent::Closed { seq });
Some(ExecClosedNotification {
process_id: process_id.clone(),
seq,

View File

@@ -1,11 +1,15 @@
use std::collections::VecDeque;
use std::sync::Arc;
use std::sync::Mutex as StdMutex;
use async_trait::async_trait;
use tokio::sync::broadcast;
use tokio::sync::watch;
use crate::ExecServerError;
use crate::ProcessId;
use crate::protocol::ExecParams;
use crate::protocol::ProcessOutputChunk;
use crate::protocol::ReadResponse;
use crate::protocol::WriteResponse;
@@ -13,12 +17,101 @@ pub struct StartedExecProcess {
pub process: Arc<dyn ExecProcess>,
}
/// Pushed process events for consumers that want to follow process output as it
/// arrives instead of polling retained output with [`ExecProcess::read`].
///
/// The stream is scoped to one [`ExecProcess`] handle. `Output` events carry
/// stdout, stderr, or pty bytes. `Exited` reports the process exit status, while
/// `Closed` means all output streams have ended and no more output events will
/// arrive. `Failed` is used when the process session cannot continue, for
/// example because the remote executor connection disconnected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExecProcessEvent {
/// One chunk of process output, carried as a [`ProcessOutputChunk`].
Output(ProcessOutputChunk),
/// The process exited with `exit_code`; `seq` is the sequence number
/// carried by this event.
Exited { seq: u64, exit_code: i32 },
/// All output streams have ended; `seq` is the sequence number carried by
/// this event.
Closed { seq: u64 },
/// The process session cannot continue; the payload is the failure message.
Failed(String),
}
/// Cloneable handle to an event log. The state lives behind an `Arc`, so
/// every clone publishes to and subscribes from the same log.
#[derive(Clone)]
pub(crate) struct ExecProcessEventLog {
inner: Arc<ExecProcessEventLogInner>,
}
/// State shared by all clones of an `ExecProcessEventLog`.
struct ExecProcessEventLogInner {
/// Retained events, oldest first; `publish` trims it to at most `capacity` entries.
history: StdMutex<VecDeque<ExecProcessEvent>>,
/// Broadcast sender used to deliver events to receivers that are already subscribed.
live_tx: broadcast::Sender<ExecProcessEvent>,
/// Limit on the number of retained events; also passed to `broadcast::channel` in `new`.
capacity: usize,
}
impl ExecProcessEventLog {
    /// Creates an empty log.
    ///
    /// `capacity` is used twice: as the capacity of the broadcast channel that
    /// feeds live subscribers, and as the maximum number of events kept for
    /// replay to late subscribers.
    ///
    /// NOTE: tokio documents that `broadcast::channel` panics when given a
    /// capacity of 0, so callers must pass a non-zero value.
    pub(crate) fn new(capacity: usize) -> Self {
        // The receiver created alongside the sender is not used; subscribers
        // obtain their own receiver through `subscribe`.
        let (live_tx, _unused_rx) = broadcast::channel(capacity);
        let inner = ExecProcessEventLogInner {
            history: StdMutex::new(VecDeque::new()),
            live_tx,
            capacity,
        };
        Self {
            inner: Arc::new(inner),
        }
    }

    /// Records `event` in the retained history, dropping the oldest entries
    /// beyond `capacity`, and then broadcasts it to live subscribers.
    ///
    /// A poisoned history lock is recovered rather than propagated.
    pub(crate) fn publish(&self, event: ExecProcessEvent) {
        let inner = &*self.inner;
        let mut retained = inner
            .history
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        retained.push_back(event.clone());
        let excess = retained.len().saturating_sub(inner.capacity);
        for _ in 0..excess {
            retained.pop_front();
        }
        // The history lock is still held here. `subscribe` takes the same lock
        // before creating its receiver, so an event is never both replayed and
        // delivered live to the same subscriber, nor missed between the two.
        // A send error only means there are no live receivers right now.
        let _ = inner.live_tx.send(event);
    }

    /// Returns a receiver that first replays a snapshot of the retained
    /// history and then yields events published after this call.
    pub(crate) fn subscribe(&self) -> ExecProcessEventReceiver {
        // Hold the history lock across both steps so no `publish` can slip in
        // between creating the live receiver and copying the history.
        let retained = self
            .inner
            .history
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let live_rx = self.inner.live_tx.subscribe();
        ExecProcessEventReceiver {
            replay: VecDeque::clone(&retained),
            live_rx,
        }
    }
}
/// Receiver for [`ExecProcessEvent`]s. `recv` drains `replay` first and then
/// reads from `live_rx`.
pub struct ExecProcessEventReceiver {
/// Events copied from the log's retained history when this receiver was created.
replay: VecDeque<ExecProcessEvent>,
/// Broadcast receiver for events published after this receiver was created.
live_rx: broadcast::Receiver<ExecProcessEvent>,
}
impl ExecProcessEventReceiver {
    /// Returns the next event.
    ///
    /// Replayed events are returned first, in order and without waiting. Once
    /// the replay buffer is empty this awaits the live broadcast receiver.
    ///
    /// # Errors
    ///
    /// Replayed events never fail. After that, any
    /// [`broadcast::error::RecvError`] from the live receiver is returned
    /// unchanged.
    pub async fn recv(&mut self) -> Result<ExecProcessEvent, broadcast::error::RecvError> {
        match self.replay.pop_front() {
            Some(replayed) => Ok(replayed),
            None => self.live_rx.recv().await,
        }
    }
}
/// Handle for an executor-managed process.
///
/// Implementations must support both retained-output reads and pushed events:
/// `read` is the request/response API for callers that want to page through
/// buffered output, while `subscribe_events` is the streaming API for callers
/// that want output and lifecycle changes delivered as they happen.
#[async_trait]
pub trait ExecProcess: Send + Sync {
fn process_id(&self) -> &ProcessId;
fn subscribe_wake(&self) -> watch::Receiver<u64>;
fn subscribe_events(&self) -> ExecProcessEventReceiver;
async fn read(
&self,
after_seq: Option<u64>,

View File

@@ -6,6 +6,7 @@ use tracing::trace;
use crate::ExecBackend;
use crate::ExecProcess;
use crate::ExecProcessEventReceiver;
use crate::ExecServerError;
use crate::StartedExecProcess;
use crate::client::ExecServerClient;
@@ -56,6 +57,10 @@ impl ExecProcess for RemoteExecProcess {
self.session.subscribe_wake()
}
/// Returns an event receiver by delegating to the underlying session's
/// `subscribe_events`.
fn subscribe_events(&self) -> ExecProcessEventReceiver {
self.session.subscribe_events()
}
async fn read(
&self,
after_seq: Option<u64>,

View File

@@ -7,8 +7,10 @@ use std::sync::Arc;
use anyhow::Result;
use codex_exec_server::Environment;
use codex_exec_server::ExecBackend;
use codex_exec_server::ExecOutputStream;
use codex_exec_server::ExecParams;
use codex_exec_server::ExecProcess;
use codex_exec_server::ExecProcessEvent;
use codex_exec_server::ProcessId;
use codex_exec_server::ReadResponse;
use codex_exec_server::StartedExecProcess;
@@ -27,6 +29,22 @@ struct ProcessContext {
server: Option<ExecServerHarness>,
}
/// Comparable form of an `ExecProcessEvent` used in test assertions. Output
/// bytes are stored as lossily decoded text, and there is no failure variant
/// because the collecting helper turns `Failed` into an error instead.
#[derive(Debug, PartialEq, Eq)]
enum ProcessEventSnapshot {
// One output chunk: its sequence number, source stream, and decoded text.
Output {
seq: u64,
stream: ExecOutputStream,
text: String,
},
// Process exit with its sequence number and exit code.
Exited {
seq: u64,
exit_code: i32,
},
// Closed event with its sequence number.
Closed {
seq: u64,
},
}
async fn create_process_context(use_remote: bool) -> Result<ProcessContext> {
if use_remote {
let server = exec_server().await?;
@@ -117,6 +135,69 @@ async fn collect_process_output_from_reads(
Ok((output, exit_code, true))
}
/// Subscribes to `session`'s pushed events and consumes them until `Closed`.
///
/// Returns `(stdout, stderr, exit_code, closed)`. Stdout and pty output are
/// merged into the first string and stderr goes into the second. `exit_code`
/// is the code from the last `Exited` event seen, if any. `closed` is always
/// `true` on success because the function only returns `Ok` on `Closed`.
///
/// Output bytes are accumulated per stream and decoded once at the end. A
/// multi-byte UTF-8 character split across two chunks is therefore decoded
/// correctly instead of turning into replacement characters.
///
/// # Errors
///
/// Fails if no event arrives within 2 seconds, if the receiver returns an
/// error, or if a `Failed` event is received before `Closed`.
async fn collect_process_output_from_events(
    session: Arc<dyn ExecProcess>,
) -> Result<(String, String, Option<i32>, bool)> {
    let mut events = session.subscribe_events();
    let mut stdout_bytes: Vec<u8> = Vec::new();
    let mut stderr_bytes: Vec<u8> = Vec::new();
    let mut exit_code = None;
    loop {
        match timeout(Duration::from_secs(2), events.recv()).await?? {
            ExecProcessEvent::Output(chunk) => {
                let sink = match chunk.stream {
                    ExecOutputStream::Stdout | ExecOutputStream::Pty => &mut stdout_bytes,
                    ExecOutputStream::Stderr => &mut stderr_bytes,
                };
                sink.extend_from_slice(&chunk.chunk.into_inner());
            }
            ExecProcessEvent::Exited {
                seq: _,
                exit_code: code,
            } => {
                exit_code = Some(code);
            }
            ExecProcessEvent::Closed { seq: _ } => {
                // Keep the process handle alive until the stream is closed.
                drop(session);
                return Ok((
                    String::from_utf8_lossy(&stdout_bytes).into_owned(),
                    String::from_utf8_lossy(&stderr_bytes).into_owned(),
                    exit_code,
                    true,
                ));
            }
            ExecProcessEvent::Failed(message) => {
                anyhow::bail!("process failed before closed state: {message}");
            }
        }
    }
}
/// Subscribes to `session`'s pushed events and records each one as a
/// `ProcessEventSnapshot`, in arrival order, up to and including `Closed`.
///
/// # Errors
///
/// Fails if no event arrives within 2 seconds, if the receiver returns an
/// error, or if a `Failed` event is received before `Closed`.
async fn collect_process_event_snapshots(
    session: Arc<dyn ExecProcess>,
) -> Result<Vec<ProcessEventSnapshot>> {
    let mut events = session.subscribe_events();
    let mut recorded = Vec::new();
    loop {
        let event = timeout(Duration::from_secs(2), events.recv()).await??;
        match event {
            ExecProcessEvent::Output(chunk) => {
                let seq = chunk.seq;
                let stream = chunk.stream;
                let text = String::from_utf8_lossy(&chunk.chunk.into_inner()).into_owned();
                recorded.push(ProcessEventSnapshot::Output { seq, stream, text });
            }
            ExecProcessEvent::Exited { seq, exit_code } => {
                recorded.push(ProcessEventSnapshot::Exited { seq, exit_code });
            }
            ExecProcessEvent::Closed { seq } => {
                recorded.push(ProcessEventSnapshot::Closed { seq });
                // Keep the process handle alive until the stream is closed.
                drop(session);
                return Ok(recorded);
            }
            ExecProcessEvent::Failed(message) => {
                anyhow::bail!("process failed before closed state: {message}");
            }
        }
    }
}
async fn assert_exec_process_streams_output(use_remote: bool) -> Result<()> {
let context = create_process_context(use_remote).await?;
let process_id = "proc-stream".to_string();
@@ -148,6 +229,96 @@ async fn assert_exec_process_streams_output(use_remote: bool) -> Result<()> {
Ok(())
}
/// Starts a shell script that writes one line to stdout, one line to stderr
/// and then exits with code 7, and asserts that the pushed event stream is
/// exactly: stdout output (seq 1), stderr output (seq 2), exit (seq 3),
/// closed (seq 4).
async fn assert_exec_process_pushes_events(use_remote: bool) -> Result<()> {
    let context = create_process_context(use_remote).await?;
    let process_id = "proc-events".to_string();
    // The sleeps separate the stdout write, the stderr write and the exit in time.
    let argv = vec![
        "/bin/sh".to_string(),
        "-c".to_string(),
        "printf 'event output\\n'; sleep 0.1; printf 'event err\\n' >&2; sleep 0.1; exit 7".to_string(),
    ];
    let params = ExecParams {
        process_id: process_id.clone().into(),
        argv,
        cwd: std::env::current_dir()?,
        env_policy: /*env_policy*/ None,
        env: Default::default(),
        tty: false,
        pipe_stdin: false,
        arg0: None,
    };
    let StartedExecProcess { process } = context.backend.start(params).await?;
    assert_eq!(process.process_id().as_str(), process_id);

    let expected = vec![
        ProcessEventSnapshot::Output {
            seq: 1,
            stream: ExecOutputStream::Stdout,
            text: "event output\n".to_string(),
        },
        ProcessEventSnapshot::Output {
            seq: 2,
            stream: ExecOutputStream::Stderr,
            text: "event err\n".to_string(),
        },
        ProcessEventSnapshot::Exited {
            seq: 3,
            exit_code: 7,
        },
        ProcessEventSnapshot::Closed { seq: 4 },
    ];
    let actual = collect_process_event_snapshots(process).await?;
    assert_eq!(actual, expected);
    Ok(())
}
/// Runs a short script to completion using the read-based collector, and only
/// then subscribes to events. Asserts that the late subscriber still receives
/// the full output, exit code and closed state.
async fn assert_exec_process_replays_events_after_close(use_remote: bool) -> Result<()> {
    let context = create_process_context(use_remote).await?;
    let process_id = "proc-events-late".to_string();
    let argv = vec![
        "/bin/sh".to_string(),
        "-c".to_string(),
        "printf 'late one\\n'; printf 'late two\\n'".to_string(),
    ];
    let params = ExecParams {
        process_id: process_id.clone().into(),
        argv,
        cwd: std::env::current_dir()?,
        env_policy: /*env_policy*/ None,
        env: Default::default(),
        tty: false,
        pipe_stdin: false,
        arg0: None,
    };
    let StartedExecProcess { process } = context.backend.start(params).await?;
    assert_eq!(process.process_id().as_str(), process_id);

    // First consume everything through reads, so the event subscription below
    // is created only after the read-based collector has finished.
    let wake_rx = process.subscribe_wake();
    let from_reads = collect_process_output_from_reads(Arc::clone(&process), wake_rx).await?;
    assert_eq!(
        from_reads,
        ("late one\nlate two\n".to_string(), Some(0), true)
    );

    let from_events = collect_process_output_from_events(process).await?;
    assert_eq!(
        from_events,
        (
            "late one\nlate two\n".to_string(),
            String::new(),
            Some(0),
            true
        )
    );
    Ok(())
}
async fn assert_exec_process_write_then_read(use_remote: bool) -> Result<()> {
let context = create_process_context(use_remote).await?;
let process_id = "proc-stdin".to_string();
@@ -311,15 +482,25 @@ async fn remote_exec_process_reports_transport_disconnect() -> Result<()> {
})
.await?;
let process = Arc::clone(&session.process);
let mut events = process.subscribe_events();
let server = context
.server
.as_mut()
.expect("remote context should include exec-server harness");
server.shutdown().await?;
let mut wake_rx = session.process.subscribe_wake();
let response =
read_process_until_change(session.process, &mut wake_rx, /*after_seq*/ None).await?;
let event = timeout(Duration::from_secs(2), events.recv()).await??;
let ExecProcessEvent::Failed(event_message) = event else {
anyhow::bail!("expected process failure event, got {event:?}");
};
assert!(
event_message.starts_with("exec-server transport disconnected"),
"unexpected failure event: {event_message}"
);
let mut wake_rx = process.subscribe_wake();
let response = read_process_until_change(process, &mut wake_rx, /*after_seq*/ None).await?;
let message = response
.failure
.expect("disconnect should surface as a failure");
@@ -353,6 +534,24 @@ async fn exec_process_streams_output(use_remote: bool) -> Result<()> {
assert_exec_process_streams_output(use_remote).await
}
// Runs `assert_exec_process_pushes_events` once with `use_remote = false`
// ("local") and once with `use_remote = true` ("remote").
#[test_case(false ; "local")]
#[test_case(true ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
// Serialize tests that launch a real exec-server process through the full CLI.
#[serial_test::serial(remote_exec_server)]
async fn exec_process_pushes_events(use_remote: bool) -> Result<()> {
assert_exec_process_pushes_events(use_remote).await
}
// Runs `assert_exec_process_replays_events_after_close` once with
// `use_remote = false` ("local") and once with `use_remote = true` ("remote").
#[test_case(false ; "local")]
#[test_case(true ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
// Serialize tests that launch a real exec-server process through the full CLI.
#[serial_test::serial(remote_exec_server)]
async fn exec_process_replays_events_after_close(use_remote: bool) -> Result<()> {
assert_exec_process_replays_events_after_close(use_remote).await
}
#[test_case(false ; "local")]
#[test_case(true ; "remote")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]