mirror of
https://github.com/openai/codex.git
synced 2026-05-04 03:16:31 +00:00
feat: move exec-server ownership (#16344)
This introduces session-scoped ownership for exec-server so that WebSocket disconnects no longer immediately kill running remote exec processes, and it prepares the protocol for reconnect-based resume.
- add session_id / resume_session_id to the exec-server initialize handshake
- move process ownership under a shared session registry
- detach sessions on WebSocket disconnect and expire them after a TTL instead of killing processes immediately (resume will build on this)
- allow a new connection to resume an existing session and take over notifications/ownership
- use UUIDs as session ids so they are not predictable, since there is no auth for now
- make detached-session expiry authoritative at resume time so teardown wins at the TTL boundary
- reject long-poll process/read calls whose session is resumed out from under an older attachment
---------
Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
@@ -4,43 +4,81 @@ use std::time::Duration;
|
||||
|
||||
use pretty_assertions::assert_eq;
|
||||
use tokio::sync::mpsc;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::ExecServerHandler;
|
||||
use crate::ProcessId;
|
||||
use crate::protocol::ExecParams;
|
||||
use crate::protocol::InitializeResponse;
|
||||
use crate::protocol::InitializeParams;
|
||||
use crate::protocol::ReadParams;
|
||||
use crate::protocol::ReadResponse;
|
||||
use crate::protocol::TerminateParams;
|
||||
use crate::protocol::TerminateResponse;
|
||||
use crate::rpc::RpcNotificationSender;
|
||||
use crate::server::session_registry::SessionRegistry;
|
||||
|
||||
fn exec_params(process_id: &str) -> ExecParams {
|
||||
let mut env = HashMap::new();
|
||||
if let Some(path) = std::env::var_os("PATH") {
|
||||
env.insert("PATH".to_string(), path.to_string_lossy().into_owned());
|
||||
}
|
||||
exec_params_with_argv(process_id, sleep_argv())
|
||||
}
|
||||
|
||||
fn exec_params_with_argv(process_id: &str, argv: Vec<String>) -> ExecParams {
|
||||
ExecParams {
|
||||
process_id: ProcessId::from(process_id),
|
||||
argv: vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"sleep 0.1".to_string(),
|
||||
],
|
||||
argv,
|
||||
cwd: std::env::current_dir().expect("cwd"),
|
||||
env,
|
||||
env: inherited_path_env(),
|
||||
tty: false,
|
||||
arg0: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an environment map containing only the current process's `PATH`.
///
/// The map is empty when `PATH` is unset. A non-UTF-8 `PATH` is converted
/// lossily rather than rejected.
fn inherited_path_env() -> HashMap<String, String> {
    let mut env = HashMap::new();
    if let Some(path) = std::env::var_os("PATH") {
        env.insert("PATH".to_string(), path.to_string_lossy().into_owned());
    }
    env
}
|
||||
|
||||
fn sleep_argv() -> Vec<String> {
|
||||
shell_argv("sleep 0.1", "ping -n 2 127.0.0.1 >NUL")
|
||||
}
|
||||
|
||||
fn shell_argv(unix_script: &str, windows_script: &str) -> Vec<String> {
|
||||
if cfg!(windows) {
|
||||
vec![
|
||||
windows_command_processor(),
|
||||
"/C".to_string(),
|
||||
windows_script.to_string(),
|
||||
]
|
||||
} else {
|
||||
vec![
|
||||
"/bin/sh".to_string(),
|
||||
"-c".to_string(),
|
||||
unix_script.to_string(),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the Windows command processor to invoke.
///
/// Uses the `COMSPEC` environment variable when it is set and valid Unicode,
/// and falls back to `cmd.exe` otherwise.
fn windows_command_processor() -> String {
    std::env::var("COMSPEC").unwrap_or_else(|_| "cmd.exe".to_string())
}
|
||||
|
||||
async fn initialized_handler() -> Arc<ExecServerHandler> {
|
||||
let (outgoing_tx, _outgoing_rx) = mpsc::channel(16);
|
||||
let handler = Arc::new(ExecServerHandler::new(RpcNotificationSender::new(
|
||||
outgoing_tx,
|
||||
)));
|
||||
assert_eq!(
|
||||
handler.initialize().expect("initialize"),
|
||||
InitializeResponse {}
|
||||
);
|
||||
let registry = SessionRegistry::new();
|
||||
let handler = Arc::new(ExecServerHandler::new(
|
||||
registry,
|
||||
RpcNotificationSender::new(outgoing_tx),
|
||||
));
|
||||
let initialize_response = handler
|
||||
.initialize(InitializeParams {
|
||||
client_name: "exec-server-test".to_string(),
|
||||
resume_session_id: None,
|
||||
})
|
||||
.await
|
||||
.expect("initialize");
|
||||
Uuid::parse_str(&initialize_response.session_id).expect("session id should be a UUID");
|
||||
handler.initialized().expect("initialized");
|
||||
handler
|
||||
}
|
||||
@@ -101,3 +139,197 @@ async fn terminate_reports_false_after_process_exit() {
|
||||
|
||||
handler.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn long_poll_read_fails_after_session_resume() {
|
||||
let (first_tx, _first_rx) = mpsc::channel(16);
|
||||
let registry = SessionRegistry::new();
|
||||
let first_handler = Arc::new(ExecServerHandler::new(
|
||||
Arc::clone(®istry),
|
||||
RpcNotificationSender::new(first_tx),
|
||||
));
|
||||
let initialize_response = first_handler
|
||||
.initialize(InitializeParams {
|
||||
client_name: "exec-server-test".to_string(),
|
||||
resume_session_id: None,
|
||||
})
|
||||
.await
|
||||
.expect("initialize");
|
||||
first_handler.initialized().expect("initialized");
|
||||
|
||||
first_handler
|
||||
.exec(exec_params_with_argv(
|
||||
"proc-long-poll",
|
||||
shell_argv(
|
||||
"sleep 0.1; printf resumed",
|
||||
"ping -n 2 127.0.0.1 >NUL && echo resumed",
|
||||
),
|
||||
))
|
||||
.await
|
||||
.expect("start process");
|
||||
|
||||
let first_read_handler = Arc::clone(&first_handler);
|
||||
let read_task = tokio::spawn(async move {
|
||||
first_read_handler
|
||||
.exec_read(ReadParams {
|
||||
process_id: ProcessId::from("proc-long-poll"),
|
||||
after_seq: None,
|
||||
max_bytes: None,
|
||||
wait_ms: Some(500),
|
||||
})
|
||||
.await
|
||||
});
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
first_handler.shutdown().await;
|
||||
|
||||
let (second_tx, _second_rx) = mpsc::channel(16);
|
||||
let second_handler = Arc::new(ExecServerHandler::new(
|
||||
registry,
|
||||
RpcNotificationSender::new(second_tx),
|
||||
));
|
||||
second_handler
|
||||
.initialize(InitializeParams {
|
||||
client_name: "exec-server-test".to_string(),
|
||||
resume_session_id: Some(initialize_response.session_id),
|
||||
})
|
||||
.await
|
||||
.expect("initialize second connection");
|
||||
second_handler
|
||||
.initialized()
|
||||
.expect("initialized second connection");
|
||||
|
||||
let err = read_task
|
||||
.await
|
||||
.expect("read task should join")
|
||||
.expect_err("evicted long-poll read should fail");
|
||||
assert_eq!(err.code, -32600);
|
||||
assert_eq!(
|
||||
err.message,
|
||||
"session has been resumed by another connection"
|
||||
);
|
||||
|
||||
second_handler.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn active_session_resume_is_rejected() {
|
||||
let (first_tx, _first_rx) = mpsc::channel(16);
|
||||
let registry = SessionRegistry::new();
|
||||
let first_handler = Arc::new(ExecServerHandler::new(
|
||||
Arc::clone(®istry),
|
||||
RpcNotificationSender::new(first_tx),
|
||||
));
|
||||
let initialize_response = first_handler
|
||||
.initialize(InitializeParams {
|
||||
client_name: "exec-server-test".to_string(),
|
||||
resume_session_id: None,
|
||||
})
|
||||
.await
|
||||
.expect("initialize");
|
||||
|
||||
let (second_tx, _second_rx) = mpsc::channel(16);
|
||||
let second_handler = Arc::new(ExecServerHandler::new(
|
||||
registry,
|
||||
RpcNotificationSender::new(second_tx),
|
||||
));
|
||||
let err = second_handler
|
||||
.initialize(InitializeParams {
|
||||
client_name: "exec-server-test".to_string(),
|
||||
resume_session_id: Some(initialize_response.session_id.clone()),
|
||||
})
|
||||
.await
|
||||
.expect_err("active session resume should fail");
|
||||
|
||||
assert_eq!(err.code, -32600);
|
||||
assert_eq!(
|
||||
err.message,
|
||||
format!(
|
||||
"session {} is already attached to another connection",
|
||||
initialize_response.session_id
|
||||
)
|
||||
);
|
||||
|
||||
first_handler.shutdown().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn output_and_exit_are_retained_after_notification_receiver_closes() {
|
||||
let (outgoing_tx, outgoing_rx) = mpsc::channel(16);
|
||||
let handler = Arc::new(ExecServerHandler::new(
|
||||
SessionRegistry::new(),
|
||||
RpcNotificationSender::new(outgoing_tx),
|
||||
));
|
||||
handler
|
||||
.initialize(InitializeParams {
|
||||
client_name: "exec-server-test".to_string(),
|
||||
resume_session_id: None,
|
||||
})
|
||||
.await
|
||||
.expect("initialize");
|
||||
handler.initialized().expect("initialized");
|
||||
|
||||
let process_id = ProcessId::from("proc-notification-fail");
|
||||
handler
|
||||
.exec(exec_params_with_argv(
|
||||
process_id.as_str(),
|
||||
shell_argv(
|
||||
"sleep 0.05; printf 'first\\n'; sleep 0.05; printf 'second\\n'",
|
||||
"echo first && ping -n 2 127.0.0.1 >NUL && echo second",
|
||||
),
|
||||
))
|
||||
.await
|
||||
.expect("start process");
|
||||
|
||||
drop(outgoing_rx);
|
||||
|
||||
let (output, exit_code) = read_process_until_closed(&handler, process_id.clone()).await;
|
||||
assert_eq!(output.replace("\r\n", "\n"), "first\nsecond\n");
|
||||
assert_eq!(exit_code, Some(0));
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
handler
|
||||
.exec(exec_params(process_id.as_str()))
|
||||
.await
|
||||
.expect("process id should be reusable after exit retention");
|
||||
|
||||
handler.shutdown().await;
|
||||
}
|
||||
|
||||
async fn read_process_until_closed(
|
||||
handler: &ExecServerHandler,
|
||||
process_id: ProcessId,
|
||||
) -> (String, Option<i32>) {
|
||||
let deadline = tokio::time::Instant::now() + Duration::from_secs(2);
|
||||
let mut output = String::new();
|
||||
let mut exit_code = None;
|
||||
let mut after_seq = None;
|
||||
|
||||
loop {
|
||||
let response: ReadResponse = handler
|
||||
.exec_read(ReadParams {
|
||||
process_id: process_id.clone(),
|
||||
after_seq,
|
||||
max_bytes: None,
|
||||
wait_ms: Some(500),
|
||||
})
|
||||
.await
|
||||
.expect("read process");
|
||||
|
||||
for chunk in response.chunks {
|
||||
output.push_str(&String::from_utf8_lossy(&chunk.chunk.into_inner()));
|
||||
after_seq = Some(chunk.seq);
|
||||
}
|
||||
if response.exited {
|
||||
exit_code = response.exit_code;
|
||||
}
|
||||
if response.closed {
|
||||
return (output, exit_code);
|
||||
}
|
||||
after_seq = response.next_seq.checked_sub(1).or(after_seq);
|
||||
assert!(
|
||||
tokio::time::Instant::now() < deadline,
|
||||
"process should close within 2s"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user