feat: move exec-server ownership (#16344)

This introduces session-scoped ownership for exec-server, so that WebSocket
disconnects no longer immediately kill running remote exec processes, and
prepares the protocol for reconnect-based resume.
- add `session_id` / `resume_session_id` to the exec-server initialize
  handshake
- move process ownership under a shared session registry
- detach sessions on WebSocket disconnect and expire them after a TTL
  instead of killing processes immediately (resume will build on this)
- allow a new connection to resume an existing session and take over
  notifications/ownership
- use UUIDs for session ids so they are not predictable, since there is no
  auth for now
- make detached-session expiry authoritative at resume time so teardown
  wins at the TTL boundary
- reject long-poll process/read calls that get resumed out from under an
  older attachment

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
jif-oai
2026-04-10 14:11:47 +01:00
committed by GitHub
parent 7bbe3b6011
commit 085ffb4456
21 changed files with 1203 additions and 172 deletions

View File

@@ -4,43 +4,81 @@ use std::time::Duration;
use pretty_assertions::assert_eq;
use tokio::sync::mpsc;
use uuid::Uuid;
use super::ExecServerHandler;
use crate::ProcessId;
use crate::protocol::ExecParams;
use crate::protocol::InitializeResponse;
use crate::protocol::InitializeParams;
use crate::protocol::ReadParams;
use crate::protocol::ReadResponse;
use crate::protocol::TerminateParams;
use crate::protocol::TerminateResponse;
use crate::rpc::RpcNotificationSender;
use crate::server::session_registry::SessionRegistry;
/// Builds [`ExecParams`] for the default short-lived test process.
///
/// Delegates to `exec_params_with_argv`, passing the platform-specific
/// command line returned by `sleep_argv()`. The environment is assembled by
/// the delegate, so no local environment map is built here.
fn exec_params(process_id: &str) -> ExecParams {
    exec_params_with_argv(process_id, sleep_argv())
}
/// Builds [`ExecParams`] that run `argv` under the given process id.
///
/// * `process_id` - identifier converted into a `ProcessId`.
/// * `argv` - full command line to execute, used as-is.
///
/// The working directory is the current directory of the test process and the
/// environment comes from `inherited_path_env()`. No TTY is requested and
/// `arg0` is left unset.
///
/// # Panics
///
/// Panics with `"cwd"` if the current directory cannot be determined.
fn exec_params_with_argv(process_id: &str, argv: Vec<String>) -> ExecParams {
    ExecParams {
        process_id: ProcessId::from(process_id),
        argv,
        cwd: std::env::current_dir().expect("cwd"),
        env: inherited_path_env(),
        tty: false,
        arg0: None,
    }
}
/// Returns an environment map that carries over only this process's `PATH`.
///
/// The map is empty when `PATH` is not set. A value that is not valid UTF-8 is
/// converted lossily.
fn inherited_path_env() -> HashMap<String, String> {
    std::env::var_os("PATH")
        .into_iter()
        .map(|raw_path| ("PATH".to_string(), raw_path.to_string_lossy().into_owned()))
        .collect()
}
/// Command line for the default test process: `sleep 0.1` on Unix, and a
/// loopback `ping` with its output discarded on Windows (presumably a stand-in
/// delay, since the Unix side sleeps).
fn sleep_argv() -> Vec<String> {
    let unix_script = "sleep 0.1";
    let windows_script = "ping -n 2 127.0.0.1 >NUL";
    shell_argv(unix_script, windows_script)
}
/// Wraps a script in the platform shell invocation.
///
/// On Windows the result is `[<command processor>, "/C", windows_script]`;
/// everywhere else it is `["/bin/sh", "-c", unix_script]`. Only the script for
/// the platform this was compiled for ends up in the returned vector.
fn shell_argv(unix_script: &str, windows_script: &str) -> Vec<String> {
    let (interpreter, flag, script) = if cfg!(windows) {
        (windows_command_processor(), "/C", windows_script)
    } else {
        ("/bin/sh".to_string(), "-c", unix_script)
    };
    vec![interpreter, flag.to_string(), script.to_string()]
}
/// Returns the value of the `COMSPEC` environment variable, or `"cmd.exe"`
/// when it is unset or cannot be read as Unicode.
fn windows_command_processor() -> String {
    match std::env::var("COMSPEC") {
        Ok(command_processor) => command_processor,
        Err(_) => String::from("cmd.exe"),
    }
}
/// Creates an [`ExecServerHandler`] backed by a fresh [`SessionRegistry`] and
/// drives it through the `initialize` / `initialized` handshake.
///
/// The handshake starts a new session (`resume_session_id` is `None`), and the
/// returned session id is checked to be a parseable UUID.
///
/// # Panics
///
/// Panics if either handshake step fails or if the session id is not a UUID.
async fn initialized_handler() -> Arc<ExecServerHandler> {
    // The receiving half is bound only for the duration of this function; it
    // is dropped when the function returns.
    let (outgoing_tx, _outgoing_rx) = mpsc::channel(16);
    let registry = SessionRegistry::new();
    let handler = Arc::new(ExecServerHandler::new(
        registry,
        RpcNotificationSender::new(outgoing_tx),
    ));
    let initialize_response = handler
        .initialize(InitializeParams {
            client_name: "exec-server-test".to_string(),
            resume_session_id: None,
        })
        .await
        .expect("initialize");
    Uuid::parse_str(&initialize_response.session_id).expect("session id should be a UUID");
    handler.initialized().expect("initialized");
    handler
}
@@ -101,3 +139,197 @@ async fn terminate_reports_false_after_process_exit() {
handler.shutdown().await;
}
/// Checks that a long-poll `exec_read` issued through one handler returns an
/// error once a second handler resumes the same session.
///
/// Sequence: start a process on the first handler, park a 500 ms long-poll read
/// on it, shut the first handler down, resume the session id from a second
/// handler sharing the same registry, then expect the parked read to fail.
#[tokio::test]
async fn long_poll_read_fails_after_session_resume() {
let (first_tx, _first_rx) = mpsc::channel(16);
// Both handlers are built from this one registry value so the second can find
// the session created by the first.
let registry = SessionRegistry::new();
let first_handler = Arc::new(ExecServerHandler::new(
Arc::clone(&registry),
RpcNotificationSender::new(first_tx),
));
// Fresh session; the returned session id is reused for the resume below.
let initialize_response = first_handler
.initialize(InitializeParams {
client_name: "exec-server-test".to_string(),
resume_session_id: None,
})
.await
.expect("initialize");
first_handler.initialized().expect("initialized");
first_handler
.exec(exec_params_with_argv(
"proc-long-poll",
shell_argv(
"sleep 0.1; printf resumed",
"ping -n 2 127.0.0.1 >NUL && echo resumed",
),
))
.await
.expect("start process");
// Run the long-poll read on its own task so the test body can keep going while
// the read is waiting (up to `wait_ms`).
let first_read_handler = Arc::clone(&first_handler);
let read_task = tokio::spawn(async move {
first_read_handler
.exec_read(ReadParams {
process_id: ProcessId::from("proc-long-poll"),
after_seq: None,
max_bytes: None,
wait_ms: Some(500),
})
.await
});
// Give the spawned read time to start before the first handler goes away.
tokio::time::sleep(Duration::from_millis(50)).await;
// NOTE(review): presumably `shutdown` detaches the session (as a dropped
// connection would) rather than destroying it, which is what makes the resume
// below possible — confirm against the handler implementation.
first_handler.shutdown().await;
let (second_tx, _second_rx) = mpsc::channel(16);
let second_handler = Arc::new(ExecServerHandler::new(
registry,
RpcNotificationSender::new(second_tx),
));
// Resume the first handler's session from the second handler.
second_handler
.initialize(InitializeParams {
client_name: "exec-server-test".to_string(),
resume_session_id: Some(initialize_response.session_id),
})
.await
.expect("initialize second connection");
second_handler
.initialized()
.expect("initialized second connection");
// First `expect` covers the task join; `expect_err` requires the read itself
// to have returned an error.
let err = read_task
.await
.expect("read task should join")
.expect_err("evicted long-poll read should fail");
// NOTE(review): -32600 is "Invalid Request" in JSON-RPC 2.0; presumably the
// server reuses that code for this rejection — confirm.
assert_eq!(err.code, -32600);
assert_eq!(
err.message,
"session has been resumed by another connection"
);
second_handler.shutdown().await;
}
/// Checks that resuming a session whose original handler has not been shut
/// down is refused with code `-32600` and an "already attached" message.
#[tokio::test]
async fn active_session_resume_is_rejected() {
    let (attached_tx, _attached_rx) = mpsc::channel(16);
    let registry = SessionRegistry::new();
    let attached_handler = Arc::new(ExecServerHandler::new(
        Arc::clone(&registry),
        RpcNotificationSender::new(attached_tx),
    ));
    let fresh_session = InitializeParams {
        client_name: "exec-server-test".to_string(),
        resume_session_id: None,
    };
    let initialize_response = attached_handler
        .initialize(fresh_session)
        .await
        .expect("initialize");
    let session_id = &initialize_response.session_id;

    // A second handler on the same registry tries to take over the session
    // while the first handler is still live.
    let (intruder_tx, _intruder_rx) = mpsc::channel(16);
    let intruder_handler = Arc::new(ExecServerHandler::new(
        registry,
        RpcNotificationSender::new(intruder_tx),
    ));
    let resume_attempt = InitializeParams {
        client_name: "exec-server-test".to_string(),
        resume_session_id: Some(session_id.clone()),
    };
    let err = intruder_handler
        .initialize(resume_attempt)
        .await
        .expect_err("active session resume should fail");

    let expected_message = format!(
        "session {} is already attached to another connection",
        session_id
    );
    assert_eq!(err.code, -32600);
    assert_eq!(err.message, expected_message);

    attached_handler.shutdown().await;
}
/// Checks that process output and exit status can still be read after the
/// notification receiver has been dropped, and that the same process id can be
/// used for a new `exec` afterwards.
#[tokio::test]
async fn output_and_exit_are_retained_after_notification_receiver_closes() {
    let (notify_tx, notify_rx) = mpsc::channel(16);
    let server = Arc::new(ExecServerHandler::new(
        SessionRegistry::new(),
        RpcNotificationSender::new(notify_tx),
    ));
    let fresh_session = InitializeParams {
        client_name: "exec-server-test".to_string(),
        resume_session_id: None,
    };
    server.initialize(fresh_session).await.expect("initialize");
    server.initialized().expect("initialized");

    let process_id = ProcessId::from("proc-notification-fail");
    // NOTE(review): cmd.exe's `echo first &&` may emit a trailing space before
    // the line break, which the exact comparison below would not tolerate —
    // confirm on Windows.
    let two_line_script = shell_argv(
        "sleep 0.05; printf 'first\\n'; sleep 0.05; printf 'second\\n'",
        "echo first && ping -n 2 127.0.0.1 >NUL && echo second",
    );
    server
        .exec(exec_params_with_argv(process_id.as_str(), two_line_script))
        .await
        .expect("start process");

    // Close the receiving side of the notification channel right after the
    // process has been started; everything below goes through `exec_read`.
    drop(notify_rx);

    let (collected, status) = read_process_until_closed(&server, process_id.clone()).await;
    let normalized = collected.replace("\r\n", "\n");
    assert_eq!(normalized, "first\nsecond\n");
    assert_eq!(status, Some(0));

    // Presumably waits out a retention window for the exited process before
    // its id can be reused (per the `expect` message below) — TODO confirm.
    tokio::time::sleep(Duration::from_millis(100)).await;
    server
        .exec(exec_params(process_id.as_str()))
        .await
        .expect("process id should be reusable after exit retention");

    server.shutdown().await;
}
/// Repeatedly calls `exec_read` for `process_id` until a response reports
/// `closed`, returning the concatenated output (lossily decoded as UTF-8) and
/// the exit code from the most recent response that reported `exited`.
///
/// # Panics
///
/// Panics if a read fails, or if a read returns without `closed` once two
/// seconds have passed since the call started.
async fn read_process_until_closed(
    handler: &ExecServerHandler,
    process_id: ProcessId,
) -> (String, Option<i32>) {
    let deadline = tokio::time::Instant::now() + Duration::from_secs(2);
    let mut text = String::new();
    let mut status = None;
    let mut cursor = None;
    loop {
        let request = ReadParams {
            process_id: process_id.clone(),
            after_seq: cursor,
            max_bytes: None,
            wait_ms: Some(500),
        };
        let response: ReadResponse = handler.exec_read(request).await.expect("read process");

        for piece in response.chunks {
            let seq = piece.seq;
            let bytes = piece.chunk.into_inner();
            text.push_str(&String::from_utf8_lossy(&bytes));
            cursor = Some(seq);
        }
        if response.exited {
            status = response.exit_code;
        }
        if response.closed {
            return (text, status);
        }

        // Prefer the server-reported position (`next_seq - 1`) for the next
        // read; keep the chunk-derived cursor only when `next_seq` is zero.
        cursor = match response.next_seq.checked_sub(1) {
            Some(previous) => Some(previous),
            None => cursor,
        };
        // Checked after each read returns, so the loop cannot spin forever.
        assert!(
            tokio::time::Instant::now() < deadline,
            "process should close within 2s"
        );
    }
}