feat: move exec-server ownership (#16344)

This introduces session-scoped ownership for exec-server so ws
disconnects no longer immediately kill running remote exec processes,
and it prepares the protocol for reconnect-based resume.
- add session_id / resume_session_id to the exec-server initialize
handshake
  - move process ownership under a shared session registry
- detach sessions on websocket disconnect and expire them after a TTL
instead of killing processes immediately (we will resume based on this)
- allow a new connection to resume an existing session and take over
notifications/ownership
- I use UUID to make them not predictable as we don't have auth for now
- make detached-session expiry authoritative at resume time so teardown
wins at the TTL boundary
- reject long-poll process/read calls that get resumed out from under an
older attachment

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
jif-oai
2026-04-10 14:11:47 +01:00
committed by GitHub
parent 7bbe3b6011
commit 085ffb4456
21 changed files with 1203 additions and 172 deletions

View File

@@ -9,6 +9,7 @@ use codex_exec_server::InitializeParams;
use codex_exec_server::InitializeResponse;
use common::exec_server::exec_server;
use pretty_assertions::assert_eq;
use uuid::Uuid;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_server_reports_malformed_websocket_json_and_keeps_running() -> anyhow::Result<()> {
@@ -36,6 +37,7 @@ async fn exec_server_reports_malformed_websocket_json_and_keeps_running() -> any
"initialize",
serde_json::to_value(InitializeParams {
client_name: "exec-server-test".to_string(),
resume_session_id: None,
})?,
)
.await?;
@@ -53,7 +55,7 @@ async fn exec_server_reports_malformed_websocket_json_and_keeps_running() -> any
};
assert_eq!(id, initialize_id);
let initialize_response: InitializeResponse = serde_json::from_value(result)?;
assert_eq!(initialize_response, InitializeResponse {});
Uuid::parse_str(&initialize_response.session_id)?;
server.shutdown().await?;
Ok(())