Attach WebRTC realtime starts to sideband websocket (#17057)

Summary:
- parse the realtime call Location header and join that call over the
direct realtime WebSocket
- keep WebRTC starts alive on the existing realtime conversation path

Validation:
- just fmt
- git diff --check
- cargo check -p codex-api
- cargo check -p codex-core --tests
- local cargo tests not run; relying on PR CI
This commit is contained in:
Ahmed Ibrahim
2026-04-08 15:25:42 -07:00
committed by GitHub
parent 19bd018300
commit 794a0240f9
7 changed files with 534 additions and 89 deletions

View File

@@ -19,8 +19,10 @@ use codex_protocol::protocol::RealtimeEvent;
use codex_protocol::protocol::SessionSource;
use codex_protocol::user_input::UserInput;
use core_test_support::responses;
use core_test_support::responses::WebSocketConnectionConfig;
use core_test_support::responses::start_mock_server;
use core_test_support::responses::start_websocket_server;
use core_test_support::responses::start_websocket_server_with_headers;
use core_test_support::skip_if_no_network;
use core_test_support::streaming_sse::StreamingSseChunk;
use core_test_support::streaming_sse::start_streaming_sse_server;
@@ -337,19 +339,37 @@ async fn conversation_start_audio_text_close_round_trip() -> Result<()> {
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn conversation_webrtc_start_posts_generated_session() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let capture = RealtimeCallRequestCapture::new();
Mock::given(method("POST"))
.and(path_regex(".*/realtime/calls$"))
.and(capture.clone())
.respond_with(ResponseTemplate::new(200).set_body_string("v=answer\r\n"))
.respond_with(
ResponseTemplate::new(200)
.insert_header("Location", "/v1/realtime/calls/calls/rtc_core_test")
.set_body_string("v=answer\r\n"),
)
.mount(&server)
.await;
let realtime_server = start_websocket_server_with_headers(vec![WebSocketConnectionConfig {
requests: vec![vec![json!({
"type": "session.updated",
"session": { "id": "sess_webrtc", "instructions": "backend prompt" }
})]],
response_headers: Vec::new(),
accept_delay: None,
close_after_requests: false,
}])
.await;
let mut builder = test_codex().with_config(|config| {
let realtime_ws_base_url = realtime_server.uri().to_string();
let mut builder = test_codex().with_config(move |config| {
config.experimental_realtime_ws_backend_prompt = Some("backend prompt".to_string());
config.experimental_realtime_ws_model = Some("realtime-test-model".to_string());
config.experimental_realtime_ws_startup_context = Some("startup context".to_string());
config.experimental_realtime_ws_base_url = Some(realtime_ws_base_url);
});
let test = builder.build(&server).await?;
@@ -363,6 +383,8 @@ async fn conversation_webrtc_start_posts_generated_session() -> Result<()> {
}))
.await?;
// Phase 1: the client gets the SDP answer that configures its peer connection, and then the
// normal realtime event stream from the joined sideband WebSocket.
let created = wait_for_event_match(&test.codex, |msg| match msg {
EventMsg::RealtimeConversationSdp(created) => Some(Ok(created.clone())),
EventMsg::Error(err) => Some(Err(err.clone())),
@@ -371,13 +393,18 @@ async fn conversation_webrtc_start_posts_generated_session() -> Result<()> {
.await
.unwrap_or_else(|err: ErrorEvent| panic!("conversation call create failed: {err:?}"));
assert_eq!(created.sdp, "v=answer\r\n");
let closed = wait_for_event_match(&test.codex, |msg| match msg {
EventMsg::RealtimeConversationClosed(closed) => Some(closed.clone()),
let session_updated = wait_for_event_match(&test.codex, |msg| match msg {
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
payload: RealtimeEvent::SessionUpdated { session_id, .. },
}) => Some(session_id.clone()),
_ => None,
})
.await;
assert_eq!(closed.reason.as_deref(), Some("transport_closed"));
assert_eq!(session_updated, "sess_webrtc");
// Phase 2: call creation posts the offer and generated session together, so the media leg can
// begin inference before the sideband WebSocket is ready.
let request = capture.single_request();
assert_eq!(request.url.path(), "/v1/realtime/calls");
assert_eq!(request.url.query(), None);
@@ -415,6 +442,42 @@ async fn conversation_webrtc_start_posts_generated_session() -> Result<()> {
)
);
// Phase 3: the server joins that same call over the direct sideband WebSocket, sends the
// ordinary session.update, and keeps the conversation alive until the client closes it.
let session_update = realtime_server
.wait_for_request(/*connection_index*/ 0, /*request_index*/ 0)
.await;
assert_eq!(
session_update.body_json()["type"].as_str(),
Some("session.update")
);
assert!(
websocket_request_instructions(&session_update)
.context("session.update should include instructions")?
.contains("startup context")
);
let handshake = realtime_server.single_handshake();
assert_eq!(
handshake.uri(),
"/v1/realtime?intent=quicksilver&call_id=rtc_core_test"
);
assert_eq!(
handshake.header("authorization").as_deref(),
Some("Bearer dummy")
);
test.codex.submit(Op::RealtimeConversationClose).await?;
let closed = wait_for_event_match(&test.codex, |msg| match msg {
EventMsg::RealtimeConversationClosed(closed) => Some(closed.clone()),
_ => None,
})
.await;
assert!(matches!(
closed.reason.as_deref(),
Some("requested" | "transport_closed")
));
realtime_server.shutdown().await;
Ok(())
}