Add WebRTC transport to realtime start (#16960)

Adds WebRTC startup to the experimental app-server
`thread/realtime/start` method with an optional transport enum. The
websocket path remains the default; WebRTC offers create the realtime
session through the shared start flow and emit the answer SDP via
`thread/realtime/sdp`.

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-04-07 15:43:38 -07:00
committed by GitHub
parent 6c36e7d688
commit fb3dcfde1d
42 changed files with 1574 additions and 85 deletions

View File

@@ -72,6 +72,7 @@ mod tests {
use crate::provider::RetryConfig;
use async_trait::async_trait;
use codex_client::Request;
use codex_client::RequestBody;
use codex_client::Response;
use codex_client::StreamResponse;
use codex_client::TransportError;
@@ -213,7 +214,11 @@ mod tests {
request.url,
"https://example.com/api/codex/memories/trace_summarize"
);
let body = request.body.expect("request body should be present");
let body = request
.body
.as_ref()
.and_then(RequestBody::json)
.expect("request body should be JSON");
assert_eq!(body["model"], "gpt-test");
assert_eq!(body["traces"][0]["id"], "trace-1");
assert_eq!(

View File

@@ -1,6 +1,7 @@
pub(crate) mod compact;
pub(crate) mod memories;
pub(crate) mod models;
pub(crate) mod realtime_call;
pub(crate) mod realtime_websocket;
pub(crate) mod responses;
pub(crate) mod responses_websocket;
@@ -9,6 +10,8 @@ mod session;
pub use compact::CompactClient;
pub use memories::MemoriesClient;
pub use models::ModelsClient;
pub use realtime_call::RealtimeCallClient;
pub use realtime_call::RealtimeCallResponse;
pub use realtime_websocket::RealtimeEventParser;
pub use realtime_websocket::RealtimeSessionConfig;
pub use realtime_websocket::RealtimeSessionMode;
@@ -16,6 +19,7 @@ pub use realtime_websocket::RealtimeWebsocketClient;
pub use realtime_websocket::RealtimeWebsocketConnection;
pub use realtime_websocket::RealtimeWebsocketEvents;
pub use realtime_websocket::RealtimeWebsocketWriter;
pub use realtime_websocket::session_update_session_json;
pub use responses::ResponsesClient;
pub use responses::ResponsesOptions;
pub use responses_websocket::ResponsesWebsocketClient;

View File

@@ -0,0 +1,415 @@
use crate::auth::AuthProvider;
use crate::endpoint::realtime_websocket::RealtimeSessionConfig;
use crate::endpoint::realtime_websocket::session_update_session_json;
use crate::endpoint::session::EndpointSession;
use crate::error::ApiError;
use crate::provider::Provider;
use bytes::Bytes;
use codex_client::HttpTransport;
use codex_client::RequestBody;
use codex_client::RequestTelemetry;
use http::HeaderMap;
use http::HeaderValue;
use http::Method;
use http::header::CONTENT_TYPE;
use serde::Serialize;
use serde_json::Value;
use serde_json::to_string;
use serde_json::to_value;
use std::sync::Arc;
use tracing::instrument;
/// Boundary token written between the parts of the multipart request body.
const MULTIPART_BOUNDARY: &str = "codex-realtime-call-boundary";
/// `Content-Type` header value sent with the multipart request body.
///
/// The `boundary=` parameter repeats the `MULTIPART_BOUNDARY` text as a literal,
/// so the two constants have to be changed together.
const MULTIPART_CONTENT_TYPE: &str = "multipart/form-data; boundary=codex-realtime-call-boundary";
/// Client for the `realtime/calls` endpoint, which accepts an SDP offer and
/// returns the response body as SDP text.
pub struct RealtimeCallClient<T: HttpTransport, A: AuthProvider> {
// Shared endpoint session wrapping the transport, provider, and auth provider.
session: EndpointSession<T, A>,
}
/// Result of a successful realtime call request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RealtimeCallResponse {
/// Response body decoded as UTF-8 text (the SDP returned by the server).
pub sdp: String,
}
/// JSON request body used when the provider base URL contains `/backend-api`,
/// in place of the multipart body sent to other providers.
#[derive(Serialize)]
struct BackendRealtimeCallRequest<'a> {
// SDP offer text, borrowed from the caller.
sdp: &'a str,
// Session JSON from which the `id` key has already been removed.
session: &'a Value,
}
impl<T: HttpTransport, A: AuthProvider> RealtimeCallClient<T, A> {
pub fn new(transport: T, provider: Provider, auth: A) -> Self {
Self {
session: EndpointSession::new(transport, provider, auth),
}
}
pub fn with_telemetry(self, request: Option<Arc<dyn RequestTelemetry>>) -> Self {
Self {
session: self.session.with_request_telemetry(request),
}
}
fn path() -> &'static str {
"realtime/calls"
}
fn uses_backend_request_shape(&self) -> bool {
self.session.provider().base_url.contains("/backend-api")
}
#[instrument(
name = "realtime_call.create",
level = "info",
skip_all,
fields(
http.method = "POST",
api.path = "realtime/calls"
)
)]
pub async fn create(&self, sdp: String) -> Result<RealtimeCallResponse, ApiError> {
self.create_with_headers(sdp, HeaderMap::new()).await
}
pub async fn create_with_session(
&self,
sdp: String,
session_config: RealtimeSessionConfig,
) -> Result<RealtimeCallResponse, ApiError> {
self.create_with_session_and_headers(sdp, session_config, HeaderMap::new())
.await
}
pub async fn create_with_headers(
&self,
sdp: String,
extra_headers: HeaderMap,
) -> Result<RealtimeCallResponse, ApiError> {
let resp = self
.session
.execute_with(
Method::POST,
Self::path(),
extra_headers,
/*body*/ None,
|req| {
req.headers
.insert(CONTENT_TYPE, HeaderValue::from_static("application/sdp"));
req.body = Some(RequestBody::Raw(Bytes::from(sdp.clone())));
},
)
.await?;
let sdp = decode_sdp_response(resp.body.as_ref())?;
Ok(RealtimeCallResponse { sdp })
}
pub async fn create_with_session_and_headers(
&self,
sdp: String,
session_config: RealtimeSessionConfig,
extra_headers: HeaderMap,
) -> Result<RealtimeCallResponse, ApiError> {
let mut session = realtime_session_json(session_config)?;
if let Some(session) = session.as_object_mut() {
session.remove("id");
}
// TODO(aibrahim): Align the SIWC route with the API multipart shape and remove this branch.
if self.uses_backend_request_shape() {
let body = to_value(BackendRealtimeCallRequest {
sdp: &sdp,
session: &session,
})
.map_err(|err| ApiError::Stream(format!("failed to encode realtime call: {err}")))?;
let resp = self
.session
.execute(Method::POST, Self::path(), extra_headers, Some(body))
.await?;
let sdp = decode_sdp_response(resp.body.as_ref())?;
return Ok(RealtimeCallResponse { sdp });
}
let session = to_string(&session).map_err(|err| ApiError::InvalidRequest {
message: err.to_string(),
})?;
let mut body = Vec::new();
body.extend_from_slice(format!("--{MULTIPART_BOUNDARY}\r\n").as_bytes());
body.extend_from_slice(b"Content-Disposition: form-data; name=\"sdp\"\r\n");
body.extend_from_slice(b"Content-Type: application/sdp\r\n\r\n");
body.extend_from_slice(sdp.as_bytes());
body.extend_from_slice(b"\r\n");
body.extend_from_slice(format!("--{MULTIPART_BOUNDARY}\r\n").as_bytes());
body.extend_from_slice(b"Content-Disposition: form-data; name=\"session\"\r\n");
body.extend_from_slice(b"Content-Type: application/json\r\n\r\n");
body.extend_from_slice(session.as_bytes());
body.extend_from_slice(b"\r\n");
body.extend_from_slice(format!("--{MULTIPART_BOUNDARY}--\r\n").as_bytes());
let resp = self
.session
.execute_with(
Method::POST,
Self::path(),
extra_headers,
/*body*/ None,
|req| {
req.headers.insert(
CONTENT_TYPE,
HeaderValue::from_static(MULTIPART_CONTENT_TYPE),
);
req.body = Some(RequestBody::Raw(Bytes::from(body.clone())));
},
)
.await?;
let sdp = decode_sdp_response(resp.body.as_ref())?;
Ok(RealtimeCallResponse { sdp })
}
}
/// Encodes `session_config` as the session JSON sent with a realtime call.
///
/// An encoding failure is reported as `ApiError::Stream`.
fn realtime_session_json(session_config: RealtimeSessionConfig) -> Result<Value, ApiError> {
    match session_update_session_json(session_config) {
        Ok(session) => Ok(session),
        Err(err) => Err(ApiError::Stream(format!(
            "failed to encode realtime call session: {err}"
        ))),
    }
}
/// Interprets a response body as UTF-8 SDP text.
///
/// A body that is not valid UTF-8 is reported as `ApiError::Stream`.
fn decode_sdp_response(body: &[u8]) -> Result<String, ApiError> {
    match String::from_utf8(body.to_vec()) {
        Ok(sdp) => Ok(sdp),
        Err(err) => Err(ApiError::Stream(format!(
            "failed to decode realtime call SDP response: {err}"
        ))),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::endpoint::realtime_websocket::RealtimeEventParser;
    use crate::endpoint::realtime_websocket::RealtimeSessionMode;
    use crate::provider::RetryConfig;
    use async_trait::async_trait;
    use codex_client::Request;
    use codex_client::Response;
    use codex_client::StreamResponse;
    use codex_client::TransportError;
    use http::StatusCode;
    use pretty_assertions::assert_eq;
    use std::sync::Mutex;
    use std::time::Duration;

    /// SDP offer text submitted by every test.
    const OFFER_SDP: &str = "v=offer\r\n";
    /// SDP text the fake transport replies with.
    const ANSWER_SDP: &str = "v=0\r\n";

    /// Fake transport that remembers the latest request and always answers
    /// `200 OK` with `ANSWER_SDP` as the body.
    #[derive(Clone)]
    struct CapturingTransport {
        last_request: Arc<Mutex<Option<Request>>>,
    }

    impl CapturingTransport {
        fn new() -> Self {
            let last_request = Arc::new(Mutex::new(None));
            Self { last_request }
        }

        /// Returns a copy of the recorded request; panics when nothing was sent.
        fn captured(&self) -> Request {
            self.last_request.lock().unwrap().clone().unwrap()
        }
    }

    #[async_trait]
    impl HttpTransport for CapturingTransport {
        async fn execute(&self, req: Request) -> Result<Response, TransportError> {
            *self.last_request.lock().unwrap() = Some(req);
            let response = Response {
                status: StatusCode::OK,
                headers: HeaderMap::new(),
                body: Bytes::from_static(ANSWER_SDP.as_bytes()),
            };
            Ok(response)
        }

        async fn stream(&self, _req: Request) -> Result<StreamResponse, TransportError> {
            Err(TransportError::Build("stream should not run".to_string()))
        }
    }

    /// Auth provider that always supplies the same bearer token.
    #[derive(Clone, Default)]
    struct DummyAuth;

    impl AuthProvider for DummyAuth {
        fn bearer_token(&self) -> Option<String> {
            Some(String::from("test-token"))
        }
    }

    /// Provider rooted at `base_url` with a single attempt and short timeouts.
    fn provider(base_url: &str) -> Provider {
        let retry = RetryConfig {
            max_attempts: 1,
            base_delay: Duration::from_millis(1),
            retry_429: false,
            retry_5xx: true,
            retry_transport: true,
        };
        Provider {
            name: "test".to_string(),
            base_url: base_url.to_string(),
            query_params: None,
            headers: HeaderMap::new(),
            retry,
            stream_idle_timeout: Duration::from_secs(1),
        }
    }

    /// Conversational realtime-v2 session config carrying `session_id`.
    fn realtime_session_config(session_id: &str) -> RealtimeSessionConfig {
        RealtimeSessionConfig {
            instructions: "hi".to_string(),
            model: Some("gpt-realtime".to_string()),
            session_id: Some(session_id.to_string()),
            event_parser: RealtimeEventParser::RealtimeV2,
            session_mode: RealtimeSessionMode::Conversational,
        }
    }

    /// Session JSON for `session_id` with the `id` key removed, which is the
    /// shape the client is expected to send.
    fn session_without_id(session_id: &str) -> Value {
        let mut session = realtime_session_json(realtime_session_config(session_id))
            .expect("session should encode");
        session
            .as_object_mut()
            .expect("session should be an object")
            .remove("id");
        session
    }

    /// Pairs a fresh capturing transport with a client pointed at `base_url`.
    fn client_for(
        base_url: &str,
    ) -> (
        CapturingTransport,
        RealtimeCallClient<CapturingTransport, DummyAuth>,
    ) {
        let transport = CapturingTransport::new();
        let client = RealtimeCallClient::new(transport.clone(), provider(base_url), DummyAuth);
        (transport, client)
    }

    /// The response every successful call should decode to.
    fn expected_answer() -> RealtimeCallResponse {
        RealtimeCallResponse {
            sdp: ANSWER_SDP.to_string(),
        }
    }

    #[tokio::test]
    async fn sends_sdp_offer_as_raw_body() {
        let (transport, client) = client_for("https://api.openai.com/v1");

        let answer = client
            .create(OFFER_SDP.to_string())
            .await
            .expect("request should succeed");
        assert_eq!(answer, expected_answer());

        let sent = transport.captured();
        assert_eq!(sent.method, Method::POST);
        assert_eq!(sent.url, "https://api.openai.com/v1/realtime/calls");
        assert_eq!(
            sent.headers.get(CONTENT_TYPE).unwrap(),
            HeaderValue::from_static("application/sdp")
        );
        let authorization = sent
            .headers
            .get(http::header::AUTHORIZATION)
            .and_then(|value| value.to_str().ok());
        assert_eq!(authorization, Some("Bearer test-token"));
        assert_eq!(
            sent.body,
            Some(RequestBody::Raw(Bytes::from_static(OFFER_SDP.as_bytes())))
        );
    }

    #[tokio::test]
    async fn sends_api_session_call_as_multipart_body() {
        let (transport, client) = client_for("https://api.openai.com/v1");

        let answer = client
            .create_with_session(OFFER_SDP.to_string(), realtime_session_config("sess-api"))
            .await
            .expect("request should succeed");
        assert_eq!(answer, expected_answer());

        let sent = transport.captured();
        assert_eq!(sent.method, Method::POST);
        assert_eq!(sent.url, "https://api.openai.com/v1/realtime/calls");
        assert_eq!(
            sent.headers.get(CONTENT_TYPE).unwrap(),
            HeaderValue::from_static(MULTIPART_CONTENT_TYPE)
        );

        let Some(RequestBody::Raw(raw_body)) = sent.body else {
            panic!("multipart body should be raw");
        };
        let body_text = std::str::from_utf8(&raw_body).expect("multipart body should be utf-8");
        let session_json =
            to_string(&session_without_id("sess-api")).expect("session should serialize");
        let expected_body = format!(
            "--codex-realtime-call-boundary\r\n\
             Content-Disposition: form-data; name=\"sdp\"\r\n\
             Content-Type: application/sdp\r\n\
             \r\n\
             v=offer\r\n\
             \r\n\
             --codex-realtime-call-boundary\r\n\
             Content-Disposition: form-data; name=\"session\"\r\n\
             Content-Type: application/json\r\n\
             \r\n\
             {session_json}\r\n\
             --codex-realtime-call-boundary--\r\n"
        );
        assert_eq!(body_text, expected_body);
    }

    #[tokio::test]
    async fn sends_backend_session_call_as_json_body() {
        let (transport, client) = client_for("https://chatgpt.com/backend-api/codex");

        let answer = client
            .create_with_session(
                OFFER_SDP.to_string(),
                realtime_session_config("sess-backend"),
            )
            .await
            .expect("request should succeed");
        assert_eq!(answer, expected_answer());

        let sent = transport.captured();
        assert_eq!(sent.method, Method::POST);
        assert_eq!(
            sent.url,
            "https://chatgpt.com/backend-api/codex/realtime/calls"
        );

        let expected_session = session_without_id("sess-backend");
        let expected_json = to_value(BackendRealtimeCallRequest {
            sdp: OFFER_SDP,
            session: &expected_session,
        })
        .expect("request should encode");
        assert_eq!(sent.body, Some(RequestBody::Json(expected_json)));
    }
}

View File

@@ -8,8 +8,12 @@ use crate::endpoint::realtime_websocket::methods_v2::session_update_session as v
use crate::endpoint::realtime_websocket::methods_v2::websocket_intent as v2_websocket_intent;
use crate::endpoint::realtime_websocket::protocol::RealtimeEventParser;
use crate::endpoint::realtime_websocket::protocol::RealtimeOutboundMessage;
use crate::endpoint::realtime_websocket::protocol::RealtimeSessionConfig;
use crate::endpoint::realtime_websocket::protocol::RealtimeSessionMode;
use crate::endpoint::realtime_websocket::protocol::SessionUpdateSession;
use serde_json::Result as JsonResult;
use serde_json::Value;
use serde_json::to_value;
pub(super) const REALTIME_AUDIO_SAMPLE_RATE: u32 = 24_000;
const AGENT_FINAL_MESSAGE_PREFIX: &str = "\"Agent Final Message\":\n\n";
@@ -60,6 +64,17 @@ pub(super) fn session_update_session(
}
}
pub fn session_update_session_json(config: RealtimeSessionConfig) -> JsonResult<Value> {
let mut session = session_update_session(
config.event_parser,
config.instructions,
config.session_mode,
);
session.id = config.session_id;
session.model = config.model;
to_value(session)
}
pub(super) fn websocket_intent(event_parser: RealtimeEventParser) -> Option<&'static str> {
match event_parser {
RealtimeEventParser::V1 => v1_websocket_intent(),

View File

@@ -40,7 +40,9 @@ pub(super) fn conversation_handoff_append_message(
pub(super) fn session_update_session(instructions: String) -> SessionUpdateSession {
SessionUpdateSession {
id: None,
r#type: SessionType::Quicksilver,
model: None,
instructions: Some(instructions),
output_modalities: None,
audio: SessionAudio {

View File

@@ -62,7 +62,9 @@ pub(super) fn session_update_session(
) -> SessionUpdateSession {
match session_mode {
RealtimeSessionMode::Conversational => SessionUpdateSession {
id: None,
r#type: SessionType::Realtime,
model: None,
instructions: Some(instructions),
output_modalities: Some(vec![REALTIME_V2_OUTPUT_MODALITY_AUDIO.to_string()]),
audio: SessionAudio {
@@ -107,7 +109,9 @@ pub(super) fn session_update_session(
tool_choice: Some(REALTIME_V2_TOOL_CHOICE.to_string()),
},
RealtimeSessionMode::Transcription => SessionUpdateSession {
id: None,
r#type: SessionType::Transcription,
model: None,
instructions: None,
output_modalities: None,
audio: SessionAudio {

View File

@@ -11,6 +11,7 @@ pub use methods::RealtimeWebsocketClient;
pub use methods::RealtimeWebsocketConnection;
pub use methods::RealtimeWebsocketEvents;
pub use methods::RealtimeWebsocketWriter;
pub use methods_common::session_update_session_json;
pub use protocol::RealtimeEventParser;
pub use protocol::RealtimeSessionConfig;
pub use protocol::RealtimeSessionMode;

View File

@@ -48,9 +48,13 @@ pub(super) enum RealtimeOutboundMessage {
#[derive(Debug, Clone, Serialize)]
pub(super) struct SessionUpdateSession {
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) id: Option<String>,
#[serde(rename = "type")]
pub(super) r#type: SessionType,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) model: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) instructions: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) output_modalities: Option<Vec<String>>,

View File

@@ -5,6 +5,7 @@ use crate::provider::Provider;
use crate::telemetry::run_with_request_telemetry;
use codex_client::HttpTransport;
use codex_client::Request;
use codex_client::RequestBody;
use codex_client::RequestTelemetry;
use codex_client::Response;
use codex_client::StreamResponse;
@@ -53,7 +54,7 @@ impl<T: HttpTransport, A: AuthProvider> EndpointSession<T, A> {
let mut req = self.provider.build_request(method.clone(), path);
req.headers.extend(extra_headers.clone());
if let Some(body) = body {
req.body = Some(body.clone());
req.body = Some(RequestBody::Json(body.clone()));
}
add_auth_headers(&self.auth, req)
}