Add WebRTC transport to realtime start (#16960)

Adds WebRTC startup to the experimental app-server
`thread/realtime/start` method with an optional transport enum. The
websocket path remains the default; WebRTC offers create the realtime
session through the shared start flow and emit the answer SDP via
`thread/realtime/sdp`.

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-04-07 15:43:38 -07:00
committed by GitHub
parent 6c36e7d688
commit fb3dcfde1d
42 changed files with 1574 additions and 85 deletions

View File

@@ -8,8 +8,12 @@ use crate::endpoint::realtime_websocket::methods_v2::session_update_session as v
use crate::endpoint::realtime_websocket::methods_v2::websocket_intent as v2_websocket_intent;
use crate::endpoint::realtime_websocket::protocol::RealtimeEventParser;
use crate::endpoint::realtime_websocket::protocol::RealtimeOutboundMessage;
use crate::endpoint::realtime_websocket::protocol::RealtimeSessionConfig;
use crate::endpoint::realtime_websocket::protocol::RealtimeSessionMode;
use crate::endpoint::realtime_websocket::protocol::SessionUpdateSession;
use serde_json::Result as JsonResult;
use serde_json::Value;
use serde_json::to_value;
/// Realtime audio sample rate (presumably in Hz; confirm against the audio format settings).
pub(super) const REALTIME_AUDIO_SAMPLE_RATE: u32 = 24_000;
/// Literal prefix text for an agent's final message: the quoted label followed by a
/// blank line. NOTE(review): where this is prepended is not visible here; verify at the use site.
const AGENT_FINAL_MESSAGE_PREFIX: &str = "\"Agent Final Message\":\n\n";
@@ -60,6 +64,17 @@ pub(super) fn session_update_session(
}
}
pub fn session_update_session_json(config: RealtimeSessionConfig) -> JsonResult<Value> {
let mut session = session_update_session(
config.event_parser,
config.instructions,
config.session_mode,
);
session.id = config.session_id;
session.model = config.model;
to_value(session)
}
pub(super) fn websocket_intent(event_parser: RealtimeEventParser) -> Option<&'static str> {
match event_parser {
RealtimeEventParser::V1 => v1_websocket_intent(),

View File

@@ -40,7 +40,9 @@ pub(super) fn conversation_handoff_append_message(
pub(super) fn session_update_session(instructions: String) -> SessionUpdateSession {
SessionUpdateSession {
id: None,
r#type: SessionType::Quicksilver,
model: None,
instructions: Some(instructions),
output_modalities: None,
audio: SessionAudio {

View File

@@ -62,7 +62,9 @@ pub(super) fn session_update_session(
) -> SessionUpdateSession {
match session_mode {
RealtimeSessionMode::Conversational => SessionUpdateSession {
id: None,
r#type: SessionType::Realtime,
model: None,
instructions: Some(instructions),
output_modalities: Some(vec![REALTIME_V2_OUTPUT_MODALITY_AUDIO.to_string()]),
audio: SessionAudio {
@@ -107,7 +109,9 @@ pub(super) fn session_update_session(
tool_choice: Some(REALTIME_V2_TOOL_CHOICE.to_string()),
},
RealtimeSessionMode::Transcription => SessionUpdateSession {
id: None,
r#type: SessionType::Transcription,
model: None,
instructions: None,
output_modalities: None,
audio: SessionAudio {

View File

@@ -11,6 +11,7 @@ pub use methods::RealtimeWebsocketClient;
pub use methods::RealtimeWebsocketConnection;
pub use methods::RealtimeWebsocketEvents;
pub use methods::RealtimeWebsocketWriter;
pub use methods_common::session_update_session_json;
pub use protocol::RealtimeEventParser;
pub use protocol::RealtimeSessionConfig;
pub use protocol::RealtimeSessionMode;

View File

@@ -48,9 +48,13 @@ pub(super) enum RealtimeOutboundMessage {
#[derive(Debug, Clone, Serialize)]
pub(super) struct SessionUpdateSession {
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) id: Option<String>,
#[serde(rename = "type")]
pub(super) r#type: SessionType,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) model: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) instructions: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) output_modalities: Option<Vec<String>>,