Add realtime v2 event parser behind feature flag (#14537)

- Add a feature-flagged realtime v2 parser on the existing
websocket/session pipeline.
- Wire parser selection from core feature flags and map the codex
handoff tool-call path into existing handoff events.

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-03-12 21:12:40 -07:00
committed by GitHub
parent 650beb177e
commit 3e8f47169e
9 changed files with 352 additions and 10 deletions

View File

@@ -2,6 +2,7 @@ use crate::endpoint::realtime_websocket::protocol::ConversationItem;
use crate::endpoint::realtime_websocket::protocol::ConversationItemContent;
use crate::endpoint::realtime_websocket::protocol::RealtimeAudioFrame;
use crate::endpoint::realtime_websocket::protocol::RealtimeEvent;
use crate::endpoint::realtime_websocket::protocol::RealtimeEventParser;
use crate::endpoint::realtime_websocket::protocol::RealtimeOutboundMessage;
use crate::endpoint::realtime_websocket::protocol::RealtimeSessionConfig;
use crate::endpoint::realtime_websocket::protocol::RealtimeTranscriptDelta;
@@ -202,6 +203,7 @@ pub struct RealtimeWebsocketWriter {
pub struct RealtimeWebsocketEvents {
rx_message: Arc<Mutex<mpsc::UnboundedReceiver<Result<Message, WsError>>>>,
active_transcript: Arc<Mutex<ActiveTranscriptState>>,
event_parser: RealtimeEventParser,
is_closed: Arc<AtomicBool>,
}
@@ -248,6 +250,7 @@ impl RealtimeWebsocketConnection {
fn new(
stream: WsStream,
rx_message: mpsc::UnboundedReceiver<Result<Message, WsError>>,
event_parser: RealtimeEventParser,
) -> Self {
let stream = Arc::new(stream);
let is_closed = Arc::new(AtomicBool::new(false));
@@ -259,6 +262,7 @@ impl RealtimeWebsocketConnection {
events: RealtimeWebsocketEvents {
rx_message: Arc::new(Mutex::new(rx_message)),
active_transcript: Arc::new(Mutex::new(ActiveTranscriptState::default())),
event_parser,
is_closed,
},
}
@@ -376,7 +380,7 @@ impl RealtimeWebsocketEvents {
match msg {
Message::Text(text) => {
if let Some(mut event) = parse_realtime_event(&text) {
if let Some(mut event) = parse_realtime_event(&text, self.event_parser) {
self.update_active_transcript(&mut event).await;
debug!(?event, "realtime websocket parsed event");
return Ok(Some(event));
@@ -495,7 +499,7 @@ impl RealtimeWebsocketClient {
);
let (stream, rx_message) = WsStream::new(stream);
let connection = RealtimeWebsocketConnection::new(stream, rx_message);
let connection = RealtimeWebsocketConnection::new(stream, rx_message, config.event_parser);
debug!(
session_id = config.session_id.as_deref().unwrap_or("<none>"),
"realtime websocket sending session.update"
@@ -636,7 +640,7 @@ mod tests {
.to_string();
assert_eq!(
parse_realtime_event(payload.as_str()),
parse_realtime_event(payload.as_str(), RealtimeEventParser::V1),
Some(RealtimeEvent::SessionUpdated {
session_id: "sess_123".to_string(),
instructions: Some("backend prompt".to_string()),
@@ -655,7 +659,7 @@ mod tests {
})
.to_string();
assert_eq!(
parse_realtime_event(payload.as_str()),
parse_realtime_event(payload.as_str(), RealtimeEventParser::V1),
Some(RealtimeEvent::AudioOut(RealtimeAudioFrame {
data: "AAA=".to_string(),
sample_rate: 48000,
@@ -673,7 +677,7 @@ mod tests {
})
.to_string();
assert_eq!(
parse_realtime_event(payload.as_str()),
parse_realtime_event(payload.as_str(), RealtimeEventParser::V1),
Some(RealtimeEvent::ConversationItemAdded(
json!({"type": "message", "seq": 7})
))
@@ -688,7 +692,7 @@ mod tests {
})
.to_string();
assert_eq!(
parse_realtime_event(payload.as_str()),
parse_realtime_event(payload.as_str(), RealtimeEventParser::V1),
Some(RealtimeEvent::ConversationItemDone {
item_id: "item_123".to_string(),
})
@@ -706,7 +710,7 @@ mod tests {
.to_string();
assert_eq!(
parse_realtime_event(payload.as_str()),
parse_realtime_event(payload.as_str(), RealtimeEventParser::V1),
Some(RealtimeEvent::HandoffRequested(RealtimeHandoffRequested {
handoff_id: "handoff_123".to_string(),
item_id: "item_123".to_string(),
@@ -725,7 +729,7 @@ mod tests {
.to_string();
assert_eq!(
parse_realtime_event(payload.as_str()),
parse_realtime_event(payload.as_str(), RealtimeEventParser::V1),
Some(RealtimeEvent::InputTranscriptDelta(
RealtimeTranscriptDelta {
delta: "hello ".to_string(),
@@ -743,7 +747,7 @@ mod tests {
.to_string();
assert_eq!(
parse_realtime_event(payload.as_str()),
parse_realtime_event(payload.as_str(), RealtimeEventParser::V1),
Some(RealtimeEvent::OutputTranscriptDelta(
RealtimeTranscriptDelta {
delta: "hi".to_string(),
@@ -752,6 +756,68 @@ mod tests {
);
}
#[test]
fn parse_realtime_v2_handoff_tool_call_event() {
let payload = json!({
"type": "conversation.item.done",
"item": {
"id": "item_123",
"type": "function_call",
"name": "codex",
"call_id": "call_123",
"arguments": "{\"prompt\":\"delegate this\"}"
}
})
.to_string();
assert_eq!(
parse_realtime_event(payload.as_str(), RealtimeEventParser::RealtimeV2),
Some(RealtimeEvent::HandoffRequested(RealtimeHandoffRequested {
handoff_id: "call_123".to_string(),
item_id: "item_123".to_string(),
input_transcript: "delegate this".to_string(),
active_transcript: Vec::new(),
}))
);
}
#[test]
fn parse_realtime_v2_input_audio_transcription_delta_event() {
let payload = json!({
"type": "conversation.item.input_audio_transcription.delta",
"delta": "hello"
})
.to_string();
assert_eq!(
parse_realtime_event(payload.as_str(), RealtimeEventParser::RealtimeV2),
Some(RealtimeEvent::InputTranscriptDelta(
RealtimeTranscriptDelta {
delta: "hello".to_string(),
}
))
);
}
#[test]
fn parse_realtime_v2_output_audio_delta_defaults_audio_shape() {
let payload = json!({
"type": "response.output_audio.delta",
"delta": "AQID"
})
.to_string();
assert_eq!(
parse_realtime_event(payload.as_str(), RealtimeEventParser::RealtimeV2),
Some(RealtimeEvent::AudioOut(RealtimeAudioFrame {
data: "AQID".to_string(),
sample_rate: 24_000,
num_channels: 1,
samples_per_channel: None,
}))
);
}
#[test]
fn merge_request_headers_matches_http_precedence() {
let mut provider_headers = HeaderMap::new();
@@ -1008,6 +1074,7 @@ mod tests {
instructions: "backend prompt".to_string(),
model: Some("realtime-test-model".to_string()),
session_id: Some("conv_1".to_string()),
event_parser: RealtimeEventParser::V1,
},
HeaderMap::new(),
HeaderMap::new(),
@@ -1190,6 +1257,7 @@ mod tests {
instructions: "backend prompt".to_string(),
model: Some("realtime-test-model".to_string()),
session_id: Some("conv_1".to_string()),
event_parser: RealtimeEventParser::V1,
},
HeaderMap::new(),
HeaderMap::new(),