Add realtime output modality and transcript events (#17701)

- Add outputModality to thread/realtime/start and wire text/audio output
selection through app-server, core, API, and TUI.
- Rename the realtime transcript delta notification and add a separate
transcript done notification that forwards final text from item done
without correlating it with deltas.
This commit is contained in:
Ahmed Ibrahim
2026-04-14 00:13:13 -07:00
committed by GitHub
parent a6b03a22cc
commit 2f6fc7c137
38 changed files with 711 additions and 77 deletions

View File

@@ -6,6 +6,7 @@ use codex_api::Provider;
use codex_api::RealtimeAudioFrame;
use codex_api::RealtimeEvent;
use codex_api::RealtimeEventParser;
use codex_api::RealtimeOutputModality;
use codex_api::RealtimeSessionConfig;
use codex_api::RealtimeSessionMode;
use codex_api::RealtimeWebsocketClient;
@@ -145,6 +146,7 @@ async fn realtime_ws_e2e_session_create_and_event_flow() {
session_id: Some("conv_123".to_string()),
event_parser: RealtimeEventParser::V1,
session_mode: RealtimeSessionMode::Conversational,
output_modality: RealtimeOutputModality::Audio,
voice: RealtimeVoice::Cove,
},
HeaderMap::new(),
@@ -248,6 +250,7 @@ async fn realtime_ws_connect_webrtc_sideband_retries_join_until_server_is_availa
session_id: Some("conv_123".to_string()),
event_parser: RealtimeEventParser::RealtimeV2,
session_mode: RealtimeSessionMode::Conversational,
output_modality: RealtimeOutputModality::Audio,
voice: RealtimeVoice::Marin,
},
"rtc_test",
@@ -319,6 +322,7 @@ async fn realtime_ws_e2e_send_while_next_event_waits() {
session_id: Some("conv_123".to_string()),
event_parser: RealtimeEventParser::V1,
session_mode: RealtimeSessionMode::Conversational,
output_modality: RealtimeOutputModality::Audio,
voice: RealtimeVoice::Cove,
},
HeaderMap::new(),
@@ -386,6 +390,7 @@ async fn realtime_ws_e2e_disconnected_emitted_once() {
session_id: Some("conv_123".to_string()),
event_parser: RealtimeEventParser::V1,
session_mode: RealtimeSessionMode::Conversational,
output_modality: RealtimeOutputModality::Audio,
voice: RealtimeVoice::Cove,
},
HeaderMap::new(),
@@ -449,6 +454,7 @@ async fn realtime_ws_e2e_ignores_unknown_text_events() {
session_id: Some("conv_123".to_string()),
event_parser: RealtimeEventParser::V1,
session_mode: RealtimeSessionMode::Conversational,
output_modality: RealtimeOutputModality::Audio,
voice: RealtimeVoice::Cove,
},
HeaderMap::new(),
@@ -515,6 +521,7 @@ async fn realtime_ws_e2e_realtime_v2_parser_emits_handoff_requested() {
session_id: Some("conv_123".to_string()),
event_parser: RealtimeEventParser::RealtimeV2,
session_mode: RealtimeSessionMode::Conversational,
output_modality: RealtimeOutputModality::Audio,
voice: RealtimeVoice::Marin,
},
HeaderMap::new(),