Add realtime output modality and transcript events (#17701)

- Add outputModality to thread/realtime/start and wire text/audio output
selection through app-server, core, API, and TUI.
- Rename the realtime transcript delta notification and add a separate
transcript done notification that forwards final text from item done
without correlating it with deltas.
This commit is contained in:
Ahmed Ibrahim
2026-04-14 00:13:13 -07:00
committed by GitHub
parent a6b03a22cc
commit 2f6fc7c137
38 changed files with 711 additions and 77 deletions

View File

@@ -1026,8 +1026,10 @@ server_notification_definitions! {
ThreadRealtimeStarted => "thread/realtime/started" (v2::ThreadRealtimeStartedNotification),
#[experimental("thread/realtime/itemAdded")]
ThreadRealtimeItemAdded => "thread/realtime/itemAdded" (v2::ThreadRealtimeItemAddedNotification),
#[experimental("thread/realtime/transcriptUpdated")]
ThreadRealtimeTranscriptUpdated => "thread/realtime/transcriptUpdated" (v2::ThreadRealtimeTranscriptUpdatedNotification),
#[experimental("thread/realtime/transcript/delta")]
ThreadRealtimeTranscriptDelta => "thread/realtime/transcript/delta" (v2::ThreadRealtimeTranscriptDeltaNotification),
#[experimental("thread/realtime/transcript/done")]
ThreadRealtimeTranscriptDone => "thread/realtime/transcript/done" (v2::ThreadRealtimeTranscriptDoneNotification),
#[experimental("thread/realtime/outputAudio/delta")]
ThreadRealtimeOutputAudioDelta => "thread/realtime/outputAudio/delta" (v2::ThreadRealtimeOutputAudioDeltaNotification),
#[experimental("thread/realtime/sdp")]
@@ -1060,6 +1062,8 @@ mod tests {
use codex_protocol::account::PlanType;
use codex_protocol::parse_command::ParsedCommand;
use codex_protocol::protocol::RealtimeConversationVersion;
use codex_protocol::protocol::RealtimeOutputModality;
use codex_protocol::protocol::RealtimeVoice;
use codex_utils_absolute_path::AbsolutePathBuf;
use pretty_assertions::assert_eq;
use serde_json::json;
@@ -1788,10 +1792,11 @@ mod tests {
request_id: RequestId::Integer(9),
params: v2::ThreadRealtimeStartParams {
thread_id: "thr_123".to_string(),
output_modality: RealtimeOutputModality::Audio,
prompt: Some(Some("You are on a call".to_string())),
session_id: Some("sess_456".to_string()),
transport: None,
voice: Some(codex_protocol::protocol::RealtimeVoice::Marin),
voice: Some(RealtimeVoice::Marin),
},
};
assert_eq!(
@@ -1800,6 +1805,7 @@ mod tests {
"id": 9,
"params": {
"threadId": "thr_123",
"outputModality": "audio",
"prompt": "You are on a call",
"sessionId": "sess_456",
"transport": null,
@@ -1817,6 +1823,7 @@ mod tests {
request_id: RequestId::Integer(9),
params: v2::ThreadRealtimeStartParams {
thread_id: "thr_123".to_string(),
output_modality: RealtimeOutputModality::Audio,
prompt: None,
session_id: None,
transport: None,
@@ -1829,6 +1836,7 @@ mod tests {
"id": 9,
"params": {
"threadId": "thr_123",
"outputModality": "audio",
"sessionId": null,
"transport": null,
"voice": null
@@ -1841,6 +1849,7 @@ mod tests {
request_id: RequestId::Integer(9),
params: v2::ThreadRealtimeStartParams {
thread_id: "thr_123".to_string(),
output_modality: RealtimeOutputModality::Audio,
prompt: Some(None),
session_id: None,
transport: None,
@@ -1853,6 +1862,7 @@ mod tests {
"id": 9,
"params": {
"threadId": "thr_123",
"outputModality": "audio",
"prompt": null,
"sessionId": null,
"transport": null,
@@ -1867,6 +1877,7 @@ mod tests {
"id": 9,
"params": {
"threadId": "thr_123",
"outputModality": "audio",
"sessionId": null,
"transport": null,
"voice": null
@@ -1882,6 +1893,7 @@ mod tests {
"id": 9,
"params": {
"threadId": "thr_123",
"outputModality": "audio",
"prompt": null,
"sessionId": null,
"transport": null,
@@ -1966,6 +1978,7 @@ mod tests {
request_id: RequestId::Integer(1),
params: v2::ThreadRealtimeStartParams {
thread_id: "thr_123".to_string(),
output_modality: RealtimeOutputModality::Audio,
prompt: Some(Some("You are on a call".to_string())),
session_id: None,
transport: None,