Add realtime output modality and transcript events (#17701)

- Add outputModality to thread/realtime/start and wire text/audio output
selection through app-server, core, API, and TUI.\n- Rename the realtime
transcript delta notification and add a separate transcript done
notification that forwards final text from item done without correlating
it with deltas.
This commit is contained in:
Ahmed Ibrahim
2026-04-14 00:13:13 -07:00
committed by GitHub
parent a6b03a22cc
commit 2f6fc7c137
38 changed files with 711 additions and 77 deletions

View File

@@ -74,6 +74,7 @@ use codex_protocol::protocol::RateLimitWindow as CoreRateLimitWindow;
use codex_protocol::protocol::ReadOnlyAccess as CoreReadOnlyAccess;
use codex_protocol::protocol::RealtimeAudioFrame as CoreRealtimeAudioFrame;
use codex_protocol::protocol::RealtimeConversationVersion;
use codex_protocol::protocol::RealtimeOutputModality;
use codex_protocol::protocol::RealtimeVoice;
use codex_protocol::protocol::RealtimeVoicesList;
use codex_protocol::protocol::ReviewDecision as CoreReviewDecision;
@@ -3976,11 +3977,14 @@ impl From<ThreadRealtimeAudioChunk> for CoreRealtimeAudioFrame {
}
/// EXPERIMENTAL - start a thread-scoped realtime session.
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, JsonSchema, TS)]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeStartParams {
pub thread_id: String,
/// Selects text or audio output for the realtime session. Transport and voice stay
/// independent so clients can choose how they connect separately from what the model emits.
pub output_modality: RealtimeOutputModality,
#[serde(
default,
deserialize_with = "super::serde_helpers::deserialize_double_option",
@@ -4098,9 +4102,22 @@ pub struct ThreadRealtimeItemAddedNotification {
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeTranscriptUpdatedNotification {
pub struct ThreadRealtimeTranscriptDeltaNotification {
pub thread_id: String,
pub role: String,
/// Live transcript delta from the realtime event.
pub delta: String,
}
/// EXPERIMENTAL - final transcript text emitted when realtime completes
/// a transcript part.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeTranscriptDoneNotification {
pub thread_id: String,
pub role: String,
/// Final complete text for the transcript part.
pub text: String,
}