Add app-server v2 thread realtime API (#12715)

Add experimental `thread/realtime/*` v2 requests and notifications, then
route app-server realtime events through that thread-scoped surface with
integration coverage.

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-02-25 09:59:10 -08:00
committed by GitHub
parent 0543d0a022
commit 947092283a
25 changed files with 1706 additions and 2 deletions

View File

@@ -46,6 +46,7 @@ use codex_protocol::protocol::PatchApplyStatus as CorePatchApplyStatus;
use codex_protocol::protocol::RateLimitSnapshot as CoreRateLimitSnapshot;
use codex_protocol::protocol::RateLimitWindow as CoreRateLimitWindow;
use codex_protocol::protocol::ReadOnlyAccess as CoreReadOnlyAccess;
use codex_protocol::protocol::RealtimeAudioFrame as CoreRealtimeAudioFrame;
use codex_protocol::protocol::RejectConfig as CoreRejectConfig;
use codex_protocol::protocol::SessionSource as CoreSessionSource;
use codex_protocol::protocol::SkillDependencies as CoreSkillDependencies;
@@ -2551,6 +2552,157 @@ pub struct ErrorNotification {
pub turn_id: String,
}
/// EXPERIMENTAL - thread realtime audio chunk.
// API-side counterpart of `CoreRealtimeAudioFrame`: the `From` impls in this file
// copy the four fields across one-to-one in both directions.
// With `rename_all = "camelCase"` the serialized keys are `data`, `sampleRate`,
// `numChannels`, and `samplesPerChannel`.
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeAudioChunk {
// Audio payload carried as a string. The encoding is not specified here
// (presumably base64 - TODO confirm against the producer).
pub data: String,
// Sample rate of the audio (presumably in Hz - TODO confirm).
pub sample_rate: u32,
// Channel count for the audio.
pub num_channels: u16,
// Optional per-channel sample count; `None` when the sender does not supply it.
pub samples_per_channel: Option<u32>,
}
/// Converts a core realtime audio frame into its v2 API representation.
///
/// Every field is moved across unchanged; nothing is re-encoded or validated.
impl From<CoreRealtimeAudioFrame> for ThreadRealtimeAudioChunk {
    fn from(
        // Destructure without `..` so that adding a field to the core type
        // becomes a compile error here instead of being dropped silently.
        CoreRealtimeAudioFrame {
            data,
            sample_rate,
            num_channels,
            samples_per_channel,
        }: CoreRealtimeAudioFrame,
    ) -> Self {
        Self {
            data,
            sample_rate,
            num_channels,
            samples_per_channel,
        }
    }
}
/// Converts a v2 API audio chunk back into the core realtime audio frame.
///
/// Every field is moved across unchanged; nothing is re-encoded or validated.
impl From<ThreadRealtimeAudioChunk> for CoreRealtimeAudioFrame {
    fn from(
        // Destructure without `..` so that adding a field to the API type
        // becomes a compile error here instead of being dropped silently.
        ThreadRealtimeAudioChunk {
            data,
            sample_rate,
            num_channels,
            samples_per_channel,
        }: ThreadRealtimeAudioChunk,
    ) -> Self {
        Self {
            data,
            sample_rate,
            num_channels,
            samples_per_channel,
        }
    }
}
/// EXPERIMENTAL - start a thread-scoped realtime session.
// Request parameters. With `rename_all = "camelCase"` the serialized keys are
// `threadId`, `prompt`, and `sessionId`.
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeStartParams {
// Identifier of the thread the realtime session is scoped to.
pub thread_id: String,
// Prompt text supplied at start; how it is consumed is decided by the request
// handler, which is not visible in this file section.
pub prompt: String,
// ts-rs: emit the field as optional *and* nullable in TypeScript
// (`sessionId?: string | null`) rather than a required `string | null`.
#[ts(optional = nullable)]
// Optional session id. What a caller-supplied value means versus `None` is not
// visible here - NOTE(review): confirm against the handler.
pub session_id: Option<String>,
}
/// EXPERIMENTAL - response for starting thread realtime.
// Field-less acknowledgement type: it carries no data of its own.
// Declared with braces, so serde treats it as an (empty) map rather than a unit.
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeStartResponse {}
/// EXPERIMENTAL - append audio input to thread realtime.
// Request parameters. Serialized keys are `threadId` and `audio`.
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeAppendAudioParams {
// Identifier of the thread whose realtime session receives the audio.
pub thread_id: String,
// The audio chunk to append, nested as a `ThreadRealtimeAudioChunk` object.
pub audio: ThreadRealtimeAudioChunk,
}
/// EXPERIMENTAL - response for appending realtime audio input.
// Field-less acknowledgement type: it carries no data of its own.
// Declared with braces, so serde treats it as an (empty) map rather than a unit.
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeAppendAudioResponse {}
/// EXPERIMENTAL - append text input to thread realtime.
// Request parameters. Serialized keys are `threadId` and `text`.
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeAppendTextParams {
// Identifier of the thread whose realtime session receives the text.
pub thread_id: String,
// The text to append; no length or content constraints are enforced by this type.
pub text: String,
}
/// EXPERIMENTAL - response for appending realtime text input.
// Field-less acknowledgement type: it carries no data of its own.
// Declared with braces, so serde treats it as an (empty) map rather than a unit.
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeAppendTextResponse {}
/// EXPERIMENTAL - stop thread realtime.
// Request parameters. The only serialized key is `threadId`.
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeStopParams {
// Identifier of the thread whose realtime session should be stopped.
pub thread_id: String,
}
/// EXPERIMENTAL - response for stopping thread realtime.
// Field-less acknowledgement type: it carries no data of its own.
// Declared with braces, so serde treats it as an (empty) map rather than a unit.
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeStopResponse {}
/// EXPERIMENTAL - emitted when thread realtime startup is accepted.
// Notification payload. Serialized keys are `threadId` and `sessionId`.
// Unlike the request/response types in this section, `Default` is not derived.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeStartedNotification {
// Identifier of the thread the notification refers to.
pub thread_id: String,
// Session id associated with the started session, if any. There is no
// `#[ts(optional = nullable)]` here, so the TypeScript field is a required
// `string | null`.
pub session_id: Option<String>,
}
/// EXPERIMENTAL - raw non-audio thread realtime item emitted by the backend.
// Notification payload. Serialized keys are `threadId` and `item`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeItemAddedNotification {
// Identifier of the thread the notification refers to.
pub thread_id: String,
// The item as untyped JSON: this type imposes no schema on it, so consumers
// must inspect the value themselves.
pub item: JsonValue,
}
/// EXPERIMENTAL - streamed output audio emitted by thread realtime.
// Notification payload. Serialized keys are `threadId` and `audio`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeOutputAudioDeltaNotification {
// Identifier of the thread the notification refers to.
pub thread_id: String,
// One chunk of output audio, using the same `ThreadRealtimeAudioChunk` shape
// that `ThreadRealtimeAppendAudioParams` uses for input.
pub audio: ThreadRealtimeAudioChunk,
}
/// EXPERIMENTAL - emitted when thread realtime encounters an error.
// Notification payload. Serialized keys are `threadId` and `message`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeErrorNotification {
// Identifier of the thread the notification refers to.
pub thread_id: String,
// Error description as free-form text; this type carries no structured
// error code.
pub message: String,
}
/// EXPERIMENTAL - emitted when thread realtime transport closes.
// Notification payload. Serialized keys are `threadId` and `reason`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
// Generated TypeScript bindings for this type are exported under `v2/`.
#[ts(export_to = "v2/")]
pub struct ThreadRealtimeClosedNotification {
// Identifier of the thread the notification refers to.
pub thread_id: String,
// Optional free-form close reason; `None` when no reason is available.
pub reason: Option<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]