mirror of
https://github.com/openai/codex.git
synced 2026-04-29 08:56:38 +00:00
[stack 2/4] Align main realtime v2 wire and runtime flow (#14830)
## Stack Position

2/4. Built on top of #14828.

## Base

- #14828

## Unblocks

- #14829
- #14827

## Scope

- Port the realtime v2 wire parsing, session, app-server, and conversation runtime behavior onto the split websocket-method base.
- Branch runtime behavior directly on the current realtime session kind instead of parser-derived flow flags.
- Keep regression coverage in the existing e2e suites.

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
@@ -39,6 +39,8 @@ pub(super) enum RealtimeOutboundMessage {
|
||||
handoff_id: String,
|
||||
output_text: String,
|
||||
},
|
||||
#[serde(rename = "response.create")]
|
||||
ResponseCreate,
|
||||
#[serde(rename = "session.update")]
|
||||
SessionUpdate { session: SessionUpdateSession },
|
||||
#[serde(rename = "conversation.item.create")]
|
||||
@@ -48,12 +50,24 @@ pub(super) enum RealtimeOutboundMessage {
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub(super) struct SessionUpdateSession {
|
||||
#[serde(rename = "type")]
|
||||
pub(super) kind: String,
|
||||
pub(super) r#type: SessionType,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) instructions: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) output_modalities: Option<Vec<String>>,
|
||||
pub(super) audio: SessionAudio,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) tools: Option<Vec<SessionFunctionTool>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) tool_choice: Option<String>,
|
||||
}
|
||||
|
||||
/// Session kind written into a session payload.
///
/// Serialize-only (no `Deserialize` derive). Because of
/// `rename_all = "snake_case"`, the unit variants serialize as the strings
/// `"quicksilver"`, `"realtime"` and `"transcription"`.
#[derive(Debug, Clone, Copy, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub(super) enum SessionType {
|
||||
Quicksilver,
|
||||
Realtime,
|
||||
Transcription,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
@@ -66,17 +80,29 @@ pub(super) struct SessionAudio {
|
||||
/// Input-audio settings of a session payload (serialize-only).
///
/// `format` is always emitted; the two optional fields are left out of the
/// serialized output entirely when they are `None`.
#[derive(Debug, Clone, Serialize)]
|
||||
pub(super) struct SessionAudioInput {
|
||||
pub(super) format: SessionAudioFormat,
|
||||
// Omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) noise_reduction: Option<SessionNoiseReduction>,
|
||||
// Omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) turn_detection: Option<SessionTurnDetection>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub(super) struct SessionAudioFormat {
|
||||
#[serde(rename = "type")]
|
||||
pub(super) kind: String,
|
||||
pub(super) r#type: AudioFormatType,
|
||||
pub(super) rate: u32,
|
||||
}
|
||||
|
||||
/// Audio format identifier used in audio format descriptors.
///
/// The only variant is explicitly renamed, so it serializes as the string
/// `"audio/pcm"` rather than its Rust identifier.
#[derive(Debug, Clone, Copy, Serialize)]
|
||||
pub(super) enum AudioFormatType {
|
||||
#[serde(rename = "audio/pcm")]
|
||||
AudioPcm,
|
||||
}
|
||||
|
||||
/// Output-audio settings of a session payload (serialize-only).
///
/// `voice` is always emitted; `format` is skipped when it is `None`.
#[derive(Debug, Clone, Serialize)]
|
||||
pub(super) struct SessionAudioOutput {
|
||||
// Omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(super) format: Option<SessionAudioOutputFormat>,
|
||||
pub(super) voice: SessionAudioVoice,
|
||||
}
|
||||
|
||||
@@ -84,18 +110,64 @@ pub(super) struct SessionAudioOutput {
|
||||
pub(super) enum SessionAudioVoice {
|
||||
#[serde(rename = "fathom")]
|
||||
Fathom,
|
||||
#[serde(rename = "alloy")]
|
||||
Alloy,
|
||||
#[serde(rename = "marin")]
|
||||
Marin,
|
||||
}
|
||||
|
||||
/// Noise-reduction setting for input audio (serialize-only).
///
/// Serializes as an object with a single `type` key.
#[derive(Debug, Clone, Serialize)]
|
||||
pub(super) struct SessionNoiseReduction {
|
||||
// Emitted under the key `type`.
#[serde(rename = "type")]
|
||||
pub(super) r#type: NoiseReductionType,
|
||||
}
|
||||
|
||||
/// Noise-reduction mode.
///
/// With `rename_all = "snake_case"`, `NearField` serializes as the string
/// `"near_field"`.
#[derive(Debug, Clone, Copy, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub(super) enum NoiseReductionType {
|
||||
NearField,
|
||||
}
|
||||
|
||||
/// Turn-detection setting for input audio (serialize-only).
///
/// Serializes as an object with the keys `type`, `interrupt_response` and
/// `create_response`; all three are always emitted.
// NOTE(review): the two booleans presumably control whether detected speech
// interrupts an in-flight response and whether a response is created
// automatically at end of turn — confirm against the realtime API docs.
#[derive(Debug, Clone, Serialize)]
|
||||
pub(super) struct SessionTurnDetection {
|
||||
// Emitted under the key `type`.
#[serde(rename = "type")]
|
||||
pub(super) r#type: TurnDetectionType,
|
||||
pub(super) interrupt_response: bool,
|
||||
pub(super) create_response: bool,
|
||||
}
|
||||
|
||||
/// Turn-detection mode.
///
/// With `rename_all = "snake_case"`, `ServerVad` serializes as the string
/// `"server_vad"`.
#[derive(Debug, Clone, Copy, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub(super) enum TurnDetectionType {
|
||||
ServerVad,
|
||||
}
|
||||
|
||||
/// Format descriptor for output audio (serialize-only).
///
/// Serializes as an object with the keys `type` and `rate`.
#[derive(Debug, Clone, Serialize)]
|
||||
pub(super) struct SessionAudioOutputFormat {
|
||||
// Emitted under the key `type`.
#[serde(rename = "type")]
|
||||
pub(super) r#type: AudioFormatType,
|
||||
// NOTE(review): presumably the sample rate in Hz — confirm with callers.
pub(super) rate: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub(super) struct ConversationMessageItem {
|
||||
#[serde(rename = "type")]
|
||||
pub(super) kind: String,
|
||||
pub(super) role: String,
|
||||
pub(super) r#type: ConversationItemType,
|
||||
pub(super) role: ConversationRole,
|
||||
pub(super) content: Vec<ConversationItemContent>,
|
||||
}
|
||||
|
||||
/// Discriminator for conversation items.
///
/// With `rename_all = "snake_case"`, the variants serialize as the strings
/// `"message"` and `"function_call_output"`.
#[derive(Debug, Clone, Copy, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub(super) enum ConversationItemType {
|
||||
Message,
|
||||
FunctionCallOutput,
|
||||
}
|
||||
|
||||
/// Role attached to a conversation message item.
///
/// Only `User` is defined here; it serializes as the string `"user"`.
#[derive(Debug, Clone, Copy, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub(super) enum ConversationRole {
|
||||
User,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(untagged)]
|
||||
pub(super) enum ConversationItemPayload {
|
||||
@@ -106,7 +178,7 @@ pub(super) enum ConversationItemPayload {
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub(super) struct ConversationFunctionCallOutputItem {
|
||||
#[serde(rename = "type")]
|
||||
pub(super) kind: String,
|
||||
pub(super) r#type: ConversationItemType,
|
||||
pub(super) call_id: String,
|
||||
pub(super) output: String,
|
||||
}
|
||||
@@ -114,19 +186,32 @@ pub(super) struct ConversationFunctionCallOutputItem {
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub(super) struct ConversationItemContent {
|
||||
#[serde(rename = "type")]
|
||||
pub(super) kind: String,
|
||||
pub(super) r#type: ConversationContentType,
|
||||
pub(super) text: String,
|
||||
}
|
||||
|
||||
/// Discriminator for a conversation item's content part.
///
/// With `rename_all = "snake_case"`, the variants serialize as the strings
/// `"text"` and `"input_text"`.
#[derive(Debug, Clone, Copy, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub(super) enum ConversationContentType {
|
||||
Text,
|
||||
InputText,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub(super) struct SessionFunctionTool {
|
||||
#[serde(rename = "type")]
|
||||
pub(super) kind: String,
|
||||
pub(super) r#type: SessionToolType,
|
||||
pub(super) name: String,
|
||||
pub(super) description: String,
|
||||
pub(super) parameters: Value,
|
||||
}
|
||||
|
||||
/// Discriminator for a session tool definition.
///
/// Only `Function` is defined here; it serializes as the string
/// `"function"`.
#[derive(Debug, Clone, Copy, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub(super) enum SessionToolType {
|
||||
Function,
|
||||
}
|
||||
|
||||
pub(super) fn parse_realtime_event(
|
||||
payload: &str,
|
||||
event_parser: RealtimeEventParser,
|
||||
|
||||
Reference in New Issue
Block a user