Unify realtime v1/v2 session config (#14606)

## Summary
- unify realtime websocket settings under `[realtime]` (`version` and
`type`)
- remove `realtime_conversation_v2` and select parser/session mode from
config

## Testing
- not run (per request)

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-03-13 11:35:38 -07:00
committed by GitHub
parent 9dba7337f2
commit 3aabce9e0a
6 changed files with 96 additions and 48 deletions

View File

@@ -466,9 +466,9 @@ pub struct Config {
/// Experimental / do not use. Selects the realtime websocket model/snapshot
/// used for the `Op::RealtimeConversation` connection.
pub experimental_realtime_ws_model: Option<String>,
/// Experimental / do not use. Selects the realtime websocket intent mode.
/// `conversational` is speech-to-speech while `transcription` is transcript-only.
pub experimental_realtime_ws_mode: RealtimeWsMode,
/// Experimental / do not use. Realtime websocket session selection.
/// `version` controls v1/v2 and `type` controls conversational/transcription.
pub realtime: RealtimeConfig,
/// Experimental / do not use. Overrides only the realtime conversation
/// websocket transport instructions (the `Op::RealtimeConversation`
/// `/ws` session.update instructions) without changing normal prompts.
@@ -1244,9 +1244,10 @@ pub struct ConfigToml {
/// Experimental / do not use. Selects the realtime websocket model/snapshot
/// used for the `Op::RealtimeConversation` connection.
pub experimental_realtime_ws_model: Option<String>,
/// Experimental / do not use. Selects the realtime websocket intent mode.
/// `conversational` is speech-to-speech while `transcription` is transcript-only.
pub experimental_realtime_ws_mode: Option<RealtimeWsMode>,
/// Experimental / do not use. Realtime websocket session selection.
/// `version` controls v1/v2 and `type` controls conversational/transcription.
#[serde(default)]
pub realtime: Option<RealtimeToml>,
/// Experimental / do not use. Overrides only the realtime conversation
/// websocket transport instructions (the `Op::RealtimeConversation`
/// `/ws` session.update instructions) without changing normal prompts.
@@ -1400,6 +1401,30 @@ pub enum RealtimeWsMode {
Transcription,
}
/// Experimental / do not use. Realtime websocket session version selected
/// through the `version` key of the realtime config.
///
/// Variant names are (de)serialized in `snake_case`; `V1` is the default
/// used when no version is configured.
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum RealtimeWsVersion {
/// Version 1 of the realtime session (the default).
#[default]
V1,
/// Version 2 of the realtime session.
V2,
}
/// Experimental / do not use. Resolved realtime websocket session selection,
/// with every field holding a concrete value.
///
/// `Default` yields the default version together with the default session
/// mode.
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)]
#[schemars(deny_unknown_fields)]
pub struct RealtimeConfig {
/// Realtime session version (v1 or v2).
pub version: RealtimeWsVersion,
/// Session mode (conversational or transcription). Exposed as `type` when
/// (de)serialized; named `session_type` here because `type` is a Rust
/// keyword.
#[serde(rename = "type")]
pub session_type: RealtimeWsMode,
}
/// Experimental / do not use. Realtime websocket session selection as written
/// by the user in the config file.
///
/// Every field is optional so that a partially specified table is accepted;
/// `None` means the key was not set.
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)]
#[schemars(deny_unknown_fields)]
pub struct RealtimeToml {
/// Requested realtime session version (v1 or v2), if set.
pub version: Option<RealtimeWsVersion>,
/// Requested session mode (conversational or transcription), if set.
/// Exposed as `type` when (de)serialized; named `session_type` here because
/// `type` is a Rust keyword.
#[serde(rename = "type")]
pub session_type: Option<RealtimeWsMode>,
}
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)]
#[schemars(deny_unknown_fields)]
pub struct RealtimeAudioToml {
@@ -2482,7 +2507,12 @@ impl Config {
}),
experimental_realtime_ws_base_url: cfg.experimental_realtime_ws_base_url,
experimental_realtime_ws_model: cfg.experimental_realtime_ws_model,
experimental_realtime_ws_mode: cfg.experimental_realtime_ws_mode.unwrap_or_default(),
realtime: cfg
.realtime
.map_or_else(RealtimeConfig::default, |realtime| RealtimeConfig {
version: realtime.version.unwrap_or_default(),
session_type: realtime.session_type.unwrap_or_default(),
}),
experimental_realtime_ws_backend_prompt: cfg.experimental_realtime_ws_backend_prompt,
experimental_realtime_ws_startup_context: cfg.experimental_realtime_ws_startup_context,
experimental_realtime_start_instructions: cfg.experimental_realtime_start_instructions,