mirror of
https://github.com/openai/codex.git
synced 2026-04-29 00:55:38 +00:00
Add realtime transcription mode for websocket sessions (#14556)
- add `experimental_realtime_ws_mode` (conversational/transcription) and plumb it into realtime conversation session config
- switch realtime websocket intent and `session.update` payload shape based on mode
- update config schema and realtime/config tests

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
@@ -463,6 +463,9 @@ pub struct Config {
|
||||
/// Experimental / do not use. Selects the realtime websocket model/snapshot
|
||||
/// used for the `Op::RealtimeConversation` connection.
|
||||
pub experimental_realtime_ws_model: Option<String>,
|
||||
/// Experimental / do not use. Selects the realtime websocket intent mode.
|
||||
/// `conversational` is speech-to-speech while `transcription` is transcript-only.
|
||||
pub experimental_realtime_ws_mode: RealtimeWsMode,
|
||||
/// Experimental / do not use. Overrides only the realtime conversation
|
||||
/// websocket transport instructions (the `Op::RealtimeConversation`
|
||||
/// `/ws` session.update instructions) without changing normal prompts.
|
||||
@@ -1238,6 +1241,9 @@ pub struct ConfigToml {
|
||||
/// Experimental / do not use. Selects the realtime websocket model/snapshot
|
||||
/// used for the `Op::RealtimeConversation` connection.
|
||||
pub experimental_realtime_ws_model: Option<String>,
|
||||
/// Experimental / do not use. Selects the realtime websocket intent mode.
|
||||
/// `conversational` is speech-to-speech while `transcription` is transcript-only.
|
||||
pub experimental_realtime_ws_mode: Option<RealtimeWsMode>,
|
||||
/// Experimental / do not use. Overrides only the realtime conversation
|
||||
/// websocket transport instructions (the `Op::RealtimeConversation`
|
||||
/// `/ws` session.update instructions) without changing normal prompts.
|
||||
@@ -1383,6 +1389,14 @@ pub struct RealtimeAudioConfig {
|
||||
pub speaker: Option<String>,
|
||||
}
|
||||
|
||||
/// Experimental / do not use. Realtime websocket intent mode.
///
/// Variant names are (de)serialized in `snake_case`, i.e. `conversational`
/// and `transcription`.
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq, JsonSchema)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum RealtimeWsMode {
|
||||
/// Speech-to-speech mode. Marked `#[default]`, so this is the value
/// produced by `RealtimeWsMode::default()`.
#[default]
|
||||
Conversational,
|
||||
/// Transcript-only mode.
Transcription,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)]
|
||||
#[schemars(deny_unknown_fields)]
|
||||
pub struct RealtimeAudioToml {
|
||||
@@ -2462,6 +2476,7 @@ impl Config {
|
||||
}),
|
||||
experimental_realtime_ws_base_url: cfg.experimental_realtime_ws_base_url,
|
||||
experimental_realtime_ws_model: cfg.experimental_realtime_ws_model,
|
||||
experimental_realtime_ws_mode: cfg.experimental_realtime_ws_mode.unwrap_or_default(),
|
||||
experimental_realtime_ws_backend_prompt: cfg.experimental_realtime_ws_backend_prompt,
|
||||
experimental_realtime_ws_startup_context: cfg.experimental_realtime_ws_startup_context,
|
||||
experimental_realtime_start_instructions: cfg.experimental_realtime_start_instructions,
|
||||
|
||||
Reference in New Issue
Block a user