Add realtime transcription mode for websocket sessions (#14556)

- add experimental_realtime_ws_mode (conversational/transcription) and
plumb it into realtime conversation session config
- select the realtime websocket intent and the session.update payload
shape based on the configured mode
- update config schema and realtime/config tests

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-03-12 23:50:30 -07:00
committed by GitHub
parent eaf81d3f6f
commit 2253a9d1d7
9 changed files with 482 additions and 63 deletions

View File

@@ -4129,6 +4129,7 @@ fn test_precedence_fixture_with_o3_profile() -> std::io::Result<()> {
experimental_realtime_start_instructions: None,
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_model: None,
experimental_realtime_ws_mode: RealtimeWsMode::Conversational,
experimental_realtime_ws_backend_prompt: None,
experimental_realtime_ws_startup_context: None,
base_instructions: None,
@@ -4265,6 +4266,7 @@ fn test_precedence_fixture_with_gpt3_profile() -> std::io::Result<()> {
experimental_realtime_start_instructions: None,
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_model: None,
experimental_realtime_ws_mode: RealtimeWsMode::Conversational,
experimental_realtime_ws_backend_prompt: None,
experimental_realtime_ws_startup_context: None,
base_instructions: None,
@@ -4399,6 +4401,7 @@ fn test_precedence_fixture_with_zdr_profile() -> std::io::Result<()> {
experimental_realtime_start_instructions: None,
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_model: None,
experimental_realtime_ws_mode: RealtimeWsMode::Conversational,
experimental_realtime_ws_backend_prompt: None,
experimental_realtime_ws_startup_context: None,
base_instructions: None,
@@ -4519,6 +4522,7 @@ fn test_precedence_fixture_with_gpt5_profile() -> std::io::Result<()> {
experimental_realtime_start_instructions: None,
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_model: None,
experimental_realtime_ws_mode: RealtimeWsMode::Conversational,
experimental_realtime_ws_backend_prompt: None,
experimental_realtime_ws_startup_context: None,
base_instructions: None,
@@ -5566,6 +5570,34 @@ experimental_realtime_ws_model = "realtime-test-model"
Ok(())
}
/// Checks that `experimental_realtime_ws_mode = "transcription"` deserializes
/// into `ConfigToml` and is carried through to the `Config` built from it.
#[test]
fn experimental_realtime_ws_mode_loads_from_config_toml() -> std::io::Result<()> {
    let toml_source = r#"
experimental_realtime_ws_mode = "transcription"
"#;

    // Stage 1: at the raw TOML layer the setting is held as an `Option`.
    let parsed: ConfigToml =
        toml::from_str(toml_source).expect("TOML deserialization should succeed");
    assert_eq!(
        parsed.experimental_realtime_ws_mode,
        Some(RealtimeWsMode::Transcription)
    );

    // Stage 2: build the `Config` against a throwaway home directory and
    // confirm the field now holds the mode directly.
    let home = TempDir::new()?;
    let home_path = home.path().to_path_buf();
    let loaded = Config::load_from_base_config_with_overrides(
        parsed,
        ConfigOverrides::default(),
        home_path,
    )?;
    assert_eq!(
        loaded.experimental_realtime_ws_mode,
        RealtimeWsMode::Transcription
    );

    Ok(())
}
#[test]
fn realtime_audio_loads_from_config_toml() -> std::io::Result<()> {
let cfg: ConfigToml = toml::from_str(