Default realtime startup to v2 model

Set the realtime config and Op::RealtimeConversationStart path to use v2 with gpt-realtime-1.5 by default. Add integration coverage for the outbound startup shape and keep legacy v1 tests explicit.

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-04-08 20:54:56 -07:00
parent 2f9090be62
commit 136037d09a
6 changed files with 135 additions and 7 deletions

View File

@@ -457,7 +457,7 @@ pub enum RealtimeTransport {
pub use codex_protocol::protocol::RealtimeConversationVersion as RealtimeWsVersion;
pub use codex_protocol::protocol::RealtimeVoice;
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema)]
#[schemars(deny_unknown_fields)]
pub struct RealtimeConfig {
pub version: RealtimeWsVersion,
@@ -467,6 +467,17 @@ pub struct RealtimeConfig {
pub voice: Option<RealtimeVoice>,
}
/// Hand-written `Default` for [`RealtimeConfig`].
///
/// The protocol version is pinned to `RealtimeWsVersion::V2`; the session
/// type and transport defer to their own `Default` impls, and no voice is
/// selected.
impl Default for RealtimeConfig {
    fn default() -> Self {
        let version = RealtimeWsVersion::V2;
        let session_type = RealtimeWsMode::default();
        let transport = RealtimeTransport::default();
        Self {
            version,
            session_type,
            transport,
            voice: None,
        }
    }
}
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)]
#[schemars(deny_unknown_fields)]
pub struct RealtimeToml {

View File

@@ -6482,6 +6482,35 @@ experimental_realtime_ws_model = "realtime-test-model"
Ok(())
}
/// A `[realtime]` table that sets only `voice` must still load with the
/// realtime defaults for every field it leaves out: version V2,
/// conversational session type, and websocket transport.
#[test]
fn realtime_config_partial_table_uses_realtime_defaults() -> std::io::Result<()> {
    let parsed: ConfigToml = toml::from_str(
        r#"
[realtime]
voice = "marin"
"#,
    )
    .expect("TOML deserialization should succeed");

    let home_dir = TempDir::new()?;
    let loaded = Config::load_from_base_config_with_overrides(
        parsed,
        ConfigOverrides::default(),
        home_dir.path().to_path_buf(),
    )?;

    // Only `voice` came from the TOML; the other three fields are the defaults.
    let expected = RealtimeConfig {
        version: RealtimeWsVersion::V2,
        session_type: RealtimeWsMode::Conversational,
        transport: RealtimeTransport::Websocket,
        voice: Some(RealtimeVoice::Marin),
    };
    assert_eq!(loaded.realtime, expected);

    Ok(())
}
#[test]
fn realtime_loads_from_config_toml() -> std::io::Result<()> {
let cfg: ConfigToml = toml::from_str(

View File

@@ -2094,11 +2094,14 @@ impl Config {
experimental_realtime_ws_model: cfg.experimental_realtime_ws_model,
realtime: cfg
.realtime
.map_or_else(RealtimeConfig::default, |realtime| RealtimeConfig {
version: realtime.version.unwrap_or_default(),
session_type: realtime.session_type.unwrap_or_default(),
transport: realtime.transport.unwrap_or_default(),
voice: realtime.voice,
.map_or_else(RealtimeConfig::default, |realtime| {
let defaults = RealtimeConfig::default();
RealtimeConfig {
version: realtime.version.unwrap_or(defaults.version),
session_type: realtime.session_type.unwrap_or(defaults.session_type),
transport: realtime.transport.unwrap_or(defaults.transport),
voice: realtime.voice,
}
}),
experimental_realtime_ws_backend_prompt: cfg.experimental_realtime_ws_backend_prompt,
experimental_realtime_ws_startup_context: cfg.experimental_realtime_ws_startup_context,

View File

@@ -61,6 +61,7 @@ const USER_TEXT_IN_QUEUE_CAPACITY: usize = 64;
const HANDOFF_OUT_QUEUE_CAPACITY: usize = 64;
const OUTPUT_EVENTS_QUEUE_CAPACITY: usize = 256;
const REALTIME_STARTUP_CONTEXT_TOKEN_BUDGET: usize = 5_000;
/// Fallback realtime model name, substituted when
/// `experimental_realtime_ws_model` is unset in the config.
const DEFAULT_REALTIME_MODEL: &str = "gpt-realtime-1.5";
const ACTIVE_RESPONSE_CONFLICT_ERROR_PREFIX: &str =
"Conversation already has an active response in progress:";
@@ -572,7 +573,12 @@ pub(crate) async fn build_realtime_session_config(
(false, true) => prompt,
(false, false) => format!("{prompt}\n\n{startup_context}"),
};
let model = config.experimental_realtime_ws_model.clone();
let model = Some(
config
.experimental_realtime_ws_model
.clone()
.unwrap_or_else(|| DEFAULT_REALTIME_MODEL.to_string()),
);
let event_parser = match config.realtime.version {
RealtimeWsVersion::V1 => RealtimeEventParser::V1,
RealtimeWsVersion::V2 => RealtimeEventParser::RealtimeV2,

View File

@@ -13,6 +13,7 @@ use std::time::Instant;
use anyhow::Context;
use anyhow::Result;
use anyhow::anyhow;
use codex_config::config_toml::RealtimeWsVersion;
use codex_core::CodexThread;
use codex_core::ThreadManager;
use codex_core::config::Config;
@@ -453,6 +454,7 @@ impl TestCodexBuilder {
config.model_provider.base_url = Some(base_url_clone);
config.model_provider.supports_websockets = true;
config.experimental_realtime_ws_model = Some("realtime-test-model".to_string());
config.realtime.version = RealtimeWsVersion::V1;
}));
let test_env = TestEnv::local().await?;
Box::pin(self.build_with_home_and_base_url(base_url, home, /*resume_from*/ None, test_env))

View File

@@ -360,6 +360,65 @@ async fn conversation_start_audio_text_close_round_trip() -> Result<()> {
Ok(())
}
/// Starts a realtime conversation with a config closure that sets only the
/// realtime base URL and an empty startup context, then checks the outbound
/// startup shape: the started event reports V2, the websocket handshake URI
/// carries `model=gpt-realtime-1.5`, the session update uses voice `marin`,
/// and the instructions equal the prompt passed in the start params.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn conversation_start_defaults_to_v2_and_gpt_realtime_1_5() -> Result<()> {
skip_if_no_network!(Ok(()));
let api_server = start_mock_server().await;
// NOTE(review): presumably one scripted connection with a single empty
// batch of server messages — confirm against `start_websocket_server`.
let realtime_server = start_websocket_server(vec![vec![vec![]]]).await;
let realtime_base_url = realtime_server.uri().to_string();
// No realtime model or version is set in this closure.
// NOTE(review): the empty startup context presumably keeps the instructions
// equal to the bare prompt — confirm against the session-config builder.
let mut builder = test_codex().with_config(move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.experimental_realtime_ws_startup_context = Some(String::new());
});
let test = builder.build(&api_server).await?;
// Only the prompt is supplied; session id, transport, and voice are left as `None`.
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
voice: None,
}))
.await?;
// Stop at the first started event (`Ok`) or error event (`Err`); every other
// event is ignored. An error event fails the test via the panic below.
let started = wait_for_event_match(&test.codex, |msg| match msg {
EventMsg::RealtimeConversationStarted(started) => Some(Ok(started.clone())),
EventMsg::Error(err) => Some(Err(err.clone())),
_ => None,
})
.await
.unwrap_or_else(|err: ErrorEvent| panic!("conversation start failed: {err:?}"));
// Expect one websocket handshake, waiting at most two seconds.
assert!(
realtime_server
.wait_for_handshakes(/*expected*/ 1, Duration::from_secs(2))
.await
);
// The first request on the first connection is read as the session update.
let session_update = realtime_server
.wait_for_request(/*connection_index*/ 0, /*request_index*/ 0)
.await;
let body = session_update.body_json();
// All four observed values are compared in a single JSON equality check.
assert_eq!(
json!({
"startedVersion": started.version,
"handshakeUri": realtime_server.single_handshake().uri(),
"voice": body["session"]["audio"]["output"]["voice"],
"instructions": body["session"]["instructions"],
}),
json!({
"startedVersion": RealtimeConversationVersion::V2,
"handshakeUri": "/v1/realtime?model=gpt-realtime-1.5",
"voice": "marin",
"instructions": "backend prompt",
})
);
realtime_server.shutdown().await;
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn conversation_webrtc_start_posts_generated_session() -> Result<()> {
skip_if_no_network!(Ok(()));
@@ -393,6 +452,7 @@ async fn conversation_webrtc_start_posts_generated_session() -> Result<()> {
config.experimental_realtime_ws_model = Some("realtime-test-model".to_string());
config.experimental_realtime_ws_startup_context = Some("startup context".to_string());
config.experimental_realtime_ws_base_url = Some(realtime_ws_base_url);
config.realtime.version = RealtimeWsVersion::V1;
});
let test = builder.build(&server).await?;
@@ -718,6 +778,7 @@ async fn conversation_start_connect_failure_emits_realtime_error_only() -> Resul
let server = start_websocket_server(vec![]).await;
let mut builder = test_codex().with_config(|config| {
config.experimental_realtime_ws_base_url = Some("http://127.0.0.1:1".to_string());
config.realtime.version = RealtimeWsVersion::V1;
});
let test = builder.build_with_websocket_server(&server).await?;
@@ -908,6 +969,7 @@ async fn conversation_uses_experimental_realtime_ws_base_url_override() -> Resul
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build_with_websocket_server(&startup_server).await?;
@@ -1279,6 +1341,7 @@ async fn conversation_uses_experimental_realtime_ws_startup_context_override() -
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
config.experimental_realtime_ws_backend_prompt = Some("prompt from config".to_string());
config.experimental_realtime_ws_startup_context =
Some("custom startup context".to_string());
@@ -1342,6 +1405,7 @@ async fn conversation_disables_realtime_startup_context_with_empty_override() ->
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
config.experimental_realtime_ws_backend_prompt = Some("prompt from config".to_string());
config.experimental_realtime_ws_startup_context = Some(String::new());
}
@@ -1404,6 +1468,7 @@ async fn conversation_start_injects_startup_context_from_thread_history() -> Res
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build_with_websocket_server(&startup_server).await?;
@@ -1466,6 +1531,7 @@ async fn conversation_startup_context_falls_back_to_workspace_map() -> Result<()
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build_with_websocket_server(&startup_server).await?;
@@ -1519,6 +1585,7 @@ async fn conversation_startup_context_is_truncated_and_sent_once_per_start() ->
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build_with_websocket_server(&startup_server).await?;
@@ -1607,6 +1674,7 @@ async fn conversation_mirrors_assistant_message_text_to_realtime_handoff() -> Re
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build(&api_server).await?;
@@ -1735,6 +1803,7 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() ->
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build_with_streaming_server(&api_server).await?;
@@ -1878,6 +1947,7 @@ async fn inbound_handoff_request_starts_turn() -> Result<()> {
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build(&api_server).await?;
@@ -1974,6 +2044,7 @@ async fn inbound_handoff_request_uses_active_transcript() -> Result<()> {
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build(&api_server).await?;
@@ -2068,6 +2139,7 @@ async fn inbound_handoff_request_clears_active_transcript_after_each_handoff() -
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build(&api_server).await?;
@@ -2169,6 +2241,7 @@ async fn inbound_conversation_item_does_not_start_turn_and_still_forwards_audio(
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build(&api_server).await?;
@@ -2283,6 +2356,7 @@ async fn delegated_turn_user_role_echo_does_not_redelegate_and_still_forwards_au
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build_with_streaming_server(&api_server).await?;
@@ -2427,6 +2501,7 @@ async fn inbound_handoff_request_does_not_block_realtime_event_forwarding() -> R
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build_with_streaming_server(&api_server).await?;
@@ -2555,6 +2630,7 @@ async fn inbound_handoff_request_steers_active_turn() -> Result<()> {
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build_with_streaming_server(&api_server).await?;
@@ -2698,6 +2774,7 @@ async fn inbound_handoff_request_starts_turn_and_does_not_block_realtime_audio()
let realtime_base_url = realtime_server.uri().to_string();
move |config| {
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
config.realtime.version = RealtimeWsVersion::V1;
}
});
let test = builder.build_with_streaming_server(&api_server).await?;