[codex] Fix realtime v1 websocket compatibility (#23771)

## Why

Realtime v1 websocket sessions now expect a slightly different boundary
shape for text input, completed input transcripts, and connection
headers. Codex was still using the older shape, so some v1 text appends
could be rejected before the existing conversation flow could handle
them.

## What changed

- Send v1 user text items with `input_text` content
- Accept v1 turn-marked input transcript events as completed transcripts
- Add the v1 alpha header only for v1 realtime sessions
- Add tests covering the outbound text shape, transcript parsing, and
versioned headers

## Test plan

- `cargo test -p codex-api endpoint::realtime_websocket::methods::tests`
- `cargo test -p codex-core quicksilver_alpha_header`
This commit is contained in:
guinness-oai
2026-05-20 16:03:51 -07:00
committed by GitHub
parent 370b13afc9
commit d6d03d42ea
6 changed files with 57 additions and 3 deletions

View File

@@ -993,6 +993,22 @@ mod tests {
);
}
/// A v1 `conversation.input_transcript.turn_marked` payload must parse as a
/// completed input transcript that carries the payload's `transcript` text.
#[test]
fn parse_v1_input_transcript_turn_marked_event() {
    let raw_event = json!({
        "type": "conversation.input_transcript.turn_marked",
        "transcript": "hello realtime"
    })
    .to_string();

    let expected = RealtimeEvent::InputTranscriptDone(RealtimeTranscriptDone {
        text: "hello realtime".to_string(),
    });
    let parsed = parse_realtime_event(raw_event.as_str(), RealtimeEventParser::V1);

    assert_eq!(parsed, Some(expected));
}
#[test]
fn parse_output_transcript_delta_event() {
let payload = json!({
@@ -1581,6 +1597,10 @@ mod tests {
.expect("text");
let third_json: Value = serde_json::from_str(&third).expect("json");
assert_eq!(third_json["type"], "conversation.item.create");
assert_eq!(
third_json["item"]["content"][0]["type"],
Value::String("input_text".to_string())
);
assert_eq!(third_json["item"]["content"][0]["text"], "hello agent");
let fourth = ws

View File

@@ -21,7 +21,7 @@ pub(super) fn conversation_item_create_message(text: String) -> RealtimeOutbound
r#type: ConversationItemType::Message,
role: ConversationRole::User,
content: vec![ConversationItemContent {
r#type: ConversationContentType::Text,
r#type: ConversationContentType::InputText,
text,
}],
}),

View File

@@ -199,7 +199,6 @@ pub(super) struct ConversationItemContent {
/// Discriminator for the `r#type` field of a `ConversationItemContent` part.
///
/// Serialize-only. Because of `rename_all = "snake_case"`, each variant is
/// written to the wire as its snake_case name (`Text` -> `"text"`,
/// `InputText` -> `"input_text"`).
// NOTE(review): this listing looks like a diff hunk with its +/- markers
// stripped; `Text` may be the variant being removed — confirm against the
// checked-in file before relying on both variants existing.
#[derive(Debug, Clone, Copy, Serialize)]
#[serde(rename_all = "snake_case")]
pub(super) enum ConversationContentType {
Text,
InputText,
}

View File

@@ -43,7 +43,8 @@ pub(super) fn parse_realtime_event_v1(payload: &str) -> Option<RealtimeEvent> {
| "conversation.item.input_audio_transcription.delta" => {
parse_transcript_delta_event(&parsed, "delta").map(RealtimeEvent::InputTranscriptDelta)
}
"conversation.item.input_audio_transcription.completed" => {
"conversation.input_transcript.turn_marked"
| "conversation.item.input_audio_transcription.completed" => {
parse_transcript_done_event(&parsed, "transcript")
.map(RealtimeEvent::InputTranscriptDone)
}

View File

@@ -641,12 +641,14 @@ async fn prepare_realtime_start(
realtime_request_headers(
requested_realtime_session_id.as_deref(),
Some(realtime_api_key.as_str()),
version,
)?
}
ConversationStartTransport::Webrtc { .. } => {
realtime_request_headers(
requested_realtime_session_id.as_deref(),
/*api_key*/ None,
version,
)?
}
};
@@ -973,9 +975,14 @@ fn realtime_api_key(auth: Option<&CodexAuth>, provider: &ModelProviderInfo) -> C
fn realtime_request_headers(
realtime_session_id: Option<&str>,
api_key: Option<&str>,
version: RealtimeWsVersion,
) -> CodexResult<Option<HeaderMap>> {
let mut headers = HeaderMap::new();
if version == RealtimeWsVersion::V1 {
headers.insert("openai-alpha", HeaderValue::from_static("quicksilver=v1"));
}
if let Some(realtime_session_id) = realtime_session_id
&& let Ok(realtime_session_id) = HeaderValue::from_str(realtime_session_id)
{

View File

@@ -1,9 +1,11 @@
use super::RealtimeHandoffState;
use super::RealtimeSessionKind;
use super::realtime_delegation_from_handoff;
use super::realtime_request_headers;
use super::realtime_text_from_handoff_request;
use super::wrap_realtime_delegation_input;
use async_channel::bounded;
use codex_config::config_toml::RealtimeWsVersion;
use codex_protocol::protocol::RealtimeHandoffRequested;
use codex_protocol::protocol::RealtimeTranscriptEntry;
use pretty_assertions::assert_eq;
@@ -137,3 +139,28 @@ async fn clears_active_handoff_explicitly() {
*state.active_handoff.lock().await = None;
assert_eq!(state.active_handoff.lock().await.clone(), None);
}
/// Requesting headers for a v1 realtime session must yield an `openai-alpha`
/// header whose value is `quicksilver=v1`.
#[test]
fn uses_quicksilver_alpha_header_for_realtime_v1() {
    let built = realtime_request_headers(
        Some("session_1"),
        Some("sk-test"),
        RealtimeWsVersion::V1,
    );
    // Unwrap the `CodexResult` first, then the `Option<HeaderMap>` inside it.
    let header_map = built.expect("headers").expect("headers");

    let alpha = header_map.get("openai-alpha");
    let alpha_text = alpha.and_then(|raw| raw.to_str().ok());

    assert_eq!(alpha_text, Some("quicksilver=v1"));
}
/// Requesting headers for a v2 realtime session must not produce an
/// `openai-alpha` header at all.
#[test]
fn omits_quicksilver_alpha_header_for_realtime_v2() {
    let built = realtime_request_headers(
        Some("session_1"),
        Some("sk-test"),
        RealtimeWsVersion::V2,
    );
    // Unwrap the `CodexResult` first, then the `Option<HeaderMap>` inside it.
    let header_map = built.expect("headers").expect("headers");

    let alpha = header_map.get("openai-alpha");
    assert!(alpha.is_none());
}