mirror of
https://github.com/openai/codex.git
synced 2026-05-23 12:34:25 +00:00
[codex] Fix realtime v1 websocket compatibility (#23771)
## Why

Realtime v1 websocket sessions now expect a slightly different boundary shape for text input, completed input transcripts, and connection headers. Codex was still using the older shape, so some v1 text appends could be rejected before the existing conversation flow could handle them.

## What changed

- Send v1 user text items with `input_text` content
- Accept v1 turn-marked input transcript events as completed transcripts
- Add the v1 alpha header only for v1 realtime sessions
- Cover the outbound text shape, transcript parsing, and versioned headers

## Test plan

- `cargo test -p codex-api endpoint::realtime_websocket::methods::tests`
- `cargo test -p codex-core quicksilver_alpha_header`
This commit is contained in:
@@ -993,6 +993,22 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_v1_input_transcript_turn_marked_event() {
|
||||
let payload = json!({
|
||||
"type": "conversation.input_transcript.turn_marked",
|
||||
"transcript": "hello realtime"
|
||||
})
|
||||
.to_string();
|
||||
|
||||
assert_eq!(
|
||||
parse_realtime_event(payload.as_str(), RealtimeEventParser::V1),
|
||||
Some(RealtimeEvent::InputTranscriptDone(RealtimeTranscriptDone {
|
||||
text: "hello realtime".to_string(),
|
||||
}))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_output_transcript_delta_event() {
|
||||
let payload = json!({
|
||||
@@ -1581,6 +1597,10 @@ mod tests {
|
||||
.expect("text");
|
||||
let third_json: Value = serde_json::from_str(&third).expect("json");
|
||||
assert_eq!(third_json["type"], "conversation.item.create");
|
||||
assert_eq!(
|
||||
third_json["item"]["content"][0]["type"],
|
||||
Value::String("input_text".to_string())
|
||||
);
|
||||
assert_eq!(third_json["item"]["content"][0]["text"], "hello agent");
|
||||
|
||||
let fourth = ws
|
||||
|
||||
@@ -21,7 +21,7 @@ pub(super) fn conversation_item_create_message(text: String) -> RealtimeOutbound
|
||||
r#type: ConversationItemType::Message,
|
||||
role: ConversationRole::User,
|
||||
content: vec![ConversationItemContent {
|
||||
r#type: ConversationContentType::Text,
|
||||
r#type: ConversationContentType::InputText,
|
||||
text,
|
||||
}],
|
||||
}),
|
||||
|
||||
@@ -199,7 +199,6 @@ pub(super) struct ConversationItemContent {
|
||||
#[derive(Debug, Clone, Copy, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub(super) enum ConversationContentType {
|
||||
Text,
|
||||
InputText,
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,8 @@ pub(super) fn parse_realtime_event_v1(payload: &str) -> Option<RealtimeEvent> {
|
||||
| "conversation.item.input_audio_transcription.delta" => {
|
||||
parse_transcript_delta_event(&parsed, "delta").map(RealtimeEvent::InputTranscriptDelta)
|
||||
}
|
||||
"conversation.item.input_audio_transcription.completed" => {
|
||||
"conversation.input_transcript.turn_marked"
|
||||
| "conversation.item.input_audio_transcription.completed" => {
|
||||
parse_transcript_done_event(&parsed, "transcript")
|
||||
.map(RealtimeEvent::InputTranscriptDone)
|
||||
}
|
||||
|
||||
@@ -641,12 +641,14 @@ async fn prepare_realtime_start(
|
||||
realtime_request_headers(
|
||||
requested_realtime_session_id.as_deref(),
|
||||
Some(realtime_api_key.as_str()),
|
||||
version,
|
||||
)?
|
||||
}
|
||||
ConversationStartTransport::Webrtc { .. } => {
|
||||
realtime_request_headers(
|
||||
requested_realtime_session_id.as_deref(),
|
||||
/*api_key*/ None,
|
||||
version,
|
||||
)?
|
||||
}
|
||||
};
|
||||
@@ -973,9 +975,14 @@ fn realtime_api_key(auth: Option<&CodexAuth>, provider: &ModelProviderInfo) -> C
|
||||
fn realtime_request_headers(
|
||||
realtime_session_id: Option<&str>,
|
||||
api_key: Option<&str>,
|
||||
version: RealtimeWsVersion,
|
||||
) -> CodexResult<Option<HeaderMap>> {
|
||||
let mut headers = HeaderMap::new();
|
||||
|
||||
if version == RealtimeWsVersion::V1 {
|
||||
headers.insert("openai-alpha", HeaderValue::from_static("quicksilver=v1"));
|
||||
}
|
||||
|
||||
if let Some(realtime_session_id) = realtime_session_id
|
||||
&& let Ok(realtime_session_id) = HeaderValue::from_str(realtime_session_id)
|
||||
{
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
use super::RealtimeHandoffState;
|
||||
use super::RealtimeSessionKind;
|
||||
use super::realtime_delegation_from_handoff;
|
||||
use super::realtime_request_headers;
|
||||
use super::realtime_text_from_handoff_request;
|
||||
use super::wrap_realtime_delegation_input;
|
||||
use async_channel::bounded;
|
||||
use codex_config::config_toml::RealtimeWsVersion;
|
||||
use codex_protocol::protocol::RealtimeHandoffRequested;
|
||||
use codex_protocol::protocol::RealtimeTranscriptEntry;
|
||||
use pretty_assertions::assert_eq;
|
||||
@@ -137,3 +139,28 @@ async fn clears_active_handoff_explicitly() {
|
||||
*state.active_handoff.lock().await = None;
|
||||
assert_eq!(state.active_handoff.lock().await.clone(), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn uses_quicksilver_alpha_header_for_realtime_v1() {
|
||||
let headers =
|
||||
realtime_request_headers(Some("session_1"), Some("sk-test"), RealtimeWsVersion::V1)
|
||||
.expect("headers")
|
||||
.expect("headers");
|
||||
|
||||
assert_eq!(
|
||||
headers
|
||||
.get("openai-alpha")
|
||||
.and_then(|value| value.to_str().ok()),
|
||||
Some("quicksilver=v1")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn omits_quicksilver_alpha_header_for_realtime_v2() {
|
||||
let headers =
|
||||
realtime_request_headers(Some("session_1"), Some("sk-test"), RealtimeWsVersion::V2)
|
||||
.expect("headers")
|
||||
.expect("headers");
|
||||
|
||||
assert!(headers.get("openai-alpha").is_none());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user