diff --git a/codex-rs/codex-api/src/endpoint/realtime_websocket/methods.rs b/codex-rs/codex-api/src/endpoint/realtime_websocket/methods.rs index 9fcca1c3e3..ad26549a26 100644 --- a/codex-rs/codex-api/src/endpoint/realtime_websocket/methods.rs +++ b/codex-rs/codex-api/src/endpoint/realtime_websocket/methods.rs @@ -993,6 +993,22 @@ mod tests { ); } + #[test] + fn parse_v1_input_transcript_turn_marked_event() { + let payload = json!({ + "type": "conversation.input_transcript.turn_marked", + "transcript": "hello realtime" + }) + .to_string(); + + assert_eq!( + parse_realtime_event(payload.as_str(), RealtimeEventParser::V1), + Some(RealtimeEvent::InputTranscriptDone(RealtimeTranscriptDone { + text: "hello realtime".to_string(), + })) + ); + } + #[test] fn parse_output_transcript_delta_event() { let payload = json!({ @@ -1581,6 +1597,10 @@ mod tests { .expect("text"); let third_json: Value = serde_json::from_str(&third).expect("json"); assert_eq!(third_json["type"], "conversation.item.create"); + assert_eq!( + third_json["item"]["content"][0]["type"], + Value::String("input_text".to_string()) + ); assert_eq!(third_json["item"]["content"][0]["text"], "hello agent"); let fourth = ws diff --git a/codex-rs/codex-api/src/endpoint/realtime_websocket/methods_v1.rs b/codex-rs/codex-api/src/endpoint/realtime_websocket/methods_v1.rs index 19e4fa203a..0f1a269082 100644 --- a/codex-rs/codex-api/src/endpoint/realtime_websocket/methods_v1.rs +++ b/codex-rs/codex-api/src/endpoint/realtime_websocket/methods_v1.rs @@ -21,7 +21,7 @@ pub(super) fn conversation_item_create_message(text: String) -> RealtimeOutbound r#type: ConversationItemType::Message, role: ConversationRole::User, content: vec![ConversationItemContent { - r#type: ConversationContentType::Text, + r#type: ConversationContentType::InputText, text, }], }), diff --git a/codex-rs/codex-api/src/endpoint/realtime_websocket/protocol.rs b/codex-rs/codex-api/src/endpoint/realtime_websocket/protocol.rs index d689f6ea96..5df4c0c503 100644 --- a/codex-rs/codex-api/src/endpoint/realtime_websocket/protocol.rs +++ b/codex-rs/codex-api/src/endpoint/realtime_websocket/protocol.rs @@ -199,7 +199,6 @@ pub(super) struct ConversationItemContent { #[derive(Debug, Clone, Copy, Serialize)] #[serde(rename_all = "snake_case")] pub(super) enum ConversationContentType { - Text, InputText, } diff --git a/codex-rs/codex-api/src/endpoint/realtime_websocket/protocol_v1.rs b/codex-rs/codex-api/src/endpoint/realtime_websocket/protocol_v1.rs index 3c1d25aed7..a464852244 100644 --- a/codex-rs/codex-api/src/endpoint/realtime_websocket/protocol_v1.rs +++ b/codex-rs/codex-api/src/endpoint/realtime_websocket/protocol_v1.rs @@ -43,7 +43,8 @@ pub(super) fn parse_realtime_event_v1(payload: &str) -> Option { | "conversation.item.input_audio_transcription.delta" => { parse_transcript_delta_event(&parsed, "delta").map(RealtimeEvent::InputTranscriptDelta) } - "conversation.item.input_audio_transcription.completed" => { + "conversation.input_transcript.turn_marked" + | "conversation.item.input_audio_transcription.completed" => { parse_transcript_done_event(&parsed, "transcript") .map(RealtimeEvent::InputTranscriptDone) } diff --git a/codex-rs/core/src/realtime_conversation.rs b/codex-rs/core/src/realtime_conversation.rs index 249b3ae15f..7f71142e19 100644 --- a/codex-rs/core/src/realtime_conversation.rs +++ b/codex-rs/core/src/realtime_conversation.rs @@ -641,12 +641,14 @@ async fn prepare_realtime_start( realtime_request_headers( requested_realtime_session_id.as_deref(), Some(realtime_api_key.as_str()), + version, )? } ConversationStartTransport::Webrtc { .. } => { realtime_request_headers( requested_realtime_session_id.as_deref(), /*api_key*/ None, + version, )? } }; @@ -973,9 +975,14 @@ fn realtime_api_key(auth: Option<&CodexAuth>, provider: &ModelProviderInfo) -> C fn realtime_request_headers( realtime_session_id: Option<&str>, api_key: Option<&str>, + version: RealtimeWsVersion, ) -> CodexResult> { let mut headers = HeaderMap::new(); + if version == RealtimeWsVersion::V1 { + headers.insert("openai-alpha", HeaderValue::from_static("quicksilver=v1")); + } + if let Some(realtime_session_id) = realtime_session_id && let Ok(realtime_session_id) = HeaderValue::from_str(realtime_session_id) { diff --git a/codex-rs/core/src/realtime_conversation_tests.rs b/codex-rs/core/src/realtime_conversation_tests.rs index a146c43869..b67205ef8f 100644 --- a/codex-rs/core/src/realtime_conversation_tests.rs +++ b/codex-rs/core/src/realtime_conversation_tests.rs @@ -1,9 +1,11 @@ use super::RealtimeHandoffState; use super::RealtimeSessionKind; use super::realtime_delegation_from_handoff; +use super::realtime_request_headers; use super::realtime_text_from_handoff_request; use super::wrap_realtime_delegation_input; use async_channel::bounded; +use codex_config::config_toml::RealtimeWsVersion; use codex_protocol::protocol::RealtimeHandoffRequested; use codex_protocol::protocol::RealtimeTranscriptEntry; use pretty_assertions::assert_eq; @@ -137,3 +139,28 @@ async fn clears_active_handoff_explicitly() { *state.active_handoff.lock().await = None; assert_eq!(state.active_handoff.lock().await.clone(), None); } + +#[test] +fn uses_quicksilver_alpha_header_for_realtime_v1() { + let headers = + realtime_request_headers(Some("session_1"), Some("sk-test"), RealtimeWsVersion::V1) + .expect("headers") + .expect("headers"); + + assert_eq!( + headers + .get("openai-alpha") + .and_then(|value| value.to_str().ok()), + Some("quicksilver=v1") + ); +} + +#[test] +fn omits_quicksilver_alpha_header_for_realtime_v2() { + let headers = + realtime_request_headers(Some("session_1"), Some("sk-test"), RealtimeWsVersion::V2) + .expect("headers") + .expect("headers"); + + assert!(headers.get("openai-alpha").is_none()); +}