[stack 1/4] Split realtime websocket methods by version (#14828)

## Stack Position

1/4. Base PR in the realtime stack.

## Base

- `main`

## Unblocks

- #14830

## Scope

- Split the realtime websocket request builders into `common`, `v1`, and `v2` modules.
- Keep runtime behavior unchanged in this PR.

---------

Co-authored-by: Codex <noreply@openai.com>
2026-04-29 08:56:38 +00:00 · 2026-03-16 16:00:59 -07:00
parent a3ba10b44b
commit 6f05d8d735
5 changed files with 250 additions and 118 deletions
--- a/codex-rs/codex-api/src/endpoint/realtime_websocket/methods_v2.rs
+++ b/codex-rs/codex-api/src/endpoint/realtime_websocket/methods_v2.rs
@@ -0,0 +1,103 @@
+use crate::endpoint::realtime_websocket::methods_common::REALTIME_AUDIO_FORMAT;
+use crate::endpoint::realtime_websocket::methods_common::REALTIME_AUDIO_SAMPLE_RATE;
+use crate::endpoint::realtime_websocket::protocol::ConversationFunctionCallOutputItem;
+use crate::endpoint::realtime_websocket::protocol::ConversationItemContent;
+use crate::endpoint::realtime_websocket::protocol::ConversationItemPayload;
+use crate::endpoint::realtime_websocket::protocol::ConversationMessageItem;
+use crate::endpoint::realtime_websocket::protocol::RealtimeOutboundMessage;
+use crate::endpoint::realtime_websocket::protocol::RealtimeSessionMode;
+use crate::endpoint::realtime_websocket::protocol::SessionAudio;
+use crate::endpoint::realtime_websocket::protocol::SessionAudioFormat;
+use crate::endpoint::realtime_websocket::protocol::SessionAudioInput;
+use crate::endpoint::realtime_websocket::protocol::SessionAudioOutput;
+use crate::endpoint::realtime_websocket::protocol::SessionAudioVoice;
+use crate::endpoint::realtime_websocket::protocol::SessionFunctionTool;
+use crate::endpoint::realtime_websocket::protocol::SessionUpdateSession;
+use serde_json::json;
+
+/// Value of `kind` on the `SessionUpdateSession` built for conversational mode.
+const REALTIME_V2_SESSION_TYPE: &str = "realtime";
+/// Name of the function tool attached to conversational sessions.
+const REALTIME_V2_CODEX_TOOL_NAME: &str = "codex";
+/// Description of the function tool attached to conversational sessions.
+const REALTIME_V2_CODEX_TOOL_DESCRIPTION: &str = "Delegate work to Codex and return the result.";
+
+/// Wraps `text` in a `ConversationItemCreate` outbound message.
+///
+/// The text becomes the only content part (kind `input_text`) of a `message`
+/// item whose role is `user`.
+pub(super) fn conversation_item_create_message(text: String) -> RealtimeOutboundMessage {
+    let text_part = ConversationItemContent {
+        kind: String::from("input_text"),
+        text,
+    };
+    let user_message = ConversationMessageItem {
+        kind: String::from("message"),
+        role: String::from("user"),
+        content: vec![text_part],
+    };
+
+    RealtimeOutboundMessage::ConversationItemCreate {
+        item: ConversationItemPayload::Message(user_message),
+    }
+}
+
+/// Builds the `ConversationItemCreate` message that carries a handoff result.
+///
+/// `handoff_id` is used as the `call_id` of a `function_call_output` item and
+/// `output_text` as its `output`.
+pub(super) fn conversation_handoff_append_message(
+    handoff_id: String,
+    output_text: String,
+) -> RealtimeOutboundMessage {
+    let call_output = ConversationFunctionCallOutputItem {
+        kind: String::from("function_call_output"),
+        call_id: handoff_id,
+        output: output_text,
+    };
+
+    RealtimeOutboundMessage::ConversationItemCreate {
+        item: ConversationItemPayload::FunctionCallOutput(call_output),
+    }
+}
+
+/// Builds the `SessionUpdateSession` payload for the requested session mode.
+///
+/// * `Conversational` — kind is `REALTIME_V2_SESSION_TYPE`, `instructions` is
+///   forwarded, audio output uses the `Alloy` voice, and a single function
+///   tool taking a required string `prompt` is attached.
+/// * `Transcription` — kind is `"transcription"`; instructions, audio output,
+///   and tools are all `None` (the `instructions` argument is discarded).
+///
+/// Both modes use the same audio input format.
+pub(super) fn session_update_session(
+    instructions: String,
+    session_mode: RealtimeSessionMode,
+) -> SessionUpdateSession {
+    // The input audio format does not depend on the mode, so build it once.
+    let input = SessionAudioInput {
+        format: SessionAudioFormat {
+            kind: REALTIME_AUDIO_FORMAT.to_string(),
+            rate: REALTIME_AUDIO_SAMPLE_RATE,
+        },
+    };
+
+    // Only these four fields differ between the two modes.
+    let (kind, instructions, output, tools) = match session_mode {
+        RealtimeSessionMode::Conversational => {
+            let codex_tool = SessionFunctionTool {
+                kind: "function".to_string(),
+                name: REALTIME_V2_CODEX_TOOL_NAME.to_string(),
+                description: REALTIME_V2_CODEX_TOOL_DESCRIPTION.to_string(),
+                parameters: json!({
+                    "type": "object",
+                    "properties": {
+                        "prompt": {
+                            "type": "string",
+                            "description": "Prompt text for the delegated Codex task."
+                        }
+                    },
+                    "required": ["prompt"],
+                    "additionalProperties": false
+                }),
+            };
+            let voice_output = SessionAudioOutput {
+                voice: SessionAudioVoice::Alloy,
+            };
+            (
+                REALTIME_V2_SESSION_TYPE.to_string(),
+                Some(instructions),
+                Some(voice_output),
+                Some(vec![codex_tool]),
+            )
+        }
+        RealtimeSessionMode::Transcription => ("transcription".to_string(), None, None, None),
+    };
+
+    SessionUpdateSession {
+        kind,
+        instructions,
+        audio: SessionAudio { input, output },
+        tools,
+    }
+}
+
+/// Returns the websocket intent for this protocol version; always `None` here.
+///
+/// NOTE(review): presumably callers skip sending an intent when this is `None`
+/// — confirm against the call site, which is not visible in this file.
+pub(super) fn websocket_intent() -> Option<&'static str> {
+    None
+}