diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 2009300325..93db88947b 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1676,6 +1676,7 @@ dependencies = [ "codex-config", "codex-core", "codex-exec", + "codex-exec-server", "codex-execpolicy", "codex-features", "codex-login", @@ -1994,6 +1995,7 @@ dependencies = [ "uuid", "walkdir", "which 8.0.0", + "whoami", "windows-sys 0.52.0", "wiremock", "zip 2.4.2", @@ -11762,6 +11764,7 @@ checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" dependencies = [ "libredox", "wasite", + "web-sys", ] [[package]] diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml index d13de34d9e..32ae50bfb7 100644 --- a/codex-rs/Cargo.toml +++ b/codex-rs/Cargo.toml @@ -352,6 +352,7 @@ vt100 = "0.16.2" walkdir = "2.5.0" webbrowser = "1.0" which = "8" +whoami = "1.6.1" wildmatch = "2.6.1" zip = "2.4.2" zstd = "0.13" diff --git a/codex-rs/app-server-protocol/src/protocol/common.rs b/codex-rs/app-server-protocol/src/protocol/common.rs index 0f02f9bf7e..4610024770 100644 --- a/codex-rs/app-server-protocol/src/protocol/common.rs +++ b/codex-rs/app-server-protocol/src/protocol/common.rs @@ -1761,7 +1761,7 @@ mod tests { request_id: RequestId::Integer(9), params: v2::ThreadRealtimeStartParams { thread_id: "thr_123".to_string(), - prompt: "You are on a call".to_string(), + prompt: Some(Some("You are on a call".to_string())), session_id: Some("sess_456".to_string()), transport: None, }, @@ -1782,6 +1782,85 @@ mod tests { Ok(()) } + #[test] + fn serialize_thread_realtime_start_prompt_default_and_null() -> Result<()> { + let default_prompt_request = ClientRequest::ThreadRealtimeStart { + request_id: RequestId::Integer(9), + params: v2::ThreadRealtimeStartParams { + thread_id: "thr_123".to_string(), + prompt: None, + session_id: None, + transport: None, + }, + }; + assert_eq!( + json!({ + "method": "thread/realtime/start", + "id": 9, + "params": { + "threadId": "thr_123", + "sessionId": null, + "transport": 
null + } + }), + serde_json::to_value(&default_prompt_request)?, + ); + + let null_prompt_request = ClientRequest::ThreadRealtimeStart { + request_id: RequestId::Integer(9), + params: v2::ThreadRealtimeStartParams { + thread_id: "thr_123".to_string(), + prompt: Some(None), + session_id: None, + transport: None, + }, + }; + assert_eq!( + json!({ + "method": "thread/realtime/start", + "id": 9, + "params": { + "threadId": "thr_123", + "prompt": null, + "sessionId": null, + "transport": null + } + }), + serde_json::to_value(&null_prompt_request)?, + ); + + let default_prompt_value = json!({ + "method": "thread/realtime/start", + "id": 9, + "params": { + "threadId": "thr_123", + "sessionId": null, + "transport": null + } + }); + assert_eq!( + serde_json::from_value::<ClientRequest>(default_prompt_value)?, + default_prompt_request, + ); + + let null_prompt_value = json!({ + "method": "thread/realtime/start", + "id": 9, + "params": { + "threadId": "thr_123", + "prompt": null, + "sessionId": null, + "transport": null + } + }); + assert_eq!( + serde_json::from_value::<ClientRequest>(null_prompt_value)?, + null_prompt_request, + ); + + Ok(()) + } + #[test] fn serialize_thread_status_changed_notification() -> Result<()> { let notification = @@ -1852,7 +1931,7 @@ mod tests { request_id: RequestId::Integer(1), params: v2::ThreadRealtimeStartParams { thread_id: "thr_123".to_string(), - prompt: "You are on a call".to_string(), + prompt: Some(Some("You are on a call".to_string())), session_id: None, transport: None, }, diff --git a/codex-rs/app-server-protocol/src/protocol/v2.rs b/codex-rs/app-server-protocol/src/protocol/v2.rs index 8ff5f18cf7..8d485ff829 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2.rs @@ -3854,7 +3854,14 @@ impl From<RealtimeAudioFrame> for CoreRealtimeAudioFrame { #[ts(export_to = "v2/")] pub struct ThreadRealtimeStartParams { pub thread_id: String, - pub prompt: String, + #[serde( + default, + deserialize_with = 
"super::serde_helpers::deserialize_double_option", + serialize_with = "super::serde_helpers::serialize_double_option", + skip_serializing_if = "Option::is_none" + )] + #[ts(optional = nullable)] + pub prompt: Option>, #[ts(optional = nullable)] pub session_id: Option, #[ts(optional = nullable)] diff --git a/codex-rs/app-server/README.md b/codex-rs/app-server/README.md index 10c580d65a..609a792f3d 100644 --- a/codex-rs/app-server/README.md +++ b/codex-rs/app-server/README.md @@ -601,6 +601,9 @@ Then send `offer.sdp` to app-server. Core uses `experimental_realtime_ws_backend } } ``` +Omit `prompt` to use Codex's default realtime backend prompt. Send `prompt: null` or +`prompt: ""` when the session should start without that default backend prompt. + ```javascript await pc.setRemoteDescription({ type: "answer", diff --git a/codex-rs/app-server/tests/suite/v2/experimental_api.rs b/codex-rs/app-server/tests/suite/v2/experimental_api.rs index cd683b40b9..ea18b17544 100644 --- a/codex-rs/app-server/tests/suite/v2/experimental_api.rs +++ b/codex-rs/app-server/tests/suite/v2/experimental_api.rs @@ -74,7 +74,7 @@ async fn realtime_conversation_start_requires_experimental_api_capability() -> R let request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { thread_id: "thr_123".to_string(), - prompt: "hello".to_string(), + prompt: Some(Some("hello".to_string())), session_id: None, transport: None, }) @@ -109,7 +109,7 @@ async fn realtime_webrtc_start_requires_experimental_api_capability() -> Result< let request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { thread_id: "thr_123".to_string(), - prompt: "hello".to_string(), + prompt: Some(Some("hello".to_string())), session_id: None, transport: Some(ThreadRealtimeStartTransport::Webrtc { sdp: "v=offer\r\n".to_string(), diff --git a/codex-rs/app-server/tests/suite/v2/realtime_conversation.rs b/codex-rs/app-server/tests/suite/v2/realtime_conversation.rs index c37e924277..3ffeed03d5 
100644 --- a/codex-rs/app-server/tests/suite/v2/realtime_conversation.rs +++ b/codex-rs/app-server/tests/suite/v2/realtime_conversation.rs @@ -513,7 +513,7 @@ async fn realtime_conversation_streams_v2_notifications() -> Result<()> { let start_request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { thread_id: thread_start.thread.id.clone(), - prompt: "backend prompt".to_string(), + prompt: None, session_id: None, transport: None, }) @@ -539,12 +539,13 @@ async fn realtime_conversation_streams_v2_notifications() -> Result<()> { startup_context_request.body_json()["type"].as_str(), Some("session.update") ); - assert!( + let startup_context_instructions = startup_context_request.body_json()["session"]["instructions"] .as_str() .context("expected startup context instructions")? - .contains(STARTUP_CONTEXT_HEADER) - ); + .to_string(); + assert!(startup_context_instructions.starts_with("backend prompt")); + assert!(startup_context_instructions.contains(STARTUP_CONTEXT_HEADER)); let audio_append_request_id = mcp .send_thread_realtime_append_audio_request(ThreadRealtimeAppendAudioParams { @@ -650,11 +651,9 @@ async fn realtime_conversation_streams_v2_notifications() -> Result<()> { connection[0].body_json()["type"].as_str(), Some("session.update") ); - assert!( - connection[0].body_json()["session"]["instructions"] - .as_str() - .context("expected startup context instructions")? 
- .contains(STARTUP_CONTEXT_HEADER) + assert_eq!( + connection[0].body_json()["session"]["instructions"].as_str(), + Some(startup_context_instructions.as_str()), ); let mut request_types = [ connection[1].body_json()["type"] @@ -724,7 +723,7 @@ async fn realtime_conversation_stop_emits_closed_notification() -> Result<()> { let start_request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { thread_id: thread_start.thread.id.clone(), - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, }) @@ -819,7 +818,7 @@ async fn realtime_webrtc_start_emits_sdp_notification() -> Result<()> { let start_request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { thread_id: thread_id.clone(), - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: Some(ThreadRealtimeStartTransport::Webrtc { sdp: "v=offer\r\n".to_string(), @@ -1379,7 +1378,7 @@ async fn realtime_webrtc_start_surfaces_backend_error() -> Result<()> { let start_request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { thread_id: thread_start.thread.id, - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: Some(ThreadRealtimeStartTransport::Webrtc { sdp: "v=offer\r\n".to_string(), @@ -1436,7 +1435,7 @@ async fn realtime_conversation_requires_feature_flag() -> Result<()> { let start_request_id = mcp .send_thread_realtime_start_request(ThreadRealtimeStartParams { thread_id: thread_start.thread.id.clone(), - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, }) diff --git a/codex-rs/core/BUILD.bazel b/codex-rs/core/BUILD.bazel index 9141579abc..ed01996273 100644 --- a/codex-rs/core/BUILD.bazel +++ b/codex-rs/core/BUILD.bazel @@ -29,6 +29,7 @@ codex_rust_crate( }, 
integration_compile_data_extra = [ "//codex-rs/apply-patch:apply_patch_tool_instructions.md", + "templates/realtime/backend_prompt.md", ], integration_test_timeout = "long", test_data_extra = [ diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml index d2cbad83ef..55ce13afdc 100644 --- a/codex-rs/core/Cargo.toml +++ b/codex-rs/core/Cargo.toml @@ -116,6 +116,7 @@ tracing = { workspace = true, features = ["log"] } url = { workspace = true } uuid = { workspace = true, features = ["serde", "v4", "v5"] } which = { workspace = true } +whoami = { workspace = true } zip = { workspace = true } [target.'cfg(target_os = "macos")'.dependencies] diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 83232de734..efae8ae918 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -13,6 +13,7 @@ mod client_common; pub(crate) mod codex; mod realtime_context; mod realtime_conversation; +mod realtime_prompt; pub use codex::SteerInputError; mod codex_thread; mod compact_remote; diff --git a/codex-rs/core/src/realtime_conversation.rs b/codex-rs/core/src/realtime_conversation.rs index fdcac70da0..17b2440e8d 100644 --- a/codex-rs/core/src/realtime_conversation.rs +++ b/codex-rs/core/src/realtime_conversation.rs @@ -1,6 +1,7 @@ use crate::client::ModelClient; use crate::codex::Session; use crate::realtime_context::build_realtime_startup_context; +use crate::realtime_prompt::prepare_realtime_backend_prompt; use async_channel::Receiver; use async_channel::Sender; use async_channel::TrySendError; @@ -546,14 +547,14 @@ async fn prepare_realtime_start( pub(crate) async fn build_realtime_session_config( sess: &Arc<Session>, - prompt: String, + prompt: Option<Option<String>>, session_id: Option<String>, ) -> CodexResult<RealtimeSessionConfig> { let config = sess.get_config().await; - let prompt = config - .experimental_realtime_ws_backend_prompt - .clone() - .unwrap_or(prompt); + let prompt = prepare_realtime_backend_prompt( + prompt, + config.experimental_realtime_ws_backend_prompt.clone(), + ); let 
startup_context = match config.experimental_realtime_ws_startup_context.clone() { Some(startup_context) => startup_context, None => { @@ -562,10 +563,11 @@ pub(crate) async fn build_realtime_session_config( .unwrap_or_default() } }; - let prompt = if startup_context.is_empty() { - prompt - } else { - format!("{prompt}\n\n{startup_context}") + let prompt = match (prompt.is_empty(), startup_context.is_empty()) { + (true, true) => String::new(), + (true, false) => startup_context, + (false, true) => prompt, + (false, false) => format!("{prompt}\n\n{startup_context}"), }; let model = config.experimental_realtime_ws_model.clone(); let event_parser = match config.realtime.version { diff --git a/codex-rs/core/src/realtime_prompt.rs b/codex-rs/core/src/realtime_prompt.rs new file mode 100644 index 0000000000..d2b7570669 --- /dev/null +++ b/codex-rs/core/src/realtime_prompt.rs @@ -0,0 +1,81 @@ +const BACKEND_PROMPT: &str = include_str!("../templates/realtime/backend_prompt.md"); +const DEFAULT_USER_FIRST_NAME: &str = "there"; +const USER_FIRST_NAME_PLACEHOLDER: &str = "{{ user_first_name }}"; + +pub(crate) fn prepare_realtime_backend_prompt( + prompt: Option<Option<String>>, + config_prompt: Option<String>, +) -> String { + if let Some(config_prompt) = config_prompt + && !config_prompt.trim().is_empty() + { + return config_prompt; + } + + match prompt { + Some(Some(prompt)) => return prompt, + Some(None) => return String::new(), + None => {} + } + + BACKEND_PROMPT + .trim_end() + .replace(USER_FIRST_NAME_PLACEHOLDER, &current_user_first_name()) +} + +fn current_user_first_name() -> String { + [whoami::realname(), whoami::username()] + .into_iter() + .filter_map(|name| name.split_whitespace().next().map(str::to_string)) + .find(|name| !name.is_empty()) + .unwrap_or_else(|| DEFAULT_USER_FIRST_NAME.to_string()) +} + +#[cfg(test)] +mod tests { + use super::prepare_realtime_backend_prompt; + + #[test] + fn prepare_realtime_backend_prompt_prefers_config_override() { + assert_eq!( + 
prepare_realtime_backend_prompt( + Some(Some("prompt from request".to_string())), + Some("prompt from config".to_string()), + ), + "prompt from config" + ); + } + + #[test] + fn prepare_realtime_backend_prompt_uses_request_prompt() { + assert_eq!( + prepare_realtime_backend_prompt( + Some(Some("prompt from request".to_string())), + /*config_prompt*/ None, + ), + "prompt from request" + ); + } + + #[test] + fn prepare_realtime_backend_prompt_preserves_empty_request_prompt() { + assert_eq!( + prepare_realtime_backend_prompt(Some(Some(String::new())), /*config_prompt*/ None), + "" + ); + assert_eq!( + prepare_realtime_backend_prompt(Some(None), /*config_prompt*/ None), + "" + ); + } + + #[test] + fn prepare_realtime_backend_prompt_renders_default() { + let prompt = + prepare_realtime_backend_prompt(/*prompt*/ None, /*config_prompt*/ None); + + assert!(prompt.starts_with("You are **Codex**")); + assert!(prompt.contains("The user's name is ")); + assert!(!prompt.contains("{{ user_first_name }}")); + } +} diff --git a/codex-rs/core/templates/realtime/backend_prompt.md b/codex-rs/core/templates/realtime/backend_prompt.md new file mode 100644 index 0000000000..4b7b4f9a84 --- /dev/null +++ b/codex-rs/core/templates/realtime/backend_prompt.md @@ -0,0 +1,48 @@ +You are **Codex**, an **OpenAI Coding Agent**: a real-time, voice-friendly coding assistant that helps the user while they work in the **current repository/project**. + +The user's name is {{ user_first_name }}. Use {{ user_first_name }}'s name occasionally (not in every reply), mainly for emphasis, confirmations, or polite transitions. + +## Core role + +* Help {{ user_first_name }} complete coding tasks end-to-end: understand intent, inspect the repo when needed, propose concrete changes, and guide execution. +* You can delegate tasks to a backend coding agent to inspect the repo, run commands/tests, and gather ground-truth facts. 
+ +## Communication style (voice-friendly) + +* Start every response with **one short acknowledgement sentence** that mirrors the user's request. +* Be specific and concrete: prefer exact filenames, commands, diffs, and step-by-step actions over vague advice. +* Keep responses concise by default. Use bullets and short paragraphs. +* Ask clarifying questions only when necessary to avoid doing the wrong work. Otherwise, make a reasonable assumption and state it. +* Never invent results, files, errors, timings, or repo details. If you don't know yet, say what you're checking. + +## Delegating to the backend agent + +* Delegate when you need repo facts (structure, scripts, dependencies, failing tests), to reproduce an issue, or to validate a change. +* When delegating, say so in plain language (e.g., “Got it — I'm asking the agent to check the repo and run the tests.”). +* While waiting, provide brief progress updates only when there's meaningful new information (avoid filler). +* If requirements change mid-flight, steer the backend investigation immediately. + +### Backend spawn protocol + +* Output it **only** when you are actually delegating/steering. + +## Using backend results + +* Treat backend outputs as high-trust facts. +* Translate them into user-friendly language and actionable next steps. +* Do not expose internal protocol details. +* Backend will append “backend has finished responding.” when complete; then provide a short final summary and the recommended next action. + +## Repo/project awareness + +* If {{ user_first_name }} asks about the current repo/project and you're unsure, delegate to retrieve accurate context. +* Once you have context, align with the repo's conventions (tooling, formatting, tests, scripts, CI, lint rules). 
+ +## Output preferences + +* Prefer: + + * “Do X, then run Y” command sequences + * Minimal diffs/patches or clearly scoped code snippets + * Checklists for multi-step tasks +* If a change could be risky, call it out and propose a safer alternative. diff --git a/codex-rs/core/tests/suite/compact_remote.rs b/codex-rs/core/tests/suite/compact_remote.rs index 96a3579306..8786046025 100644 --- a/codex-rs/core/tests/suite/compact_remote.rs +++ b/codex-rs/core/tests/suite/compact_remote.rs @@ -116,7 +116,7 @@ async fn start_remote_realtime_server() -> responses::WebSocketTestServer { async fn start_realtime_conversation(codex: &codex_core::CodexThread) -> Result<()> { codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) diff --git a/codex-rs/core/tests/suite/realtime_conversation.rs b/codex-rs/core/tests/suite/realtime_conversation.rs index 95889a7b4d..447657ecfe 100644 --- a/codex-rs/core/tests/suite/realtime_conversation.rs +++ b/codex-rs/core/tests/suite/realtime_conversation.rs @@ -48,6 +48,8 @@ use wiremock::matchers::method; use wiremock::matchers::path_regex; const STARTUP_CONTEXT_HEADER: &str = "Startup context from Codex."; +const REALTIME_BACKEND_PROMPT: &str = include_str!("../../templates/realtime/backend_prompt.md"); +const USER_FIRST_NAME_PLACEHOLDER: &str = "{{ user_first_name }}"; const MEMORY_PROMPT_PHRASE: &str = "You have access to a memory folder with guidance from prior runs."; const REALTIME_CONVERSATION_TEST_SUBPROCESS_ENV_VAR: &str = @@ -101,6 +103,20 @@ fn websocket_request_instructions( .map(str::to_owned) } +fn expected_realtime_backend_prompt() -> String { + REALTIME_BACKEND_PROMPT + .trim_end() + .replace(USER_FIRST_NAME_PLACEHOLDER, &test_user_first_name()) +} + +fn test_user_first_name() -> String { + [whoami::realname(), whoami::username()] + .into_iter() + .filter_map(|name| 
name.split_whitespace().next().map(str::to_string)) + .find(|name| !name.is_empty()) + .unwrap_or_else(|| "there".to_string()) +} + async fn wait_for_matching_websocket_request( server: &core_test_support::responses::WebSocketTestServer, description: &str, @@ -224,7 +240,7 @@ async fn conversation_start_audio_text_close_round_trip() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -375,7 +391,7 @@ async fn conversation_webrtc_start_posts_generated_session() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: Some(ConversationStartTransport::Webrtc { sdp: "v=offer\r\n".to_string(), @@ -511,7 +527,7 @@ async fn conversation_start_uses_openai_env_key_fallback_with_chatgpt_auth() -> test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -571,7 +587,7 @@ async fn conversation_transport_close_emits_closed_event() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -655,7 +671,7 @@ async fn conversation_start_preflight_failure_emits_realtime_error_only() -> Res test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -696,7 +712,7 @@ async fn conversation_start_connect_failure_emits_realtime_error_only() -> Resul test.codex 
.submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -785,7 +801,7 @@ async fn conversation_second_start_replaces_runtime() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "old".to_string(), + prompt: Some(Some("old".to_string())), session_id: Some("conv_old".to_string()), transport: None, })) @@ -802,7 +818,7 @@ async fn conversation_second_start_replaces_runtime() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "new".to_string(), + prompt: Some(Some("new".to_string())), session_id: Some("conv_new".to_string()), transport: None, })) @@ -889,7 +905,7 @@ async fn conversation_uses_experimental_realtime_ws_base_url_override() -> Resul test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -919,6 +935,132 @@ async fn conversation_uses_experimental_realtime_ws_base_url_override() -> Resul Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn conversation_uses_default_realtime_backend_prompt() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_websocket_server(vec![ + vec![], + vec![vec![json!({ + "type": "session.updated", + "session": { "id": "sess_default", "instructions": "default" } + })]], + ]) + .await; + + let mut builder = test_codex().with_config(|config| { + config.experimental_realtime_ws_startup_context = + Some("controlled startup context".to_string()); + }); + let test = builder.build_with_websocket_server(&server).await?; + assert!( + server + .wait_for_handshakes(/*expected*/ 1, Duration::from_secs(2)) + .await + ); + + test.codex + .submit(Op::RealtimeConversationStart(ConversationStartParams { + prompt: None, 
+ session_id: None, + transport: None, + })) + .await?; + + let session_updated = wait_for_event_match(&test.codex, |msg| match msg { + EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent { + payload: RealtimeEvent::SessionUpdated { session_id, .. }, + }) => Some(session_id.clone()), + _ => None, + }) + .await; + assert_eq!(session_updated, "sess_default"); + + let connections = server.connections(); + assert_eq!(connections.len(), 2); + let instructions = + websocket_request_instructions(&connections[1][0]).expect("default session instructions"); + assert_eq!( + instructions, + format!( + "{}\n\ncontrolled startup context", + expected_realtime_backend_prompt() + ) + ); + + server.shutdown().await; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn conversation_uses_empty_instructions_for_null_or_empty_prompt() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_websocket_server(vec![ + vec![], + vec![vec![json!({ + "type": "session.updated", + "session": { "id": "sess_null", "instructions": "" } + })]], + vec![vec![json!({ + "type": "session.updated", + "session": { "id": "sess_empty", "instructions": "" } + })]], + ]) + .await; + + let mut builder = test_codex().with_config(|config| { + config.experimental_realtime_ws_startup_context = Some(String::new()); + }); + let test = builder.build_with_websocket_server(&server).await?; + assert!( + server + .wait_for_handshakes(/*expected*/ 1, Duration::from_secs(2)) + .await + ); + + for (prompt, expected_session_id) in [ + (Some(None), "sess_null"), + (Some(Some(String::new())), "sess_empty"), + ] { + test.codex + .submit(Op::RealtimeConversationStart(ConversationStartParams { + prompt, + session_id: None, + transport: None, + })) + .await?; + + let session_updated = wait_for_event_match(&test.codex, |msg| match msg { + EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent { + payload: RealtimeEvent::SessionUpdated { session_id, 
.. }, + }) => Some(session_id.clone()), + _ => None, + }) + .await; + assert_eq!(session_updated, expected_session_id); + + test.codex.submit(Op::RealtimeConversationClose).await?; + let _closed = wait_for_event_match(&test.codex, |msg| match msg { + EventMsg::RealtimeConversationClosed(closed) => Some(closed.clone()), + _ => None, + }) + .await; + } + + let connections = server.connections(); + assert_eq!(connections.len(), 3); + let null_instructions = + websocket_request_instructions(&connections[1][0]).expect("null prompt instructions"); + let empty_instructions = + websocket_request_instructions(&connections[2][0]).expect("empty prompt instructions"); + assert_eq!(null_instructions, ""); + assert_eq!(empty_instructions, ""); + + server.shutdown().await; + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn conversation_uses_experimental_realtime_ws_backend_prompt_override() -> Result<()> { skip_if_no_network!(Ok(())); @@ -944,7 +1086,7 @@ async fn conversation_uses_experimental_realtime_ws_backend_prompt_override() -> test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "prompt from op".to_string(), + prompt: Some(Some("prompt from op".to_string())), session_id: None, transport: None, })) @@ -1007,7 +1149,7 @@ async fn conversation_uses_experimental_realtime_ws_startup_context_override() - test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "prompt from op".to_string(), + prompt: Some(Some("prompt from op".to_string())), session_id: None, transport: None, })) @@ -1068,7 +1210,7 @@ async fn conversation_disables_realtime_startup_context_with_empty_override() -> test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "prompt from op".to_string(), + prompt: Some(Some("prompt from op".to_string())), session_id: None, transport: None, })) @@ -1122,7 +1264,7 @@ async fn conversation_start_injects_startup_context_from_thread_history() -> Res 
test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -1176,7 +1318,7 @@ async fn conversation_startup_context_falls_back_to_workspace_map() -> Result<() test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -1228,7 +1370,7 @@ async fn conversation_startup_context_is_truncated_and_sent_once_per_start() -> test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -1313,7 +1455,7 @@ async fn conversation_mirrors_assistant_message_text_to_realtime_handoff() -> Re test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -1440,7 +1582,7 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() -> test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -1582,7 +1724,7 @@ async fn inbound_handoff_request_starts_turn() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -1677,7 +1819,7 @@ async fn inbound_handoff_request_uses_active_transcript() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), 
session_id: None, transport: None, })) @@ -1770,7 +1912,7 @@ async fn inbound_handoff_request_clears_active_transcript_after_each_handoff() - test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -1870,7 +2012,7 @@ async fn inbound_conversation_item_does_not_start_turn_and_still_forwards_audio( test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -1983,7 +2125,7 @@ async fn delegated_turn_user_role_echo_does_not_redelegate_and_still_forwards_au test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -2126,7 +2268,7 @@ async fn inbound_handoff_request_does_not_block_realtime_event_forwarding() -> R test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -2253,7 +2395,7 @@ async fn inbound_handoff_request_steers_active_turn() -> Result<()> { test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) @@ -2395,7 +2537,7 @@ async fn inbound_handoff_request_starts_turn_and_does_not_block_realtime_audio() test.codex .submit(Op::RealtimeConversationStart(ConversationStartParams { - prompt: "backend prompt".to_string(), + prompt: Some(Some("backend prompt".to_string())), session_id: None, transport: None, })) diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 239d3c61de..8f50df24dc 
100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -133,7 +133,13 @@ pub struct McpServerRefreshConfig { #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)] pub struct ConversationStartParams { - pub prompt: String, + #[serde( + default, + deserialize_with = "conversation_start_prompt_serde::deserialize", + serialize_with = "conversation_start_prompt_serde::serialize", + skip_serializing_if = "Option::is_none" + )] + pub prompt: Option<Option<String>>, #[serde(skip_serializing_if = "Option::is_none")] pub session_id: Option<String>, #[serde(skip_serializing_if = "Option::is_none")] @@ -148,6 +154,28 @@ pub enum ConversationStartTransport { Webrtc { sdp: String }, } +mod conversation_start_prompt_serde { + use serde::Deserializer; + use serde::Serializer; + + pub(crate) fn deserialize<'de, D>(deserializer: D) -> Result<Option<Option<String>>, D::Error> + where + D: Deserializer<'de>, + { + serde_with::rust::double_option::deserialize(deserializer) + } + + pub(crate) fn serialize<S>( + value: &Option<Option<String>>, + serializer: S, + ) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serde_with::rust::double_option::serialize(value, serializer) + } +} + #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)] pub struct RealtimeAudioFrame { pub data: String, @@ -4410,12 +4438,12 @@ mod tests { }, }); let start = Op::RealtimeConversationStart(ConversationStartParams { - prompt: "be helpful".to_string(), + prompt: Some(Some("be helpful".to_string())), session_id: Some("conv_1".to_string()), transport: None, }); let webrtc_start = Op::RealtimeConversationStart(ConversationStartParams { - prompt: "be helpful".to_string(), + prompt: Some(Some("be helpful".to_string())), session_id: Some("conv_1".to_string()), transport: Some(ConversationStartTransport::Webrtc { sdp: "v=offer\r\n".to_string(), @@ -4425,6 +4453,16 @@ mod tests { text: "hello".to_string(), }); let close = Op::RealtimeConversationClose; + let default_prompt_start = 
Op::RealtimeConversationStart(ConversationStartParams { + prompt: None, + session_id: None, + transport: None, + }); + let null_prompt_start = Op::RealtimeConversationStart(ConversationStartParams { + prompt: Some(None), + session_id: None, + transport: None, + }); assert_eq!( serde_json::to_value(&start).unwrap(), @@ -4434,6 +4472,34 @@ json!({ "type": "realtime_conversation_start", "prompt": "be helpful", "session_id": "conv_1" }) ); + assert_eq!( + serde_json::to_value(&default_prompt_start).unwrap(), + json!({ + "type": "realtime_conversation_start" + }) + ); + assert_eq!( + serde_json::to_value(&null_prompt_start).unwrap(), + json!({ + "type": "realtime_conversation_start", + "prompt": null + }) + ); + assert_eq!( + serde_json::from_value::<Op>(json!({ + "type": "realtime_conversation_start" + })) + .unwrap(), + default_prompt_start + ); + assert_eq!( + serde_json::from_value::<Op>(json!({ + "type": "realtime_conversation_start", + "prompt": null + })) + .unwrap(), + null_prompt_start + ); assert_eq!( serde_json::to_value(&audio).unwrap(), json!({ diff --git a/codex-rs/tui/src/chatwidget/realtime.rs b/codex-rs/tui/src/chatwidget/realtime.rs index 1660a97cda..8906048d45 100644 --- a/codex-rs/tui/src/chatwidget/realtime.rs +++ b/codex-rs/tui/src/chatwidget/realtime.rs @@ -15,8 +15,6 @@ use std::sync::atomic::AtomicU16; #[cfg(not(target_os = "linux"))] use std::time::Duration; -const REALTIME_CONVERSATION_PROMPT: &str = "You are in a realtime voice conversation in the Codex TUI. Respond conversationally and concisely."; - #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] pub(super) enum RealtimeConversationPhase { #[default]