mirror of
https://github.com/openai/codex.git
synced 2026-04-29 17:06:51 +00:00
Add realtime output modality and transcript events (#17701)
- Add outputModality to thread/realtime/start and wire text/audio output selection through app-server, core, API, and TUI.\n- Rename the realtime transcript delta notification and add a separate transcript done notification that forwards final text from item done without correlating it with deltas.
This commit is contained in:
@@ -21,6 +21,7 @@ use codex_protocol::protocol::RealtimeAudioFrame;
|
||||
use codex_protocol::protocol::RealtimeConversationRealtimeEvent;
|
||||
use codex_protocol::protocol::RealtimeConversationVersion;
|
||||
use codex_protocol::protocol::RealtimeEvent;
|
||||
use codex_protocol::protocol::RealtimeOutputModality;
|
||||
use codex_protocol::protocol::RealtimeVoice;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
@@ -248,6 +249,7 @@ async fn conversation_start_audio_text_close_round_trip() -> Result<()> {
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -381,6 +383,7 @@ async fn conversation_start_defaults_to_v2_and_gpt_realtime_1_5() -> Result<()>
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -464,6 +467,7 @@ async fn conversation_webrtc_start_posts_generated_session() -> Result<()> {
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: Some(ConversationStartTransport::Webrtc {
|
||||
@@ -601,6 +605,7 @@ async fn conversation_start_uses_openai_env_key_fallback_with_chatgpt_auth() ->
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -662,6 +667,7 @@ async fn conversation_transport_close_emits_closed_event() -> Result<()> {
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -747,6 +753,7 @@ async fn conversation_start_preflight_failure_emits_realtime_error_only() -> Res
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -790,6 +797,7 @@ async fn conversation_start_connect_failure_emits_realtime_error_only() -> Resul
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -880,6 +888,7 @@ async fn conversation_second_start_replaces_runtime() -> Result<()> {
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("old".to_string())),
|
||||
session_id: Some("conv_old".to_string()),
|
||||
transport: None,
|
||||
@@ -898,6 +907,7 @@ async fn conversation_second_start_replaces_runtime() -> Result<()> {
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("new".to_string())),
|
||||
session_id: Some("conv_new".to_string()),
|
||||
transport: None,
|
||||
@@ -987,6 +997,7 @@ async fn conversation_uses_experimental_realtime_ws_base_url_override() -> Resul
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1044,6 +1055,7 @@ async fn conversation_uses_default_realtime_backend_prompt() -> Result<()> {
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: None,
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1109,6 +1121,7 @@ async fn conversation_uses_empty_instructions_for_null_or_empty_prompt() -> Resu
|
||||
] {
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt,
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1167,6 +1180,7 @@ async fn conversation_uses_explicit_start_voice() -> Result<()> {
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1217,6 +1231,7 @@ async fn conversation_uses_configured_realtime_voice() -> Result<()> {
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1255,6 +1270,7 @@ async fn conversation_rejects_voice_for_wrong_realtime_version() -> Result<()> {
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1298,6 +1314,7 @@ async fn conversation_uses_experimental_realtime_ws_backend_prompt_override() ->
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("prompt from op".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1363,6 +1380,7 @@ async fn conversation_uses_experimental_realtime_ws_startup_context_override() -
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("prompt from op".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1426,6 +1444,7 @@ async fn conversation_disables_realtime_startup_context_with_empty_override() ->
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("prompt from op".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1482,6 +1501,7 @@ async fn conversation_start_injects_startup_context_from_thread_history() -> Res
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1593,6 +1613,7 @@ async fn conversation_startup_context_current_thread_selects_many_turns_by_budge
|
||||
|
||||
codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1697,6 +1718,7 @@ async fn conversation_startup_context_falls_back_to_workspace_map() -> Result<()
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1751,6 +1773,7 @@ async fn conversation_startup_context_is_truncated_and_sent_once_per_start() ->
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1826,6 +1849,7 @@ async fn conversation_user_text_turn_is_sent_to_realtime_when_active() -> Result
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -1948,6 +1972,7 @@ async fn conversation_user_text_turn_is_capped_when_mirrored_to_realtime() -> Re
|
||||
// active WebSocket session.
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -2075,6 +2100,7 @@ async fn conversation_mirrors_assistant_message_text_to_realtime_handoff() -> Re
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -2204,6 +2230,7 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() ->
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -2348,6 +2375,7 @@ async fn inbound_handoff_request_starts_turn() -> Result<()> {
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -2445,6 +2473,7 @@ async fn inbound_handoff_request_uses_active_transcript() -> Result<()> {
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -2540,6 +2569,7 @@ async fn inbound_handoff_request_clears_active_transcript_after_each_handoff() -
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -2642,6 +2672,7 @@ async fn inbound_conversation_item_does_not_start_turn_and_still_forwards_audio(
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -2757,6 +2788,7 @@ async fn delegated_turn_user_role_echo_does_not_redelegate_and_still_forwards_au
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -2902,6 +2934,7 @@ async fn inbound_handoff_request_does_not_block_realtime_event_forwarding() -> R
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -3032,6 +3065,7 @@ async fn inbound_handoff_request_steers_active_turn() -> Result<()> {
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
@@ -3183,6 +3217,7 @@ async fn inbound_handoff_request_starts_turn_and_does_not_block_realtime_audio()
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
output_modality: RealtimeOutputModality::Audio,
|
||||
prompt: Some(Some("backend prompt".to_string())),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
|
||||
Reference in New Issue
Block a user