mirror of
https://github.com/openai/codex.git
synced 2026-04-26 23:55:25 +00:00
Handle realtime spawn_transcript delegation (#12619)
This commit is contained in:
@@ -566,7 +566,7 @@ fn message_input_texts(body: &Value, role: &str) -> Vec<String> {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn inbound_realtime_text_starts_turn_and_ignores_role() -> Result<()> {
|
||||
async fn inbound_realtime_text_starts_turn_for_assistant_role() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let api_server = start_mock_server().await;
|
||||
@@ -589,7 +589,7 @@ async fn inbound_realtime_text_starts_turn_and_ignores_role() -> Result<()> {
|
||||
"type": "conversation.item.added",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "text", "text": "text from realtime"}]
|
||||
}
|
||||
}),
|
||||
@@ -633,6 +633,321 @@ async fn inbound_realtime_text_starts_turn_and_ignores_role() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn inbound_realtime_text_ignores_user_role_and_still_forwards_audio() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let api_server = start_mock_server().await;
|
||||
|
||||
let realtime_server = start_websocket_server(vec![vec![vec![
|
||||
json!({
|
||||
"type": "session.created",
|
||||
"session": { "id": "sess_ignore_user_role" }
|
||||
}),
|
||||
json!({
|
||||
"type": "conversation.item.added",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [{"type": "text", "text": "echoed local text"}]
|
||||
}
|
||||
}),
|
||||
json!({
|
||||
"type": "response.output_audio.delta",
|
||||
"delta": "AQID",
|
||||
"sample_rate": 24000,
|
||||
"num_channels": 1
|
||||
}),
|
||||
]]])
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex().with_config({
|
||||
let realtime_base_url = realtime_server.uri().to_string();
|
||||
move |config| {
|
||||
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
|
||||
}
|
||||
});
|
||||
let test = builder.build(&api_server).await?;
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
prompt: "backend prompt".to_string(),
|
||||
session_id: None,
|
||||
}))
|
||||
.await?;
|
||||
|
||||
let _ = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::SessionCreated { session_id },
|
||||
}) if session_id == "sess_ignore_user_role" => Some(()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let audio_out = tokio::time::timeout(
|
||||
Duration::from_millis(500),
|
||||
wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::AudioOut(frame),
|
||||
}) => Some(frame.clone()),
|
||||
_ => None,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.expect("timed out waiting for realtime audio after user-role conversation item");
|
||||
assert_eq!(audio_out.data, "AQID");
|
||||
|
||||
let unexpected_turn_started = tokio::time::timeout(
|
||||
Duration::from_millis(200),
|
||||
wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::TurnStarted(_) => Some(()),
|
||||
_ => None,
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
assert!(unexpected_turn_started.is_err());
|
||||
|
||||
realtime_server.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn delegated_turn_user_role_echo_does_not_redelegate_and_still_forwards_audio() -> Result<()>
|
||||
{
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let (gate_completed_tx, gate_completed_rx) = oneshot::channel();
|
||||
let first_chunks = vec![
|
||||
StreamingSseChunk {
|
||||
gate: None,
|
||||
body: sse_event(responses::ev_response_created("resp-1")),
|
||||
},
|
||||
StreamingSseChunk {
|
||||
gate: None,
|
||||
body: sse_event(responses::ev_assistant_message(
|
||||
"msg-1",
|
||||
"assistant says hi",
|
||||
)),
|
||||
},
|
||||
StreamingSseChunk {
|
||||
gate: Some(gate_completed_rx),
|
||||
body: sse_event(responses::ev_completed("resp-1")),
|
||||
},
|
||||
];
|
||||
let (api_server, completions) = start_streaming_sse_server(vec![first_chunks]).await;
|
||||
|
||||
let realtime_server = start_websocket_server(vec![vec![
|
||||
vec![
|
||||
json!({
|
||||
"type": "session.created",
|
||||
"session": { "id": "sess_echo_guard" }
|
||||
}),
|
||||
json!({
|
||||
"type": "conversation.item.added",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "text", "text": "delegate now"}]
|
||||
}
|
||||
}),
|
||||
],
|
||||
vec![
|
||||
json!({
|
||||
"type": "conversation.item.added",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [{"type": "text", "text": "assistant says hi"}]
|
||||
}
|
||||
}),
|
||||
json!({
|
||||
"type": "response.output_audio.delta",
|
||||
"delta": "AQID",
|
||||
"sample_rate": 24000,
|
||||
"num_channels": 1
|
||||
}),
|
||||
],
|
||||
]])
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex().with_model("gpt-5.1").with_config({
|
||||
let realtime_base_url = realtime_server.uri().to_string();
|
||||
move |config| {
|
||||
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
|
||||
}
|
||||
});
|
||||
let test = builder.build_with_streaming_server(&api_server).await?;
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
prompt: "backend prompt".to_string(),
|
||||
session_id: None,
|
||||
}))
|
||||
.await?;
|
||||
|
||||
let _ = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::SessionCreated { session_id },
|
||||
}) if session_id == "sess_echo_guard" => Some(()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let _ = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::ConversationItemAdded(item),
|
||||
}) => item
|
||||
.get("content")
|
||||
.and_then(Value::as_array)
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.any(|content| content.get("text").and_then(Value::as_str) == Some("delegate now"))
|
||||
.then_some(()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let audio_out = tokio::time::timeout(
|
||||
Duration::from_millis(500),
|
||||
wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::AudioOut(frame),
|
||||
}) => Some(frame.clone()),
|
||||
_ => None,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.expect("timed out waiting for realtime audio after echoed user-role message");
|
||||
assert_eq!(audio_out.data, "AQID");
|
||||
|
||||
let completion = completions
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("missing delegated turn completion");
|
||||
let _ = gate_completed_tx.send(());
|
||||
completion
|
||||
.await
|
||||
.expect("delegated turn request did not complete");
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TurnComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
let requests = api_server.requests().await;
|
||||
assert_eq!(requests.len(), 1);
|
||||
|
||||
realtime_server.shutdown().await;
|
||||
api_server.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn inbound_realtime_text_does_not_block_realtime_event_forwarding() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let (gate_completed_tx, gate_completed_rx) = oneshot::channel();
|
||||
let first_chunks = vec![
|
||||
StreamingSseChunk {
|
||||
gate: None,
|
||||
body: sse_event(responses::ev_response_created("resp-1")),
|
||||
},
|
||||
StreamingSseChunk {
|
||||
gate: Some(gate_completed_rx),
|
||||
body: sse_event(responses::ev_completed("resp-1")),
|
||||
},
|
||||
];
|
||||
let (api_server, completions) = start_streaming_sse_server(vec![first_chunks]).await;
|
||||
|
||||
let realtime_server = start_websocket_server(vec![vec![vec![
|
||||
json!({
|
||||
"type": "session.created",
|
||||
"session": { "id": "sess_non_blocking" }
|
||||
}),
|
||||
json!({
|
||||
"type": "conversation.item.added",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "text", "text": "delegate now"}]
|
||||
}
|
||||
}),
|
||||
json!({
|
||||
"type": "response.output_audio.delta",
|
||||
"delta": "AQID",
|
||||
"sample_rate": 24000,
|
||||
"num_channels": 1
|
||||
}),
|
||||
]]])
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex().with_model("gpt-5.1").with_config({
|
||||
let realtime_base_url = realtime_server.uri().to_string();
|
||||
move |config| {
|
||||
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
|
||||
}
|
||||
});
|
||||
let test = builder.build_with_streaming_server(&api_server).await?;
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
prompt: "backend prompt".to_string(),
|
||||
session_id: None,
|
||||
}))
|
||||
.await?;
|
||||
|
||||
let _ = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::SessionCreated { session_id },
|
||||
}) if session_id == "sess_non_blocking" => Some(()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let _ = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::ConversationItemAdded(item),
|
||||
}) => item
|
||||
.get("content")
|
||||
.and_then(Value::as_array)
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.any(|content| content.get("text").and_then(Value::as_str) == Some("delegate now"))
|
||||
.then_some(()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let audio_out = tokio::time::timeout(
|
||||
Duration::from_millis(500),
|
||||
wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::AudioOut(frame),
|
||||
}) => Some(frame.clone()),
|
||||
_ => None,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.expect("timed out waiting for realtime audio while delegated turn was still pending");
|
||||
assert_eq!(audio_out.data, "AQID");
|
||||
|
||||
let completion = completions
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("missing delegated turn completion");
|
||||
let _ = gate_completed_tx.send(());
|
||||
completion
|
||||
.await
|
||||
.expect("delegated turn request did not complete");
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TurnComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
realtime_server.shutdown().await;
|
||||
api_server.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn inbound_realtime_text_steers_active_turn() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
@@ -791,3 +1106,121 @@ async fn inbound_realtime_text_steers_active_turn() -> Result<()> {
|
||||
api_server.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn inbound_spawn_transcript_starts_turn_and_does_not_block_realtime_audio() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let (gate_completed_tx, gate_completed_rx) = oneshot::channel();
|
||||
let first_chunks = vec![
|
||||
StreamingSseChunk {
|
||||
gate: None,
|
||||
body: sse_event(responses::ev_response_created("resp-1")),
|
||||
},
|
||||
StreamingSseChunk {
|
||||
gate: Some(gate_completed_rx),
|
||||
body: sse_event(responses::ev_completed("resp-1")),
|
||||
},
|
||||
];
|
||||
let (api_server, completions) = start_streaming_sse_server(vec![first_chunks]).await;
|
||||
|
||||
let delegated_text = "delegate from spawn transcript";
|
||||
let realtime_server = start_websocket_server(vec![vec![vec![
|
||||
json!({
|
||||
"type": "session.created",
|
||||
"session": { "id": "sess_spawn_transcript" }
|
||||
}),
|
||||
json!({
|
||||
"type": "conversation.item.added",
|
||||
"item": {
|
||||
"type": "spawn_transcript",
|
||||
"seq": 1,
|
||||
"full_user_transcript": delegated_text,
|
||||
"delta_user_transcript": delegated_text,
|
||||
"backend_prompt_messages": [{
|
||||
"role": "user",
|
||||
"channel": null,
|
||||
"content": delegated_text,
|
||||
"content_type": "text"
|
||||
}],
|
||||
"transcript_source": "backend_prompt_messages"
|
||||
}
|
||||
}),
|
||||
json!({
|
||||
"type": "response.output_audio.delta",
|
||||
"delta": "AQID",
|
||||
"sample_rate": 24000,
|
||||
"num_channels": 1
|
||||
}),
|
||||
]]])
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex().with_model("gpt-5.1").with_config({
|
||||
let realtime_base_url = realtime_server.uri().to_string();
|
||||
move |config| {
|
||||
config.experimental_realtime_ws_base_url = Some(realtime_base_url);
|
||||
}
|
||||
});
|
||||
let test = builder.build_with_streaming_server(&api_server).await?;
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
prompt: "backend prompt".to_string(),
|
||||
session_id: None,
|
||||
}))
|
||||
.await?;
|
||||
|
||||
let _ = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::SessionCreated { session_id },
|
||||
}) if session_id == "sess_spawn_transcript" => Some(()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let _ = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::ConversationItemAdded(item),
|
||||
}) => (item.get("type").and_then(Value::as_str) == Some("spawn_transcript")
|
||||
&& item.get("delta_user_transcript").and_then(Value::as_str) == Some(delegated_text))
|
||||
.then_some(()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let audio_out = tokio::time::timeout(
|
||||
Duration::from_millis(500),
|
||||
wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::AudioOut(frame),
|
||||
}) => Some(frame.clone()),
|
||||
_ => None,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.expect("timed out waiting for realtime audio after spawn_transcript");
|
||||
assert_eq!(audio_out.data, "AQID");
|
||||
|
||||
let completion = completions
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("missing delegated turn completion");
|
||||
let _ = gate_completed_tx.send(());
|
||||
completion
|
||||
.await
|
||||
.expect("delegated turn request did not complete");
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TurnComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
let requests = api_server.requests().await;
|
||||
assert_eq!(requests.len(), 1);
|
||||
let first_body: Value = serde_json::from_slice(&requests[0]).expect("parse first request");
|
||||
let first_texts = message_input_texts(&first_body, "user");
|
||||
assert!(first_texts.iter().any(|text| text == delegated_text));
|
||||
|
||||
realtime_server.shutdown().await;
|
||||
api_server.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user