Queue Realtime V2 response.create while active (#17306)

Builds on #17264.

- queues Realtime V2 `response.create` while an active response is open,
then flushes it after `response.done` or `response.cancelled`
- requests `response.create` after background agent final output and
steering acknowledgements
- adds app-server integration coverage for all `response.create` paths

Validation:
- `just fmt`
- `cargo check -p codex-app-server --tests`
- `git diff --check`
- CI green

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-04-10 09:09:13 -07:00
committed by GitHub
parent 88165e179a
commit 2e81eac004
7 changed files with 454 additions and 40 deletions

View File

@@ -71,6 +71,8 @@ use wiremock::matchers::path_regex;
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
const STARTUP_CONTEXT_HEADER: &str = "Startup context from Codex.";
const V2_STEERING_ACKNOWLEDGEMENT: &str =
"This was sent to steer the previous background agent task.";
#[derive(Debug, Clone, Copy)]
enum StartupContextConfig<'a> {
@@ -329,6 +331,8 @@ impl RealtimeE2eHarness {
read_notification(&mut self.mcp, method).await
}
/// Returns the nth JSON message app-server wrote to the fake Realtime API
/// sideband websocket.
async fn sideband_outbound_request(&self, request_index: usize) -> Value {
self.realtime_server
.wait_for_request(/*connection_index*/ 0, request_index)
@@ -1204,6 +1208,169 @@ async fn webrtc_v2_forwards_audio_and_text_between_client_and_sideband() -> Resu
Ok(())
}
/// Regression test for Realtime V2's rule that only one response may be
/// active at a time.
///
/// The Realtime API rejects a `response.create` sent while a default response
/// is still active. The input task is therefore expected to hold the second
/// create back and flush it only after the server sends `response.done` for
/// the active response.
#[tokio::test]
async fn webrtc_v2_queues_text_response_create_while_response_is_active() -> Result<()> {
    skip_if_no_network!(Ok(()));

    // Step 1: script the fake sideband so that a response becomes active after
    // the first user text turn and finishes only after a later audio input.
    let main_loop_responses = no_main_loop_responses();
    let sideband_script = vec![
        vec![session_updated("sess_v2_response_queue")],
        vec![],
        vec![
            json!({
                "type": "response.created",
                "response": { "id": "resp_active" }
            }),
            json!({
                "type": "response.output_text.delta",
                "delta": "active response started"
            }),
        ],
        vec![],
        vec![json!({
            "type": "response.done",
            "response": { "id": "resp_active" }
        })],
        vec![],
    ];
    let mut harness = RealtimeE2eHarness::new(
        RealtimeTestVersion::V2,
        main_loop_responses,
        realtime_sideband(vec![realtime_sideband_connection(sideband_script)]),
    )
    .await?;

    let session = harness.start_webrtc_realtime("v=offer\r\n").await?;
    assert_eq!(session.started.version, RealtimeConversationVersion::V2);

    // Every `sideband_outbound_request(n)` below reads a frame app-server sent
    // upstream to the fake Realtime API sideband websocket; none of these are
    // client-facing notifications.
    let session_update = harness.sideband_outbound_request(/*request_index*/ 0).await;
    assert_v2_session_update(&session_update)?;

    // Step 2: first text turn. No default response is active yet, so the
    // `response.create` may go out right behind the user item.
    let thread_id = session.started.thread_id.clone();
    harness.append_text(thread_id.clone(), "first").await?;
    let first_item = harness.sideband_outbound_request(/*request_index*/ 1).await;
    assert_v2_user_text_item(&first_item, "first");
    let first_create = harness.sideband_outbound_request(/*request_index*/ 2).await;
    assert_v2_response_create(&first_create);

    let transcript_update = harness
        .read_notification::<ThreadRealtimeTranscriptUpdatedNotification>(
            "thread/realtime/transcriptUpdated",
        )
        .await?;
    assert_eq!(transcript_update.text, "active response started");

    // Step 3: second text turn while `resp_active` is still open. The user
    // message has to reach realtime, but the matching `response.create` must
    // be withheld, otherwise the Realtime API rejects it as an
    // active-response conflict.
    harness.append_text(thread_id.clone(), "second").await?;
    let second_item = harness.sideband_outbound_request(/*request_index*/ 3).await;
    assert_v2_user_text_item(&second_item, "second");

    // Step 4: the audio input makes the scripted sideband emit
    // `response.done`, which clears the active response and releases the
    // queued `response.create` for the second text turn.
    harness.append_audio(thread_id).await?;

    // Negative check: had the second text turn sent `response.create` right
    // away, request 4 would be that create rather than the audio append.
    let audio_append = harness.sideband_outbound_request(/*request_index*/ 4).await;
    assert_eq!(audio_append["type"], "input_audio_buffer.append");
    assert_eq!(audio_append["audio"], "BQYH");
    let flushed_create = harness.sideband_outbound_request(/*request_index*/ 5).await;
    assert_v2_response_create(&flushed_create);

    harness.shutdown().await;
    Ok(())
}
/// Regression test for the queued `response.create` path when the active
/// Realtime V2 response ends with a cancellation rather than a completion.
///
/// `response.cancelled` is expected to release the active-response guard the
/// same way `response.done` does, so a text turn queued while the response was
/// active still produces one deferred `response.create`.
#[tokio::test]
async fn webrtc_v2_flushes_queued_text_response_create_when_response_is_cancelled() -> Result<()> {
    skip_if_no_network!(Ok(()));

    // Step 1: script the fake sideband so that a response becomes active after
    // the first text turn and is cancelled only after a later audio input.
    let main_loop_responses = no_main_loop_responses();
    let sideband_script = vec![
        vec![session_updated("sess_v2_response_cancel_queue")],
        vec![],
        vec![json!({
            "type": "response.created",
            "response": { "id": "resp_cancelled" }
        })],
        vec![],
        vec![json!({
            "type": "response.cancelled",
            "response": { "id": "resp_cancelled" }
        })],
        vec![],
    ];
    let mut harness = RealtimeE2eHarness::new(
        RealtimeTestVersion::V2,
        main_loop_responses,
        realtime_sideband(vec![realtime_sideband_connection(sideband_script)]),
    )
    .await?;

    let session = harness.start_webrtc_realtime("v=offer\r\n").await?;
    assert_eq!(session.started.version, RealtimeConversationVersion::V2);
    let session_update = harness.sideband_outbound_request(/*request_index*/ 0).await;
    assert_v2_session_update(&session_update)?;

    // Step 2: first text turn. No default response is active yet, so the
    // `response.create` may go out right behind the user item.
    let thread_id = session.started.thread_id.clone();
    harness.append_text(thread_id.clone(), "first").await?;
    let first_item = harness.sideband_outbound_request(/*request_index*/ 1).await;
    assert_v2_user_text_item(&first_item, "first");
    let first_create = harness.sideband_outbound_request(/*request_index*/ 2).await;
    assert_v2_response_create(&first_create);

    // Step 3: second text turn while `resp_cancelled` is still open. The user
    // message has to reach realtime, while `response.create` stays queued.
    harness.append_text(thread_id.clone(), "second").await?;
    let second_item = harness.sideband_outbound_request(/*request_index*/ 3).await;
    assert_v2_user_text_item(&second_item, "second");

    // Step 4: the audio input makes the scripted sideband emit
    // `response.cancelled`, which clears the active response and releases the
    // queued `response.create` for the second text turn.
    harness.append_audio(thread_id).await?;

    // Negative check: had the second text turn sent `response.create` right
    // away, request 4 would be that create rather than the audio append.
    let audio_append = harness.sideband_outbound_request(/*request_index*/ 4).await;
    assert_eq!(audio_append["type"], "input_audio_buffer.append");
    assert_eq!(audio_append["audio"], "BQYH");
    let flushed_create = harness.sideband_outbound_request(/*request_index*/ 5).await;
    assert_v2_response_create(&flushed_create);

    harness.shutdown().await;
    Ok(())
}
/// Regression coverage for the Realtime V2 background-agent final-output path.
///
/// Once the background agent finishes, app-server sends the final function-call
/// output to realtime and then requests a new `response.create` so realtime can
/// react to that final output.
#[tokio::test]
async fn webrtc_v2_background_agent_tool_call_delegates_and_returns_function_output() -> Result<()>
{
@@ -1223,6 +1390,7 @@ async fn webrtc_v2_background_agent_tool_call_delegates_and_returns_function_out
],
vec![],
vec![],
vec![],
])]),
)
.await?;
@@ -1240,8 +1408,8 @@ async fn webrtc_v2_background_agent_tool_call_delegates_and_returns_function_out
.await?;
assert_eq!(turn_completed.thread_id, harness.thread_id);
// Phase 3: assert the delegated prompt went to Responses and the result returned as exactly one
// v2 function-call output event on the sideband.
// Phase 3: assert the delegated prompt went to Responses and the result
// returned as exactly one v2 function-call output event on the sideband.
let requests = harness.main_loop_responses_requests().await?;
assert_eq!(requests.len(), 1);
assert!(
@@ -1260,6 +1428,99 @@ async fn webrtc_v2_background_agent_tool_call_delegates_and_returns_function_out
1
);
// Phase 4: after the final function-call output, realtime needs an explicit
// `response.create` to produce the next user-visible response.
assert_v2_response_create(&harness.sideband_outbound_request(/*request_index*/ 3).await);
harness.shutdown().await;
Ok(())
}
/// Regression coverage for Realtime V2 steering while a background-agent task is
/// already active.
///
/// The second background-agent tool call is treated as guidance for the active
/// task. App-server acknowledges that steering message to realtime and then
/// emits `response.create` so realtime can speak that acknowledgement.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn webrtc_v2_background_agent_steering_ack_requests_response_create() -> Result<()> {
skip_if_no_network!(Ok(()));
// Phase 1: gate the delegated Responses turn from the first tool call so
// the background-agent handoff stays active while realtime sends a second
// tool call that should steer the active task.
let main_loop_responses_server = responses::start_mock_server().await;
let (gate_completed_tx, gate_completed_rx) = mpsc::channel();
let gated_response = responses::sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_assistant_message("msg-1", "first task finished"),
responses::ev_completed("resp-1"),
]);
Mock::given(method("POST"))
.and(path_regex(".*/responses$"))
.respond_with(GatedSseResponse {
gate_rx: Mutex::new(Some(gate_completed_rx)),
response: gated_response,
})
.expect(2)
.mount(&main_loop_responses_server)
.await;
let mut harness = RealtimeE2eHarness::new_with_main_loop_responses_server(
RealtimeTestVersion::V2,
main_loop_responses_server,
realtime_sideband(vec![realtime_sideband_connection(vec![
vec![
session_updated("sess_v2_steering_ack"),
v2_background_agent_tool_call("call_active", "start a task"),
v2_background_agent_tool_call("call_steer", "steer the active task"),
],
vec![],
vec![],
vec![],
vec![],
])]),
)
.await?;
let started = harness.start_webrtc_realtime("v=offer\r\n").await?;
assert_eq!(started.started.version, RealtimeConversationVersion::V2);
assert_v2_session_update(&harness.sideband_outbound_request(/*request_index*/ 0).await)?;
let turn_started = harness
.read_notification::<TurnStartedNotification>("turn/started")
.await?;
assert_eq!(turn_started.thread_id, harness.thread_id);
// Phase 2: the second tool call happens while `call_active` is still
// running, so app-server sends a steering acknowledgement as a function-call
// output for the second call.
assert_v2_function_call_output(
&harness.sideband_outbound_request(/*request_index*/ 1).await,
"call_steer",
V2_STEERING_ACKNOWLEDGEMENT,
);
// Phase 3: realtime needs a `response.create` after the steering
// acknowledgement so it can surface that acknowledgement to the user.
assert_v2_response_create(&harness.sideband_outbound_request(/*request_index*/ 2).await);
// Phase 4: release the gated delegated turn. Codex should then continue
// the same run with the steering text included in the follow-up Responses
// request, proving realtime did not merely acknowledge and drop it.
let _ = gate_completed_tx.send(());
let turn_completed = harness
.read_notification::<TurnCompletedNotification>("turn/completed")
.await?;
assert_eq!(turn_completed.thread_id, harness.thread_id);
let requests = harness.main_loop_responses_requests().await?;
assert_eq!(requests.len(), 2);
assert!(
response_request_contains_text(&requests[1], "steer the active task"),
"follow-up Responses request should contain steering prompt: {}",
requests[1]
);
harness.shutdown().await;
Ok(())
}
@@ -1714,6 +1975,32 @@ fn assert_v2_progress_update(request: &Value, expected_text: &str) {
);
}
/// Asserts that `request` is exactly a `conversation.item.create` frame
/// carrying a single user `input_text` message whose text is `expected_text`.
///
/// The comparison covers the whole JSON value, so any extra or missing field
/// fails the assertion.
fn assert_v2_user_text_item(request: &Value, expected_text: &str) {
    let expected_frame = json!({
        "type": "conversation.item.create",
        "item": {
            "type": "message",
            "role": "user",
            "content": [{
                "type": "input_text",
                "text": expected_text
            }]
        }
    });
    assert_eq!(request, &expected_frame);
}
/// Asserts that `request` is exactly a bare `response.create` frame, i.e. a
/// JSON object whose only field is `"type": "response.create"`.
fn assert_v2_response_create(request: &Value) {
    let expected_frame = json!({ "type": "response.create" });
    assert_eq!(request, &expected_frame);
}
fn assert_v1_session_update(request: &Value) -> Result<()> {
assert_eq!(request["type"].as_str(), Some("session.update"));
assert_eq!(request["session"]["type"].as_str(), Some("quicksilver"));