Stabilize websocket response.failed error delivery (#14017)

## What changed
- Drop failed websocket connections immediately after a terminal stream
error instead of awaiting a graceful close handshake before forwarding
the error to the caller.
- Keep the success path and the closed-connection guard behavior
unchanged.

## Why this fixes the flake
- The failing integration test waits for the second websocket stream to
surface the model error before issuing a follow-up request.
- On slower runners, the old error path awaited
`ws_stream.close().await` before sending the error downstream. If that
close handshake stalled, the test kept waiting for an error that had
already happened server-side and nextest timed it out.
- Dropping the failed websocket immediately makes the terminal error
observable right away and marks the session closed so the next request
reconnects cleanly instead of depending on a best-effort close
handshake.

## Code or test?
- This is a production logic fix in `codex-api`. The existing websocket
integration test already exercises the regression path.
This commit is contained in:
Ahmed Ibrahim
2026-03-10 17:59:41 -07:00
committed by Michael Bolin
parent 285b3a5143
commit c8446d7cf3
5 changed files with 102 additions and 32 deletions

View File

@@ -653,6 +653,7 @@ async fn responses_websocket_emits_reasoning_included_event() {
requests: vec![vec![ev_response_created("resp-1"), ev_completed("resp-1")]],
response_headers: vec![("X-Reasoning-Included".to_string(), "true".to_string())],
accept_delay: None,
close_after_requests: true,
}])
.await;
@@ -725,6 +726,7 @@ async fn responses_websocket_emits_rate_limit_events() {
("X-Reasoning-Included".to_string(), "true".to_string()),
],
accept_delay: None,
close_after_requests: true,
}])
.await;
@@ -1369,6 +1371,65 @@ async fn responses_websocket_v2_after_error_uses_full_create_without_previous_re
server.shutdown().await;
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_v2_surfaces_terminal_error_without_close_handshake() {
    skip_if_no_network!();

    // Mock server: the first turn succeeds, the second turn ends with a
    // terminal `response.failed`. `close_after_requests: false` means the
    // server never initiates a close handshake, so the client must surface
    // the error on its own instead of waiting for a graceful close.
    let server = start_websocket_server_with_headers(vec![WebSocketConnectionConfig {
        requests: vec![
            vec![ev_response_created("resp-1"), ev_completed("resp-1")],
            vec![json!({
                "type": "response.failed",
                "response": {
                    "error": {
                        "code": "invalid_prompt",
                        "message": "synthetic websocket failure"
                    }
                }
            })],
        ],
        response_headers: Vec::new(),
        accept_delay: None,
        close_after_requests: false,
    }])
    .await;

    let harness = websocket_harness_with_v2(&server, true).await;
    let mut session = harness.client.new_session();

    // Drive a successful first turn to completion so the websocket session
    // is fully established before the deliberately failing second turn.
    let first_prompt = prompt_with_input(vec![message_item("hello")]);
    let second_prompt = prompt_with_input(vec![message_item("hello"), message_item("second")]);
    stream_until_complete(&mut session, &harness, &first_prompt).await;

    let mut second_stream = session
        .stream(
            &second_prompt,
            &harness.model_info,
            &harness.session_telemetry,
            harness.effort,
            harness.summary,
            None,
            None,
        )
        .await
        .expect("websocket stream failed");

    // The terminal error must arrive promptly even though no close handshake
    // ever happens; the 2s cap keeps the test from hanging if the regression
    // (error delivery blocked behind `ws_stream.close()`) ever comes back.
    let error_observed = tokio::time::timeout(Duration::from_secs(2), async {
        loop {
            match second_stream.next().await {
                Some(event) if event.is_err() => break true,
                Some(_) => continue,
                None => break false,
            }
        }
    })
    .await
    .expect("timed out waiting for terminal websocket error");

    assert!(error_observed, "expected second websocket stream to error");
    server.shutdown().await;
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_v2_sets_openai_beta_header() {
skip_if_no_network!();