Stabilize websocket response.failed error delivery (#14017)

## What changed
- Drop failed websocket connections immediately after a terminal stream
error instead of awaiting a graceful close handshake before forwarding
the error to the caller.
- Keep the success path and the closed-connection guard behavior
unchanged.

## Why this fixes the flake
- The failing integration test waits for the second websocket stream to
surface the model error before issuing a follow-up request.
- On slower runners, the old error path awaited
`ws_stream.close().await` before sending the error downstream. If that
close handshake stalled, the test kept waiting for an error that had
already happened server-side and nextest timed it out.
- Dropping the failed websocket immediately makes the terminal error
observable right away and marks the session closed so the next request
reconnects cleanly instead of depending on a best-effort close
handshake.

## Code or test?
- This is a production logic fix in `codex-api`. The existing websocket
integration test already exercises the regression path.
This commit is contained in:
Ahmed Ibrahim
2026-03-10 17:59:41 -07:00
committed by Michael Bolin
parent 285b3a5143
commit c8446d7cf3
5 changed files with 102 additions and 32 deletions

View File

@@ -53,9 +53,6 @@ enum WsCommand {
message: Message,
tx_result: oneshot::Sender<Result<(), WsError>>,
},
Close {
tx_result: oneshot::Sender<Result<(), WsError>>,
},
}
impl WsStream {
@@ -80,11 +77,6 @@ impl WsStream {
break;
}
}
WsCommand::Close { tx_result } => {
let result = inner.close(None).await;
let _ = tx_result.send(result);
break;
}
}
}
message = inner.next() => {
@@ -144,11 +136,6 @@ impl WsStream {
.await
}
async fn close(&self) -> Result<(), WsError> {
self.request(|tx_result| WsCommand::Close { tx_result })
.await
}
async fn next(&mut self) -> Option<Result<Message, WsError>> {
self.rx_message.recv().await
}
@@ -242,26 +229,32 @@ impl ResponsesWebsocketConnection {
.await;
}
let mut guard = stream.lock().await;
let Some(ws_stream) = guard.as_mut() else {
let _ = tx_event
.send(Err(ApiError::Stream(
"websocket connection is closed".to_string(),
)))
.await;
return;
let result = {
let Some(ws_stream) = guard.as_mut() else {
let _ = tx_event
.send(Err(ApiError::Stream(
"websocket connection is closed".to_string(),
)))
.await;
return;
};
run_websocket_response_stream(
ws_stream,
tx_event.clone(),
request_body,
idle_timeout,
telemetry,
)
.await
};
if let Err(err) = run_websocket_response_stream(
ws_stream,
tx_event.clone(),
request_body,
idle_timeout,
telemetry,
)
.await
{
let _ = ws_stream.close().await;
*guard = None;
if let Err(err) = result {
// A terminal stream error should reach the caller immediately. Waiting for a
// graceful close handshake here can stall indefinitely and mask the error.
let failed_stream = guard.take();
drop(guard);
drop(failed_stream);
let _ = tx_event.send(Err(err)).await;
}
});