mirror of
https://github.com/openai/codex.git
synced 2026-04-25 15:15:15 +00:00
Add websocket telemetry metrics and labels (#10316)
Summary - expose websocket telemetry hooks through the responses client so request durations and event processing can be reported - record websocket request/event metrics and emit runtime telemetry events that the history UI now surfaces - improve tests to cover websocket telemetry reporting and guard runtime summary updates <img width="824" height="79" alt="Screenshot 2026-01-31 at 5 28 12 PM" src="https://github.com/user-attachments/assets/ea9a7965-d8b4-4e3c-a984-ef4fdc44c81d" />
This commit is contained in:
@@ -6,6 +6,7 @@ use crate::error::ApiError;
|
||||
use crate::provider::Provider;
|
||||
use crate::sse::responses::ResponsesStreamEvent;
|
||||
use crate::sse::responses::process_responses_event;
|
||||
use crate::telemetry::WebsocketTelemetry;
|
||||
use codex_client::TransportError;
|
||||
use futures::SinkExt;
|
||||
use futures::StreamExt;
|
||||
@@ -18,6 +19,7 @@ use std::time::Duration;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::time::Instant;
|
||||
use tokio_tungstenite::MaybeTlsStream;
|
||||
use tokio_tungstenite::WebSocketStream;
|
||||
use tokio_tungstenite::tungstenite::Error as WsError;
|
||||
@@ -38,14 +40,21 @@ pub struct ResponsesWebsocketConnection {
|
||||
// TODO (pakrym): is this the right place for timeout?
|
||||
idle_timeout: Duration,
|
||||
server_reasoning_included: bool,
|
||||
telemetry: Option<Arc<dyn WebsocketTelemetry>>,
|
||||
}
|
||||
|
||||
impl ResponsesWebsocketConnection {
|
||||
fn new(stream: WsStream, idle_timeout: Duration, server_reasoning_included: bool) -> Self {
|
||||
fn new(
|
||||
stream: WsStream,
|
||||
idle_timeout: Duration,
|
||||
server_reasoning_included: bool,
|
||||
telemetry: Option<Arc<dyn WebsocketTelemetry>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
stream: Arc::new(Mutex::new(Some(stream))),
|
||||
idle_timeout,
|
||||
server_reasoning_included,
|
||||
telemetry,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,6 +71,7 @@ impl ResponsesWebsocketConnection {
|
||||
let stream = Arc::clone(&self.stream);
|
||||
let idle_timeout = self.idle_timeout;
|
||||
let server_reasoning_included = self.server_reasoning_included;
|
||||
let telemetry = self.telemetry.clone();
|
||||
let request_body = serde_json::to_value(&request).map_err(|err| {
|
||||
ApiError::Stream(format!("failed to encode websocket request: {err}"))
|
||||
})?;
|
||||
@@ -87,6 +97,7 @@ impl ResponsesWebsocketConnection {
|
||||
tx_event.clone(),
|
||||
request_body,
|
||||
idle_timeout,
|
||||
telemetry,
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -114,6 +125,7 @@ impl<A: AuthProvider> ResponsesWebsocketClient<A> {
|
||||
&self,
|
||||
extra_headers: HeaderMap,
|
||||
turn_state: Option<Arc<OnceLock<String>>>,
|
||||
telemetry: Option<Arc<dyn WebsocketTelemetry>>,
|
||||
) -> Result<ResponsesWebsocketConnection, ApiError> {
|
||||
let ws_url = self
|
||||
.provider
|
||||
@@ -130,6 +142,7 @@ impl<A: AuthProvider> ResponsesWebsocketClient<A> {
|
||||
stream,
|
||||
self.provider.stream_idle_timeout,
|
||||
server_reasoning_included,
|
||||
telemetry,
|
||||
))
|
||||
}
|
||||
}
|
||||
@@ -218,6 +231,7 @@ async fn run_websocket_response_stream(
|
||||
tx_event: mpsc::Sender<std::result::Result<ResponseEvent, ApiError>>,
|
||||
request_body: Value,
|
||||
idle_timeout: Duration,
|
||||
telemetry: Option<Arc<dyn WebsocketTelemetry>>,
|
||||
) -> Result<(), ApiError> {
|
||||
let request_text = match serde_json::to_string(&request_body) {
|
||||
Ok(text) => text,
|
||||
@@ -228,16 +242,26 @@ async fn run_websocket_response_stream(
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(err) = ws_stream.send(Message::Text(request_text.into())).await {
|
||||
return Err(ApiError::Stream(format!(
|
||||
"failed to send websocket request: {err}"
|
||||
)));
|
||||
let request_start = Instant::now();
|
||||
let result = ws_stream
|
||||
.send(Message::Text(request_text.into()))
|
||||
.await
|
||||
.map_err(|err| ApiError::Stream(format!("failed to send websocket request: {err}")));
|
||||
|
||||
if let Some(t) = telemetry.as_ref() {
|
||||
t.on_ws_request(request_start.elapsed(), result.as_ref().err());
|
||||
}
|
||||
|
||||
result?;
|
||||
|
||||
loop {
|
||||
let poll_start = Instant::now();
|
||||
let response = tokio::time::timeout(idle_timeout, ws_stream.next())
|
||||
.await
|
||||
.map_err(|_| ApiError::Stream("idle timeout waiting for websocket".into()));
|
||||
if let Some(t) = telemetry.as_ref() {
|
||||
t.on_ws_event(&response, poll_start.elapsed());
|
||||
}
|
||||
let message = match response {
|
||||
Ok(Some(Ok(msg))) => msg,
|
||||
Ok(Some(Err(err))) => {
|
||||
|
||||
Reference in New Issue
Block a user