feat(app-server, core): add more spans (#14479)

## Description

This PR expands tracing coverage across app-server thread startup, core
session initialization, and the Responses transport layer. It also gives
core dispatch spans stable, operation-specific names, so traces are easier
to follow than with the old generic `submission_dispatch` spans.

Also use `fmt::Display` for types that we serialize in traces, so that we
send plain strings instead of Rust types.
This commit is contained in:
Owen Lin
2026-03-13 13:16:33 -07:00
committed by GitHub
parent 914f7c7317
commit 014e19510d
17 changed files with 473 additions and 88 deletions

View File

@@ -21,6 +21,7 @@ use http::Method;
use serde_json::Value;
use std::sync::Arc;
use std::sync::OnceLock;
use tracing::instrument;
pub struct ResponsesClient<T: HttpTransport, A: AuthProvider> {
session: EndpointSession<T, A>,
@@ -55,6 +56,16 @@ impl<T: HttpTransport, A: AuthProvider> ResponsesClient<T, A> {
}
}
#[instrument(
name = "responses.stream_request",
level = "info",
skip_all,
fields(
transport = "responses_http",
http.method = "POST",
api.path = "responses"
)
)]
pub async fn stream_request(
&self,
request: ResponsesApiRequest,
@@ -90,6 +101,17 @@ impl<T: HttpTransport, A: AuthProvider> ResponsesClient<T, A> {
"responses"
}
#[instrument(
name = "responses.stream",
level = "info",
skip_all,
fields(
transport = "responses_http",
http.method = "POST",
api.path = "responses",
turn.has_state = turn_state.is_some()
)
)]
pub async fn stream(
&self,
body: Value,

View File

@@ -35,9 +35,12 @@ use tokio_tungstenite::connect_async_tls_with_config;
use tokio_tungstenite::tungstenite::Error as WsError;
use tokio_tungstenite::tungstenite::Message;
use tokio_tungstenite::tungstenite::client::IntoClientRequest;
use tracing::Instrument;
use tracing::Span;
use tracing::debug;
use tracing::error;
use tracing::info;
use tracing::instrument;
use tracing::trace;
use tungstenite::extensions::ExtensionsConfig;
use tungstenite::extensions::compression::deflate::DeflateConfig;
@@ -202,6 +205,12 @@ impl ResponsesWebsocketConnection {
self.stream.lock().await.is_none()
}
#[instrument(
name = "responses_websocket.stream_request",
level = "info",
skip_all,
fields(transport = "responses_websocket", api.path = "responses")
)]
pub async fn stream_request(
&self,
request: ResponsesWsRequest,
@@ -218,48 +227,52 @@ impl ResponsesWebsocketConnection {
ApiError::Stream(format!("failed to encode websocket request: {err}"))
})?;
tokio::spawn(async move {
if let Some(model) = server_model {
let _ = tx_event.send(Ok(ResponseEvent::ServerModel(model))).await;
}
if let Some(etag) = models_etag {
let _ = tx_event.send(Ok(ResponseEvent::ModelsEtag(etag))).await;
}
if server_reasoning_included {
let _ = tx_event
.send(Ok(ResponseEvent::ServerReasoningIncluded(true)))
.await;
}
let mut guard = stream.lock().await;
let result = {
let Some(ws_stream) = guard.as_mut() else {
let current_span = Span::current();
tokio::spawn(
async move {
if let Some(model) = server_model {
let _ = tx_event.send(Ok(ResponseEvent::ServerModel(model))).await;
}
if let Some(etag) = models_etag {
let _ = tx_event.send(Ok(ResponseEvent::ModelsEtag(etag))).await;
}
if server_reasoning_included {
let _ = tx_event
.send(Err(ApiError::Stream(
"websocket connection is closed".to_string(),
)))
.send(Ok(ResponseEvent::ServerReasoningIncluded(true)))
.await;
return;
}
let mut guard = stream.lock().await;
let result = {
let Some(ws_stream) = guard.as_mut() else {
let _ = tx_event
.send(Err(ApiError::Stream(
"websocket connection is closed".to_string(),
)))
.await;
return;
};
run_websocket_response_stream(
ws_stream,
tx_event.clone(),
request_body,
idle_timeout,
telemetry,
)
.await
};
run_websocket_response_stream(
ws_stream,
tx_event.clone(),
request_body,
idle_timeout,
telemetry,
)
.await
};
if let Err(err) = result {
// A terminal stream error should reach the caller immediately. Waiting for a
// graceful close handshake here can stall indefinitely and mask the error.
let failed_stream = guard.take();
drop(guard);
drop(failed_stream);
let _ = tx_event.send(Err(err)).await;
if let Err(err) = result {
// A terminal stream error should reach the caller immediately. Waiting for a
// graceful close handshake here can stall indefinitely and mask the error.
let failed_stream = guard.take();
drop(guard);
drop(failed_stream);
let _ = tx_event.send(Err(err)).await;
}
}
});
.instrument(current_span),
);
Ok(ResponseStream { rx_event })
}
@@ -275,6 +288,12 @@ impl<A: AuthProvider> ResponsesWebsocketClient<A> {
Self { provider, auth }
}
#[instrument(
name = "responses_websocket.connect",
level = "info",
skip_all,
fields(transport = "responses_websocket", api.path = "responses")
)]
pub async fn connect(
&self,
extra_headers: HeaderMap,

View File

@@ -12,6 +12,7 @@ use http::HeaderMap;
use http::Method;
use serde_json::Value;
use std::sync::Arc;
use tracing::instrument;
pub(crate) struct EndpointSession<T: HttpTransport, A: AuthProvider> {
transport: T,
@@ -68,6 +69,12 @@ impl<T: HttpTransport, A: AuthProvider> EndpointSession<T, A> {
.await
}
#[instrument(
name = "endpoint_session.execute_with",
level = "info",
skip_all,
fields(http.method = %method, api.path = path)
)]
pub(crate) async fn execute_with<C>(
&self,
method: Method,
@@ -96,6 +103,12 @@ impl<T: HttpTransport, A: AuthProvider> EndpointSession<T, A> {
Ok(response)
}
#[instrument(
name = "endpoint_session.stream_with",
level = "info",
skip_all,
fields(http.method = %method, api.path = path)
)]
pub(crate) async fn stream_with<C>(
&self,
method: Method,