feat(app-server): propagate traces across tasks and core ops (#14387)

## Summary

This PR keeps app-server RPC request trace context alive for the full
lifetime of the work that request kicks off (e.g. for `thread/start`,
this is `app-server rpc handler -> tokio background task -> core op
submissions`). Previously, trace lineage was lost once the request handler
returned or handed work off to background tasks.

This approach is especially relevant for `thread/start` and other RPC
handlers that run in a non-blocking way. In the near future we'll most
likely want to make all app-server handlers run in a non-blocking way by
default, and only queue operations that must operate in order (e.g.
thread RPCs per thread?), so we want to make sure tracing in app-server
just generally works.

Depends on https://github.com/openai/codex/pull/14300

**Before**
<img width="155" height="207" alt="image"
src="https://github.com/user-attachments/assets/c9487459-36f1-436c-beb7-fafeb40737af"
/>


**After**
<img width="299" height="337" alt="image"
src="https://github.com/user-attachments/assets/727392b2-d072-4427-9dc4-0502d8652dea"
/>

## What changed

- Keep request-scoped trace context around until we send the final
response or error, or the connection closes.
- Thread that trace context through detached `thread/start` work so
background startup stays attached to the originating request.
- Pass request trace context through to downstream core operations,
including:
  - thread creation
  - resume/fork flows
  - turn submission
  - review
  - interrupt
  - realtime conversation operations
- Add tracing tests that verify:
  - remote W3C trace context is preserved for `thread/start`
  - remote W3C trace context is preserved for `turn/start`
  - downstream core spans stay under the originating request span
  - request-scoped tracing state is cleaned up correctly
- Clean up shutdown behavior so detached background tasks and spawned
threads are drained before process exit.
This commit is contained in:
Owen Lin
2026-03-11 20:18:31 -07:00
committed by GitHub
parent bf5e997b31
commit 5bc82c5b93
24 changed files with 1524 additions and 308 deletions

View File

@@ -27,50 +27,29 @@ pub(crate) fn request_span(
connection_id: ConnectionId,
session: &ConnectionSessionState,
) -> Span {
let span = info_span!(
"app_server.request",
otel.kind = "server",
otel.name = request.method.as_str(),
rpc.system = "jsonrpc",
rpc.method = request.method.as_str(),
rpc.transport = transport_name(transport),
rpc.request_id = ?request.id,
app_server.connection_id = ?connection_id,
app_server.api_version = "v2",
app_server.client_name = field::Empty,
app_server.client_version = field::Empty,
let initialize_client_info = initialize_client_info(request);
let method = request.method.as_str();
let span = app_server_request_span_template(
method,
transport_name(transport),
&request.id,
connection_id,
);
let initialize_client_info = initialize_client_info(request);
if let Some(client_name) = client_name(initialize_client_info.as_ref(), session) {
span.record("app_server.client_name", client_name);
}
if let Some(client_version) = client_version(initialize_client_info.as_ref(), session) {
span.record("app_server.client_version", client_version);
}
record_client_info(
&span,
client_name(initialize_client_info.as_ref(), session),
client_version(initialize_client_info.as_ref(), session),
);
if let Some(traceparent) = request
.trace
.as_ref()
.and_then(|trace| trace.traceparent.as_deref())
{
let trace = W3cTraceContext {
traceparent: Some(traceparent.to_string()),
tracestate: request
.trace
.as_ref()
.and_then(|value| value.tracestate.clone()),
};
if !set_parent_from_w3c_trace_context(&span, &trace) {
tracing::warn!(
rpc_method = request.method.as_str(),
rpc_request_id = ?request.id,
"ignoring invalid inbound request trace carrier"
);
}
} else if let Some(context) = traceparent_context_from_env() {
set_parent_from_context(&span, context);
}
let parent_trace = request.trace.as_ref().and_then(|trace| {
trace.traceparent.as_ref()?;
Some(W3cTraceContext {
traceparent: trace.traceparent.clone(),
tracestate: trace.tracestate.clone(),
})
});
attach_parent_context(&span, method, &request.id, parent_trace.as_ref());
span
}
@@ -86,37 +65,20 @@ pub(crate) fn typed_request_span(
session: &ConnectionSessionState,
) -> Span {
let method = request.method();
let span = info_span!(
"app_server.request",
otel.kind = "server",
otel.name = method,
rpc.system = "jsonrpc",
rpc.method = method,
rpc.transport = "in-process",
rpc.request_id = ?request.id(),
app_server.connection_id = ?connection_id,
app_server.api_version = "v2",
app_server.client_name = field::Empty,
app_server.client_version = field::Empty,
let span = app_server_request_span_template(&method, "in-process", request.id(), connection_id);
let client_info = initialize_client_info_from_typed_request(request);
record_client_info(
&span,
client_info
.map(|(client_name, _)| client_name)
.or(session.app_server_client_name.as_deref()),
client_info
.map(|(_, client_version)| client_version)
.or(session.client_version.as_deref()),
);
if let Some((client_name, client_version)) = initialize_client_info_from_typed_request(request)
{
span.record("app_server.client_name", client_name);
span.record("app_server.client_version", client_version);
} else {
if let Some(client_name) = session.app_server_client_name.as_deref() {
span.record("app_server.client_name", client_name);
}
if let Some(client_version) = session.client_version.as_deref() {
span.record("app_server.client_version", client_version);
}
}
if let Some(context) = traceparent_context_from_env() {
set_parent_from_context(&span, context);
}
attach_parent_context(&span, &method, request.id(), None);
span
}
@@ -127,6 +89,55 @@ fn transport_name(transport: AppServerTransport) -> &'static str {
}
}
/// Builds the `app_server.request` span shared by the request-span constructors.
///
/// The span is created at info level with the JSON-RPC method, transport name,
/// request id and connection id attached. `request_id` and `connection_id` are
/// captured through their `Debug` representations (`?` sigil).
///
/// `app_server.client_name` and `app_server.client_version` are declared as
/// `field::Empty` so that they exist on the span and can be filled in afterwards
/// with `Span::record` once the client information is known.
fn app_server_request_span_template(
method: &str,
transport: &'static str,
request_id: &impl std::fmt::Debug,
connection_id: ConnectionId,
) -> Span {
info_span!(
"app_server.request",
otel.kind = "server",
otel.name = method,
rpc.system = "jsonrpc",
rpc.method = method,
rpc.transport = transport,
rpc.request_id = ?request_id,
app_server.connection_id = ?connection_id,
app_server.api_version = "v2",
// Placeholders: recorded later, only when a value is available.
app_server.client_name = field::Empty,
app_server.client_version = field::Empty,
)
}
/// Fills in the client identification fields of a request span.
///
/// Each of `app_server.client_name` and `app_server.client_version` is recorded
/// on `span` only when the corresponding argument is `Some`; a `None` argument
/// leaves that field untouched.
fn record_client_info(span: &Span, client_name: Option<&str>, client_version: Option<&str>) {
    // Name is recorded before version, one `Span::record` call per present value.
    let known_fields = [
        ("app_server.client_name", client_name),
        ("app_server.client_version", client_version),
    ];
    for (field_name, maybe_value) in known_fields {
        if let Some(value) = maybe_value {
            span.record(field_name, value);
        }
    }
}
/// Attaches a parent trace context to `span`.
///
/// * When `parent_trace` is `Some`, the carrier is handed to
///   `set_parent_from_w3c_trace_context`. If that helper reports failure, a
///   warning naming the RPC method and request id is logged and the span is
///   left without a parent; the fallback below is NOT consulted in this case.
/// * When `parent_trace` is `None`, the context returned by
///   `traceparent_context_from_env` (if any) is used as the parent.
///
/// `method` and `request_id` are used only for the warning's fields.
fn attach_parent_context(
    span: &Span,
    method: &str,
    request_id: &impl std::fmt::Debug,
    parent_trace: Option<&W3cTraceContext>,
) {
    let Some(carrier) = parent_trace else {
        // No inbound carrier on the request: fall back to the ambient context.
        if let Some(fallback) = traceparent_context_from_env() {
            set_parent_from_context(span, fallback);
        }
        return;
    };

    let carrier_accepted = set_parent_from_w3c_trace_context(span, carrier);
    if carrier_accepted {
        return;
    }

    tracing::warn!(
        rpc_method = method,
        rpc_request_id = ?request_id,
        "ignoring invalid inbound request trace carrier"
    );
}
fn client_name<'a>(
initialize_client_info: Option<&'a InitializeParams>,
session: &'a ConnectionSessionState,