feat(app-server, core): add more spans (#14479)

## Description

This PR expands tracing coverage across app-server thread startup, core
session initialization, and the Responses transport layer. It also gives
core dispatch spans stable operation-specific names so traces are easier
to follow than the old generic `submission_dispatch` spans.

Also use `fmt::Display` for types that we serialize in traces so we send
strings instead of rust types
This commit is contained in:
Owen Lin
2026-03-13 13:16:33 -07:00
committed by GitHub
parent 914f7c7317
commit 014e19510d
17 changed files with 473 additions and 88 deletions

View File

@@ -47,8 +47,6 @@ use tracing_subscriber::layer::SubscriberExt;
use wiremock::MockServer;
const TEST_CONNECTION_ID: ConnectionId = ConnectionId(7);
const CORE_TURN_SANITY_SPAN_NAMES: &[&str] =
&["submission_dispatch", "session_task.turn", "run_turn"];
struct TestTracing {
exporter: InMemorySpanExporter,
@@ -284,17 +282,21 @@ fn find_rpc_span_with_trace<'a>(
})
}
fn find_span_by_name_with_trace<'a>(
fn find_span_with_trace<'a, F>(
spans: &'a [SpanData],
name: &str,
trace_id: TraceId,
) -> &'a SpanData {
description: &str,
predicate: F,
) -> &'a SpanData
where
F: Fn(&SpanData) -> bool,
{
spans
.iter()
.find(|span| span.name.as_ref() == name && span.span_context.trace_id() == trace_id)
.find(|span| span.span_context.trace_id() == trace_id && predicate(span))
.unwrap_or_else(|| {
panic!(
"missing span named {name} for trace={trace_id}; exported spans:\n{}",
"missing span matching {description} for trace={trace_id}; exported spans:\n{}",
format_spans(spans)
)
})
@@ -319,12 +321,17 @@ fn format_spans(spans: &[SpanData]) -> String {
.join("\n")
}
fn assert_span_descends_from(spans: &[SpanData], child: &SpanData, ancestor: &SpanData) {
fn span_depth_from_ancestor(
spans: &[SpanData],
child: &SpanData,
ancestor: &SpanData,
) -> Option<usize> {
let ancestor_span_id = ancestor.span_context.span_id();
let mut parent_span_id = child.parent_span_id;
let mut depth = 1;
while parent_span_id != SpanId::INVALID {
if parent_span_id == ancestor_span_id {
return;
return Some(depth);
}
let Some(parent_span) = spans
.iter()
@@ -333,6 +340,15 @@ fn assert_span_descends_from(spans: &[SpanData], child: &SpanData, ancestor: &Sp
break;
};
parent_span_id = parent_span.parent_span_id;
depth += 1;
}
None
}
fn assert_span_descends_from(spans: &[SpanData], child: &SpanData, ancestor: &SpanData) {
if span_depth_from_ancestor(spans, child, ancestor).is_some() {
return;
}
panic!(
@@ -343,6 +359,27 @@ fn assert_span_descends_from(spans: &[SpanData], child: &SpanData, ancestor: &Sp
);
}
fn assert_has_internal_descendant_at_min_depth(
spans: &[SpanData],
ancestor: &SpanData,
min_depth: usize,
) {
if spans.iter().any(|span| {
span.span_kind == SpanKind::Internal
&& span.span_context.trace_id() == ancestor.span_context.trace_id()
&& span_depth_from_ancestor(spans, span, ancestor)
.is_some_and(|depth| depth >= min_depth)
}) {
return;
}
panic!(
"missing internal descendant at depth >= {min_depth} below {}; exported spans:\n{}",
ancestor.name,
format_spans(spans)
);
}
async fn read_response<T: serde::de::DeserializeOwned>(
outgoing_rx: &mut mpsc::Receiver<crate::outgoing_message::OutgoingEnvelope>,
request_id: i64,
@@ -443,6 +480,21 @@ where
);
}
async fn wait_for_new_exported_spans<F>(
tracing: &TestTracing,
baseline_len: usize,
predicate: F,
) -> Vec<SpanData>
where
F: Fn(&[SpanData]) -> bool,
{
let spans = wait_for_exported_spans(tracing, |spans| {
spans.len() > baseline_len && predicate(&spans[baseline_len..])
})
.await;
spans.into_iter().skip(baseline_len).collect()
}
#[tokio::test(flavor = "current_thread")]
async fn thread_start_jsonrpc_span_exports_server_span_and_parents_children() -> Result<()> {
let _guard = tracing_test_guard().lock().await;
@@ -450,33 +502,65 @@ async fn thread_start_jsonrpc_span_exports_server_span_and_parents_children() ->
let RemoteTrace {
trace_id: remote_trace_id,
parent_span_id: remote_parent_span_id,
context: remote_trace,
..
} = RemoteTrace::new("00000000000000000000000000000011", "0000000000000022");
let _: ThreadStartResponse = harness.start_thread(2, Some(remote_trace)).await;
let spans = wait_for_exported_spans(harness.tracing, |spans| {
let _: ThreadStartResponse = harness.start_thread(20_002, None).await;
let untraced_spans = wait_for_exported_spans(harness.tracing, |spans| {
spans.iter().any(|span| {
span.span_kind == SpanKind::Server
&& span_attr(span, "rpc.method") == Some("thread/start")
})
})
.await;
let untraced_server_span = find_rpc_span_with_trace(
&untraced_spans,
SpanKind::Server,
"thread/start",
untraced_spans
.iter()
.rev()
.find(|span| {
span.span_kind == SpanKind::Server
&& span_attr(span, "rpc.system") == Some("jsonrpc")
&& span_attr(span, "rpc.method") == Some("thread/start")
})
.unwrap_or_else(|| {
panic!(
"missing latest thread/start server span; exported spans:\n{}",
format_spans(&untraced_spans)
)
})
.span_context
.trace_id(),
);
assert_has_internal_descendant_at_min_depth(&untraced_spans, untraced_server_span, 1);
let baseline_len = untraced_spans.len();
let _: ThreadStartResponse = harness.start_thread(20_003, Some(remote_trace)).await;
let spans = wait_for_new_exported_spans(harness.tracing, baseline_len, |spans| {
spans.iter().any(|span| {
span.span_kind == SpanKind::Server
&& span_attr(span, "rpc.method") == Some("thread/start")
&& span.span_context.trace_id() == remote_trace_id
}) && spans.iter().any(|span| {
span.name.as_ref() == "thread_spawn" && span.span_context.trace_id() == remote_trace_id
}) && spans.iter().any(|span| {
span.name.as_ref() == "session_init" && span.span_context.trace_id() == remote_trace_id
span.name.as_ref() == "app_server.thread_start.notify_started"
&& span.span_context.trace_id() == remote_trace_id
})
})
.await;
let server_request_span =
find_rpc_span_with_trace(&spans, SpanKind::Server, "thread/start", remote_trace_id);
let thread_spawn_span = find_span_by_name_with_trace(&spans, "thread_spawn", remote_trace_id);
let session_init_span = find_span_by_name_with_trace(&spans, "session_init", remote_trace_id);
assert_eq!(server_request_span.name.as_ref(), "thread/start");
assert_eq!(server_request_span.parent_span_id, remote_parent_span_id);
assert!(server_request_span.parent_span_is_remote);
assert_eq!(server_request_span.span_context.trace_id(), remote_trace_id);
assert_ne!(server_request_span.span_context.span_id(), SpanId::INVALID);
assert_span_descends_from(&spans, thread_spawn_span, server_request_span);
assert_span_descends_from(&spans, session_init_span, server_request_span);
assert_has_internal_descendant_at_min_depth(&spans, server_request_span, 1);
assert_has_internal_descendant_at_min_depth(&spans, server_request_span, 2);
harness.shutdown().await;
Ok(())
@@ -527,7 +611,7 @@ async fn turn_start_jsonrpc_span_parents_core_turn_spans() -> Result<()> {
&& span_attr(span, "rpc.method") == Some("turn/start")
&& span.span_context.trace_id() == remote_trace_id
}) && spans.iter().any(|span| {
CORE_TURN_SANITY_SPAN_NAMES.contains(&span.name.as_ref())
span_attr(span, "codex.op") == Some("user_input")
&& span.span_context.trace_id() == remote_trace_id
})
})
@@ -535,17 +619,9 @@ async fn turn_start_jsonrpc_span_parents_core_turn_spans() -> Result<()> {
let server_request_span =
find_rpc_span_with_trace(&spans, SpanKind::Server, "turn/start", remote_trace_id);
let core_turn_span = spans
.iter()
.find(|span| {
CORE_TURN_SANITY_SPAN_NAMES.contains(&span.name.as_ref())
&& span.span_context.trace_id() == remote_trace_id
})
.unwrap_or_else(|| {
panic!(
"missing representative core turn span for trace={remote_trace_id}; exported spans:\n{}",
format_spans(&spans)
)
let core_turn_span =
find_span_with_trace(&spans, remote_trace_id, "codex.op=user_input", |span| {
span_attr(span, "codex.op") == Some("user_input")
});
assert_eq!(server_request_span.parent_span_id, remote_parent_span_id);