codex: fix exec-server OTEL trace lifecycle

This commit is contained in:
starr-openai
2026-05-29 16:14:32 -07:00
parent 802f373110
commit fc0f0c9933
3 changed files with 66 additions and 28 deletions

4
codex-rs/Cargo.lock generated
View File

@@ -1878,8 +1878,6 @@ dependencies = [
"eventsource-stream",
"futures",
"http 1.4.0",
"opentelemetry",
"opentelemetry_sdk",
"pretty_assertions",
"regex-lite",
"reqwest 0.12.28",
@@ -2789,6 +2787,8 @@ dependencies = [
"ctor 0.6.3",
"futures",
"http 1.4.0",
"opentelemetry",
"opentelemetry_sdk",
"pretty_assertions",
"prost 0.14.3",
"reqwest 0.12.28",

View File

@@ -40,7 +40,6 @@ use owo_colors::OwoColorize;
use std::io::IsTerminal;
use std::path::PathBuf;
use supports_color::Stream;
use tracing::Instrument;
use tracing_subscriber::EnvFilter;
use tracing_subscriber::prelude::*;
@@ -1511,11 +1510,9 @@ async fn run_exec_server_command(
if let Some(base_url) = cmd.remote {
let config = load_exec_server_config(root_config_overrides, strict_config).await?;
let (_otel, telemetry) = init_exec_server_tracing(Some(&config));
let exec_server_span = exec_server_root_span();
let environment_id = cmd
.environment_id
.ok_or_else(|| anyhow::anyhow!("--environment-id is required when --remote is set"))?;
exec_server_span.record("mode", "remote");
let auth_provider =
load_exec_server_remote_auth_provider(&config, &base_url, cmd.use_agent_identity_auth)
.await?;
@@ -1528,9 +1525,7 @@ async fn run_exec_server_command(
remote_config.name = name;
}
let remote_config = remote_config.with_telemetry(telemetry);
codex_exec_server::run_remote_environment(remote_config, runtime_paths)
.instrument(exec_server_span)
.await?;
codex_exec_server::run_remote_environment(remote_config, runtime_paths).await?;
Ok(())
} else {
let config = if strict_config {
@@ -1541,14 +1536,11 @@ async fn run_exec_server_command(
.ok()
};
let (_otel, telemetry) = init_exec_server_tracing(config.as_ref());
let exec_server_span = exec_server_root_span();
exec_server_span.record("mode", "local");
let listen_url = cmd
.listen
.as_deref()
.unwrap_or(codex_exec_server::DEFAULT_LISTEN_URL);
codex_exec_server::run_main_with_telemetry(listen_url, runtime_paths, telemetry)
.instrument(exec_server_span)
.await
.map_err(anyhow::Error::from_boxed)
}
@@ -1596,14 +1588,6 @@ fn init_exec_server_tracing(
(otel, telemetry)
}
/// Creates the INFO-level `codex.exec_server` span.
///
/// The span carries `otel.kind = "internal"`. Its `mode` field is declared
/// empty so that a value can be attached afterwards through `Span::record`.
fn exec_server_root_span() -> tracing::Span {
    tracing::span!(
        tracing::Level::INFO,
        "codex.exec_server",
        otel.kind = "internal",
        mode = tracing::field::Empty,
    )
}
fn exec_server_stderr_env_filter() -> EnvFilter {
EnvFilter::try_from_default_env()
.or_else(|_| EnvFilter::try_new(EXEC_SERVER_DEFAULT_LOG_FILTER))

View File

@@ -19,9 +19,26 @@ use crate::server::ConnectionProcessor;
const ERROR_BODY_PREVIEW_BYTES: usize = 4096;
/// Emits a remote exec-server lifecycle event under two tracing targets,
/// `codex_otel.log_only` and `codex_otel.trace_safe`, at the requested level.
///
/// The `$event_name` form creates a `codex.exec_server.remote_event` info
/// span whose `otel.name` is the event name and emits both events inside
/// `span.in_scope`, so the span is entered only while the two
/// `tracing::event!` calls run. Each event carries `event.name = $event_name`
/// ahead of the caller-supplied fields. The `span` handle is a local of the
/// macro's block and is dropped when the expansion finishes.
///
/// NOTE(review): this listing shows two arm headers back to back; the first
/// (without `$event_name`) looks like the pre-change arm — confirm which one
/// the file actually contains.
macro_rules! emit_remote_otel_event {
($level:ident, $($fields:tt)*) => {{
tracing::event!(target: "codex_otel.log_only", tracing::Level::$level, $($fields)*);
tracing::event!(target: "codex_otel.trace_safe", tracing::Level::$level, $($fields)*);
($level:ident, $event_name:literal, $($fields:tt)*) => {{
let span = tracing::info_span!(
"codex.exec_server.remote_event",
otel.kind = "internal",
otel.name = $event_name,
);
span.in_scope(|| {
tracing::event!(
target: "codex_otel.log_only",
tracing::Level::$level,
event.name = $event_name,
$($fields)*
);
tracing::event!(
target: "codex_otel.trace_safe",
tracing::Level::$level,
event.name = $event_name,
$($fields)*
);
});
}};
}
@@ -159,7 +176,7 @@ pub async fn run_remote_environment(
warn!(error = %err, "failed to register remote exec-server environment");
emit_remote_otel_event!(
WARN,
event.name = "codex.exec_server.remote_environment_registration_failed",
"codex.exec_server.remote_environment_registration_failed",
"failed to register remote exec-server environment"
);
return Err(err);
@@ -169,7 +186,7 @@ pub async fn run_remote_environment(
info!("codex exec-server remote environment registered");
emit_remote_otel_event!(
INFO,
event.name = "codex.exec_server.remote_environment_registered",
"codex.exec_server.remote_environment_registered",
"codex exec-server remote environment registered"
);
@@ -182,7 +199,7 @@ pub async fn run_remote_environment(
);
emit_remote_otel_event!(
INFO,
event.name = "codex.exec_server.remote_websocket_connected",
"codex.exec_server.remote_websocket_connected",
attempt = connection_attempt,
"connected remote exec-server websocket"
);
@@ -195,7 +212,7 @@ pub async fn run_remote_environment(
);
emit_remote_otel_event!(
WARN,
event.name = "codex.exec_server.remote_websocket_disconnected",
"codex.exec_server.remote_websocket_disconnected",
attempt = connection_attempt,
"remote exec-server websocket disconnected; retrying"
);
@@ -209,7 +226,7 @@ pub async fn run_remote_environment(
);
emit_remote_otel_event!(
WARN,
event.name = "codex.exec_server.remote_websocket_connect_failed",
"codex.exec_server.remote_websocket_connect_failed",
attempt = connection_attempt,
"failed to connect remote exec-server websocket"
);
@@ -224,7 +241,7 @@ pub async fn run_remote_environment(
);
emit_remote_otel_event!(
INFO,
event.name = "codex.exec_server.remote_websocket_retrying",
"codex.exec_server.remote_websocket_retrying",
attempt = connection_attempt,
backoff_ms,
"retrying remote exec-server websocket"
@@ -325,7 +342,12 @@ mod tests {
use codex_api::AuthProvider;
use http::HeaderMap;
use http::HeaderValue;
use opentelemetry::trace::TracerProvider as _;
use opentelemetry_sdk::trace::InMemorySpanExporter;
use opentelemetry_sdk::trace::SdkTracerProvider;
use pretty_assertions::assert_eq;
use tracing_subscriber::filter::filter_fn;
use tracing_subscriber::prelude::*;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
@@ -440,4 +462,36 @@ mod tests {
assert!(debug.contains("<redacted>"));
assert!(!debug.contains("workspace-123"));
}
/// Checks that a single `emit_remote_otel_event!` invocation leaves behind a
/// finished span named after the event, without any surrounding span being
/// closed by the test.
#[test]
fn remote_otel_events_finish_trace_spans_immediately() {
    // Collect finished spans in memory so they can be inspected afterwards.
    let exporter = InMemorySpanExporter::default();
    let provider = SdkTracerProvider::builder()
        .with_simple_exporter(exporter.clone())
        .build();

    // OpenTelemetry layer restricted by the crate's trace-export filter.
    let otel_layer = tracing_opentelemetry::layer()
        .with_tracer(provider.tracer("exec-server-test"))
        .with_filter(filter_fn(codex_otel::OtelProvider::trace_export_filter));
    let subscriber = tracing_subscriber::registry().with(otel_layer);

    tracing::subscriber::with_default(subscriber, || {
        // Re-register callsite interest now that this subscriber is the
        // default — presumably so interest cached under another subscriber
        // does not suppress the events below.
        tracing::callsite::rebuild_interest_cache();
        emit_remote_otel_event!(
            INFO,
            "codex.exec_server.remote_environment_registered",
            "codex exec-server remote environment registered"
        );
    });

    provider.force_flush().expect("flush traces");
    let spans = exporter.get_finished_spans().expect("span export");

    let expected_name = "codex.exec_server.remote_environment_registered";
    let lifecycle_span_exported = spans
        .iter()
        .any(|span| span.name.as_ref() == expected_name);
    assert!(
        lifecycle_span_exported,
        "expected finished remote OTEL lifecycle span, got {spans:?}"
    );
}
}