From 31b233c7c6883e23c8a1e1f9c4917fe638250522 Mon Sep 17 00:00:00 2001 From: bbrown-oai Date: Thu, 7 May 2026 16:06:57 -0700 Subject: [PATCH] codex-otel: add configurable trace metadata (#21556) Add Codex config for static trace span attributes and structured W3C tracestate field upserts. The config flows through OtelSettings so callers can attach trace metadata without touching every span call site. Apply span attributes with an SDK span processor so every exported trace span carries the configured metadata. Model tracestate as nested member fields so configured keys can be upserted while unrelated propagated state in the same member is preserved. Validate configured tracestate before installing provider-global state, including header-unsafe values the SDK does not reject by itself. This keeps Codex from propagating malformed trace context from config. Update the config schema, public docs, and OTLP loopback coverage for config parsing, span export, propagation, and invalid-header rejection. --- codex-rs/config/src/loader/mod.rs | 1 + codex-rs/config/src/types.rs | 10 + codex-rs/core/config.schema.json | 17 ++ .../core/src/config/config_loader_tests.rs | 4 + codex-rs/core/src/config/config_tests.rs | 116 ++++++++++ codex-rs/core/src/config/mod.rs | 27 +-- codex-rs/core/src/config/otel.rs | 117 +++++++++++ codex-rs/core/src/otel_init.rs | 2 + codex-rs/otel/README.md | 22 ++ codex-rs/otel/src/config.rs | 15 ++ codex-rs/otel/src/lib.rs | 3 + codex-rs/otel/src/provider.rs | 114 ++++++++-- codex-rs/otel/src/trace_context.rs | 198 +++++++++++++++++- .../otel/tests/suite/otlp_http_loopback.rs | 84 +++++++- codex-rs/windows-sandbox-rs/src/wfp_setup.rs | 3 + docs/config.md | 21 ++ 16 files changed, 707 insertions(+), 47 deletions(-) create mode 100644 codex-rs/core/src/config/otel.rs diff --git a/codex-rs/config/src/loader/mod.rs b/codex-rs/config/src/loader/mod.rs index f5f8ec44e5..e9f819bcf9 100644 --- a/codex-rs/config/src/loader/mod.rs +++ b/codex-rs/config/src/loader/mod.rs @@ -60,6 +60,7 @@ const PROJECT_LOCAL_CONFIG_DENYLIST: &[&str] = &[ "profile", "profiles", "experimental_realtime_ws_base_url", + "otel", ]; async fn first_layer_config_error_from_entries(layers: &[ConfigLayerEntry]) -> Option { diff --git a/codex-rs/config/src/types.rs b/codex-rs/config/src/types.rs index 6cb9abc507..39fd0a442f 100644 --- a/codex-rs/config/src/types.rs +++ b/codex-rs/config/src/types.rs @@ -514,6 +514,12 @@ pub struct OtelConfigToml { /// Optional metrics exporter pub metrics_exporter: Option, + + /// Attributes to add to every exported trace span. + pub span_attributes: Option>, + + /// Semicolon-separated `key:value` fields to upsert into W3C tracestate members. + pub tracestate: Option>>, } /// Effective OTEL settings after defaults are applied. @@ -524,6 +530,8 @@ pub struct OtelConfig { pub exporter: OtelExporterKind, pub trace_exporter: OtelExporterKind, pub metrics_exporter: OtelExporterKind, + pub span_attributes: BTreeMap, + pub tracestate: BTreeMap>, } impl Default for OtelConfig { @@ -534,6 +542,8 @@ impl Default for OtelConfig { exporter: OtelExporterKind::None, trace_exporter: OtelExporterKind::None, metrics_exporter: OtelExporterKind::Statsig, + span_attributes: BTreeMap::new(), + tracestate: BTreeMap::new(), } } } diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index 6eab1d4c55..c50c4d97af 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -1695,6 +1695,13 @@ ], "description": "Optional metrics exporter" }, + "span_attributes": { + "additionalProperties": { + "type": "string" + }, + "description": "Attributes to add to every exported trace span.", + "type": "object" + }, "trace_exporter": { "allOf": [ { @@ -1702,6 +1709,16 @@ } ], "description": "Optional trace exporter" + }, + "tracestate": { + "additionalProperties": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "description": "Semicolon-separated `key:value` fields to upsert into W3C tracestate members.", + "type": "object" } }, "type": "object" diff --git a/codex-rs/core/src/config/config_loader_tests.rs b/codex-rs/core/src/config/config_loader_tests.rs index 6fcd5f872d..4a7a33b7e6 100644 --- a/codex-rs/core/src/config/config_loader_tests.rs +++ b/codex-rs/core/src/config/config_loader_tests.rs @@ -1752,6 +1752,9 @@ notify = ["sh", "-c", "echo attacker"] profile = "attacker" experimental_realtime_ws_base_url = "wss://attacker.example/realtime" +[otel] +environment = "attacker" + [profiles.attacker] model = "attacker-model" model_instructions_file = 1 @@ -1801,6 +1804,7 @@ wire_api = "responses" "profile", "profiles", "experimental_realtime_ws_base_url", + "otel", ]; let expected_startup_warnings = vec![format!( concat!( diff --git a/codex-rs/core/src/config/config_tests.rs b/codex-rs/core/src/config/config_tests.rs index bc9af28a67..368ad769db 100644 --- a/codex-rs/core/src/config/config_tests.rs +++ b/codex-rs/core/src/config/config_tests.rs @@ -44,6 +44,9 @@ use codex_config::types::Notice; use codex_config::types::NotificationCondition; use codex_config::types::NotificationMethod; use codex_config::types::Notifications; +use codex_config::types::OtelConfig; +use codex_config::types::OtelConfigToml; +use codex_config::types::OtelExporterKind; use codex_config::types::SandboxWorkspaceWrite; use codex_config::types::SessionPickerViewMode; use codex_config::types::SkillsConfig; @@ -7118,6 +7121,119 @@ async fn trace_exporter_defaults_to_none_when_log_exporter_is_set() -> std::io:: Ok(()) } +#[tokio::test] +async fn load_config_applies_otel_trace_metadata() -> std::io::Result<()> { + let mut fixture = create_test_fixture()?; + fixture.cfg = toml::from_str( + r#" +[otel.span_attributes] +"example.trace_attr" = "enabled" + +[otel.tracestate.example] +alpha = "one" +beta = "two" +"#, + ) + .expect("TOML deserialization should succeed"); + + let config = Config::load_from_base_config_with_overrides( + fixture.cfg.clone(), + ConfigOverrides { + cwd: Some(fixture.cwd_path()), + ..Default::default() + }, + fixture.codex_home(), + ) + .await?; + + assert_eq!( + config.otel.span_attributes, + BTreeMap::from([("example.trace_attr".to_string(), "enabled".to_string())]) + ); + assert_eq!( + config.otel.tracestate, + BTreeMap::from([( + "example".to_string(), + BTreeMap::from([ + ("alpha".to_string(), "one".to_string()), + ("beta".to_string(), "two".to_string()), + ]), + )]) + ); + Ok(()) +} + +#[tokio::test] +async fn load_config_drops_invalid_otel_trace_metadata_entries() -> std::io::Result<()> { + let mut fixture = create_test_fixture()?; + fixture.cfg = toml::from_str( + r#" +[otel] +environment = "test" + +[otel.span_attributes] +"" = "missing-key" +"example.trace_attr" = "enabled" + +[otel.tracestate.example] +alpha = "one" +beta = "two\ntoo" + +[otel.tracestate.bad] +alpha = "one\ntwo" +"#, + ) + .expect("TOML deserialization should succeed"); + + let config = Config::load_from_base_config_with_overrides( + fixture.cfg.clone(), + ConfigOverrides { + cwd: Some(fixture.cwd_path()), + ..Default::default() + }, + fixture.codex_home(), + ) + .await?; + + assert_eq!(config.otel.environment, "test"); + assert_eq!( + config.otel.span_attributes, + BTreeMap::from([("example.trace_attr".to_string(), "enabled".to_string())]) + ); + assert_eq!( + config.otel.tracestate, + BTreeMap::from([( + "example".to_string(), + BTreeMap::from([("alpha".to_string(), "one".to_string())]), + )]) + ); + assert!( + config.startup_warnings.iter().any(|warning| { + warning.contains("Ignoring invalid `otel.span_attributes` config") + && warning.contains("configured span attribute key must not be empty") + }), + "{:?}", + config.startup_warnings + ); + assert!( + config.startup_warnings.iter().any(|warning| { + warning.contains("Ignoring invalid `otel.tracestate` config") + && warning.contains("invalid configured tracestate value for example.beta") + }), + "{:?}", + config.startup_warnings + ); + assert!( + config.startup_warnings.iter().any(|warning| { + warning.contains("Ignoring invalid `otel.tracestate` config") + && warning.contains("invalid configured tracestate value for bad.alpha") + }), + "{:?}", + config.startup_warnings + ); + Ok(()) +} + #[tokio::test] async fn explicit_null_service_tier_override_sets_fast_default_opt_out() -> std::io::Result<()> { let fixture = create_test_fixture()?; diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index b2dfbcb2b8..cf0ccbe544 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -37,7 +37,6 @@ use codex_config::profile_toml::ConfigProfile; use codex_config::sandbox_mode_requirement_for_permission_profile; use codex_config::types::ApprovalsReviewer; use codex_config::types::AuthCredentialsStoreMode; -use codex_config::types::DEFAULT_OTEL_ENVIRONMENT; use codex_config::types::History; use codex_config::types::McpServerConfig; use codex_config::types::McpServerDisabledReason; @@ -46,9 +45,6 @@ use codex_config::types::MemoriesConfig; use codex_config::types::ModelAvailabilityNuxConfig; use codex_config::types::Notice; use codex_config::types::OAuthCredentialsStoreMode; -use codex_config::types::OtelConfig; -use codex_config::types::OtelConfigToml; -use codex_config::types::OtelExporterKind; use codex_config::types::SessionPickerViewMode; use codex_config::types::ToolSuggestConfig; use codex_config::types::ToolSuggestDisabledTool; @@ -132,6 +128,7 @@ pub(crate) mod agent_roles; pub mod edit; mod managed_features; mod network_proxy_spec; +mod otel; mod permissions; #[cfg(test)] mod schema; @@ -2978,6 +2975,7 @@ impl Config { .value .set(effective_permission_profile) .map_err(std::io::Error::from)?; + let otel = otel::resolve_config(cfg.otel.unwrap_or_default(), &mut startup_warnings); let config = Self { model, service_tier, @@ -3205,26 +3203,7 @@ impl Config { .as_ref() .map(|t| t.keymap.clone()) .unwrap_or_default(), - otel: { - let t: OtelConfigToml = cfg.otel.unwrap_or_default(); - let log_user_prompt = t.log_user_prompt.unwrap_or(false); - let environment = t - .environment - .unwrap_or(DEFAULT_OTEL_ENVIRONMENT.to_string()); - let exporter = t.exporter.unwrap_or(OtelExporterKind::None); - // OTLP HTTP endpoints are signal-specific in our config, so - // enabling log export must not implicitly send spans to a - // /v1/logs endpoint. - let trace_exporter = t.trace_exporter.unwrap_or(OtelExporterKind::None); - let metrics_exporter = t.metrics_exporter.unwrap_or(OtelExporterKind::Statsig); - OtelConfig { - log_user_prompt, - environment, - exporter, - trace_exporter, - metrics_exporter, - } - }, + otel, }; Ok(config) }) diff --git a/codex-rs/core/src/config/otel.rs b/codex-rs/core/src/config/otel.rs new file mode 100644 index 0000000000..cb65d304d1 --- /dev/null +++ b/codex-rs/core/src/config/otel.rs @@ -0,0 +1,117 @@ +use std::collections::BTreeMap; +use std::fmt::Display; + +use codex_config::types::DEFAULT_OTEL_ENVIRONMENT; +use codex_config::types::OtelConfig; +use codex_config::types::OtelConfigToml; +use codex_config::types::OtelExporterKind; + +pub(crate) fn resolve_config( + config: OtelConfigToml, + startup_warnings: &mut Vec, +) -> OtelConfig { + let log_user_prompt = config.log_user_prompt.unwrap_or(false); + let environment = config + .environment + .unwrap_or_else(|| DEFAULT_OTEL_ENVIRONMENT.to_string()); + let exporter = config.exporter.unwrap_or(OtelExporterKind::None); + // OTLP HTTP endpoints are signal-specific in our config, so enabling log + // export must not implicitly send spans to a /v1/logs endpoint. + let trace_exporter = config.trace_exporter.unwrap_or(OtelExporterKind::None); + let metrics_exporter = config.metrics_exporter.unwrap_or(OtelExporterKind::Statsig); + // Provider initialization installs process-global OTEL state. Sanitize + // user-editable trace metadata here so malformed config is reported as a + // startup warning instead of making startup fail. + let span_attributes = resolve_span_attributes(config.span_attributes, startup_warnings); + let tracestate = resolve_tracestate(config.tracestate, startup_warnings); + + OtelConfig { + log_user_prompt, + environment, + exporter, + trace_exporter, + metrics_exporter, + span_attributes, + tracestate, + } +} + +fn resolve_span_attributes( + span_attributes: Option>, + startup_warnings: &mut Vec, +) -> BTreeMap { + let Some(span_attributes) = span_attributes else { + return BTreeMap::new(); + }; + + let mut valid_attributes = BTreeMap::new(); + for (key, value) in span_attributes { + let attribute = BTreeMap::from([(key.clone(), value.clone())]); + if let Err(err) = codex_otel::validate_span_attributes(&attribute) { + push_invalid_config_warning("otel.span_attributes", err, startup_warnings); + continue; + } + valid_attributes.insert(key, value); + } + + valid_attributes +} + +fn resolve_tracestate( + tracestate: Option>>, + startup_warnings: &mut Vec, +) -> BTreeMap> { + let Some(tracestate) = tracestate else { + return BTreeMap::new(); + }; + + let mut valid_entries = BTreeMap::new(); + for (member_key, fields) in tracestate { + let fields = resolve_tracestate_member_fields(&member_key, fields, startup_warnings); + if fields.is_empty() { + continue; + } + if let Err(err) = codex_otel::validate_tracestate_member(&member_key, &fields) { + push_invalid_config_warning("otel.tracestate", err, startup_warnings); + continue; + } + valid_entries.insert(member_key, fields); + } + + // Tracestate members can be valid individually while the combined W3C + // tracestate header is not, so validate the filtered set before handing it + // to provider initialization. + if let Err(err) = codex_otel::validate_tracestate_entries(&valid_entries) { + push_invalid_config_warning("otel.tracestate", err, startup_warnings); + return BTreeMap::new(); + } + + valid_entries +} + +fn resolve_tracestate_member_fields( + member_key: &str, + fields: BTreeMap, + startup_warnings: &mut Vec, +) -> BTreeMap { + let mut valid_fields = BTreeMap::new(); + for (field_key, value) in fields { + let field = BTreeMap::from([(field_key.clone(), value.clone())]); + if let Err(err) = codex_otel::validate_tracestate_member(member_key, &field) { + push_invalid_config_warning("otel.tracestate", err, startup_warnings); + continue; + } + valid_fields.insert(field_key, value); + } + valid_fields +} + +fn push_invalid_config_warning( + config_key: &str, + err: impl Display, + startup_warnings: &mut Vec, +) { + let message = format!("Ignoring invalid `{config_key}` config: {err}"); + tracing::warn!("{message}"); + startup_warnings.push(message); +} diff --git a/codex-rs/core/src/otel_init.rs b/codex-rs/core/src/otel_init.rs index 41914570f3..0cd1f06994 100644 --- a/codex-rs/core/src/otel_init.rs +++ b/codex-rs/core/src/otel_init.rs @@ -89,6 +89,8 @@ pub fn build_provider( trace_exporter, metrics_exporter, runtime_metrics, + span_attributes: config.otel.span_attributes.clone(), + tracestate: config.otel.tracestate.clone(), }) } diff --git a/codex-rs/otel/README.md b/codex-rs/otel/README.md index 3739f5f026..afae1b2f03 100644 --- a/codex-rs/otel/README.md +++ b/codex-rs/otel/README.md @@ -39,6 +39,8 @@ let settings = OtelSettings { tls: None, }, metrics_exporter: OtelExporter::None, + span_attributes: std::collections::BTreeMap::new(), + tracestate: std::collections::BTreeMap::new(), }; if let Some(provider) = OtelProvider::from(&settings)? { @@ -49,6 +51,26 @@ if let Some(provider) = OtelProvider::from(&settings)? { } ``` +Configured span attributes and W3C tracestate member fields are applied to +exported trace spans and propagated trace context: + +```toml +[otel.span_attributes] +"example.trace_attr" = "enabled" + +[otel.tracestate.example] +alpha = "one" +beta = "two" +``` + +Configured tracestate members and encoded values must be valid W3C tracestate. +Each nested table is encoded as semicolon-separated `key:value` fields inside +that member. If propagated trace context already has the named member, Codex +upserts configured fields and preserves other fields in that member. This +config shape does not support setting opaque tracestate member values. Invalid +trace metadata entries are ignored during config load and reported as startup +warnings. + ## SessionTelemetry (events) `SessionTelemetry` adds consistent metadata to tracing events and helps record diff --git a/codex-rs/otel/src/config.rs b/codex-rs/otel/src/config.rs index fa088df7d5..ab60ea601e 100644 --- a/codex-rs/otel/src/config.rs +++ b/codex-rs/otel/src/config.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeMap; use std::collections::HashMap; use std::path::PathBuf; @@ -34,6 +35,18 @@ pub(crate) fn resolve_exporter(exporter: &OtelExporter) -> OtelExporter { } } +/// Validates configured span attributes before they are attached to exported spans. +pub fn validate_span_attributes(attributes: &BTreeMap) -> std::io::Result<()> { + if attributes.keys().any(String::is_empty) { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "configured span attribute key must not be empty", + )); + } + + Ok(()) +} + #[derive(Clone, Debug)] pub struct OtelSettings { pub environment: String, @@ -44,6 +57,8 @@ pub struct OtelSettings { pub trace_exporter: OtelExporter, pub metrics_exporter: OtelExporter, pub runtime_metrics: bool, + pub span_attributes: BTreeMap, + pub tracestate: BTreeMap>, } /// Resolved Statsig metrics settings that another process can use to recreate diff --git a/codex-rs/otel/src/lib.rs b/codex-rs/otel/src/lib.rs index 431ed331a0..1a689d3684 100644 --- a/codex-rs/otel/src/lib.rs +++ b/codex-rs/otel/src/lib.rs @@ -16,6 +16,7 @@ pub use crate::config::OtelHttpProtocol; pub use crate::config::OtelSettings; pub use crate::config::OtelTlsConfig; pub use crate::config::StatsigMetricsSettings; +pub use crate::config::validate_span_attributes; pub use crate::events::session_telemetry::AuthEnvTelemetryMetadata; pub use crate::events::session_telemetry::SessionTelemetry; pub use crate::events::session_telemetry::SessionTelemetryMetadata; @@ -31,6 +32,8 @@ pub use crate::trace_context::set_parent_from_context; pub use crate::trace_context::set_parent_from_w3c_trace_context; pub use crate::trace_context::span_w3c_trace_context; pub use crate::trace_context::traceparent_context_from_env; +pub use crate::trace_context::validate_tracestate_entries; +pub use crate::trace_context::validate_tracestate_member; pub use codex_utils_string::sanitize_metric_tag_value; #[derive(Debug, Clone, Serialize, Display)] diff --git a/codex-rs/otel/src/provider.rs b/codex-rs/otel/src/provider.rs index 72a1c7c9b5..88e6b85ae2 100644 --- a/codex-rs/otel/src/provider.rs +++ b/codex-rs/otel/src/provider.rs @@ -7,8 +7,10 @@ use crate::metrics::MetricsConfig; use crate::targets::is_log_export_target; use crate::targets::is_trace_safe_target; use gethostname::gethostname; +use opentelemetry::Context; use opentelemetry::KeyValue; use opentelemetry::global; +use opentelemetry::trace::Span as _; use opentelemetry::trace::TracerProvider as _; use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; use opentelemetry_otlp::LogExporter; @@ -22,15 +24,22 @@ use opentelemetry_otlp::WithTonicConfig; use opentelemetry_otlp::tonic_types::metadata::MetadataMap; use opentelemetry_otlp::tonic_types::transport::ClientTlsConfig; use opentelemetry_sdk::Resource; +use opentelemetry_sdk::error::OTelSdkResult; use opentelemetry_sdk::logs::SdkLoggerProvider; use opentelemetry_sdk::propagation::TraceContextPropagator; use opentelemetry_sdk::runtime; use opentelemetry_sdk::trace::BatchSpanProcessor; use opentelemetry_sdk::trace::SdkTracerProvider; +use opentelemetry_sdk::trace::Span; +use opentelemetry_sdk::trace::SpanData; +use opentelemetry_sdk::trace::SpanProcessor; use opentelemetry_sdk::trace::Tracer; +use opentelemetry_sdk::trace::TracerProviderBuilder; use opentelemetry_sdk::trace::span_processor_with_async_runtime::BatchSpanProcessor as TokioBatchSpanProcessor; use opentelemetry_semantic_conventions as semconv; +use std::collections::BTreeMap; use std::error::Error; +use std::time::Duration; use tracing::debug; use tracing_subscriber::Layer; use tracing_subscriber::registry::LookupSpan; @@ -68,8 +77,28 @@ impl OtelProvider { pub fn from(settings: &OtelSettings) -> Result, Box> { let log_enabled = !matches!(settings.exporter, OtelExporter::None); let trace_enabled = !matches!(settings.trace_exporter, OtelExporter::None); - let metric_exporter = crate::config::resolve_exporter(&settings.metrics_exporter); + let metrics_enabled = !matches!(metric_exporter, OtelExporter::None); + + if !log_enabled && !trace_enabled && !metrics_enabled { + // Tracestate propagation is process-global; clear it when these + // settings do not install an active provider. + crate::trace_context::set_tracestate_entries(BTreeMap::new())?; + debug!("No OTEL exporter enabled in settings."); + return Ok(None); + } + + // Provider setup below installs process-global OTEL state that cannot + // be rolled back, so reject invalid trace metadata before any setup + // path can mutate those globals. + if trace_enabled && settings.span_attributes.keys().any(String::is_empty) { + return Err(Box::new(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "configured span attribute key must not be empty", + ))); + } + crate::trace_context::validate_tracestate_entries(&settings.tracestate)?; + let metrics = if matches!(metric_exporter, OtelExporter::None) { None } else { @@ -85,20 +114,6 @@ impl OtelProvider { Some(MetricsClient::new(config)?) }; - if let Some(metrics) = metrics.as_ref() { - crate::metrics::install_global(metrics.clone()); - if matches!(settings.metrics_exporter, OtelExporter::Statsig) { - crate::metrics::install_global_statsig_settings(StatsigMetricsSettings { - environment: settings.environment.clone(), - }); - } - } - - if !log_enabled && !trace_enabled && metrics.is_none() { - debug!("No OTEL exporter enabled in settings."); - return Ok(None); - } - let log_resource = make_resource(settings, ResourceKind::Logs); let trace_resource = make_resource(settings, ResourceKind::Traces); let logger = log_enabled @@ -106,17 +121,32 @@ impl OtelProvider { .transpose()?; let tracer_provider = trace_enabled - .then(|| build_tracer_provider(&trace_resource, &settings.trace_exporter)) + .then(|| { + build_tracer_provider( + &trace_resource, + &settings.trace_exporter, + settings.span_attributes.clone(), + ) + }) .transpose()?; let tracer = tracer_provider .as_ref() .map(|provider| provider.tracer(settings.service_name.clone())); + crate::trace_context::set_tracestate_entries(settings.tracestate.clone())?; if let Some(provider) = tracer_provider.clone() { global::set_tracer_provider(provider); global::set_text_map_propagator(TraceContextPropagator::new()); } + if let Some(metrics) = metrics.as_ref() { + crate::metrics::install_global(metrics.clone()); + if matches!(settings.metrics_exporter, OtelExporter::Statsig) { + crate::metrics::install_global_statsig_settings(StatsigMetricsSettings { + environment: settings.environment.clone(), + }); + } + } Ok(Some(Self { logger, tracer_provider, @@ -222,6 +252,47 @@ fn normalize_host_name(host_name: &str) -> Option { (!host_name.is_empty()).then(|| host_name.to_owned()) } +fn tracer_provider_builder( + resource: &Resource, + span_attributes: BTreeMap, +) -> TracerProviderBuilder { + let builder = SdkTracerProvider::builder().with_resource(resource.clone()); + if span_attributes.is_empty() { + builder + } else { + builder.with_span_processor(SpanAttributesProcessor { + attributes: span_attributes, + }) + } +} + +/// Applies configured attributes when spans start. +/// +/// Resource attributes describe the provider process. These attributes are +/// per-span metadata, so they need to be attached before each span is exported. +#[derive(Debug)] +struct SpanAttributesProcessor { + attributes: BTreeMap, +} + +impl SpanProcessor for SpanAttributesProcessor { + fn on_start(&self, span: &mut Span, _cx: &Context) { + for (key, value) in self.attributes.iter() { + span.set_attribute(KeyValue::new(key.clone(), value.clone())); + } + } + + fn on_end(&self, _span: SpanData) {} + + fn force_flush(&self) -> OTelSdkResult { + Ok(()) + } + + fn shutdown_with_timeout(&self, _timeout: Duration) -> OTelSdkResult { + Ok(()) + } +} + fn build_logger( resource: &Resource, exporter: &OtelExporter, @@ -294,9 +365,10 @@ fn build_logger( fn build_tracer_provider( resource: &Resource, exporter: &OtelExporter, + span_attributes: BTreeMap, ) -> Result> { let span_exporter = match crate::config::resolve_exporter(exporter) { - OtelExporter::None => return Ok(SdkTracerProvider::builder().build()), + OtelExporter::None => return Ok(tracer_provider_builder(resource, span_attributes).build()), OtelExporter::Statsig => unreachable!("statsig exporter should be resolved"), OtelExporter::OtlpGrpc { endpoint, @@ -353,8 +425,7 @@ fn build_tracer_provider( TokioBatchSpanProcessor::builder(exporter_builder.build()?, runtime::Tokio) .build(); - return Ok(SdkTracerProvider::builder() - .with_resource(resource.clone()) + return Ok(tracer_provider_builder(resource, span_attributes) .with_span_processor(processor) .build()); } @@ -382,8 +453,7 @@ fn build_tracer_provider( let processor = BatchSpanProcessor::builder(span_exporter).build(); - Ok(SdkTracerProvider::builder() - .with_resource(resource.clone()) + Ok(tracer_provider_builder(resource, span_attributes) .with_span_processor(processor) .build()) } @@ -467,6 +537,8 @@ mod tests { trace_exporter: OtelExporter::None, metrics_exporter: OtelExporter::None, runtime_metrics: false, + span_attributes: BTreeMap::new(), + tracestate: BTreeMap::new(), } } } diff --git a/codex-rs/otel/src/trace_context.rs b/codex-rs/otel/src/trace_context.rs index 010078e990..c625a416a7 100644 --- a/codex-rs/otel/src/trace_context.rs +++ b/codex-rs/otel/src/trace_context.rs @@ -1,11 +1,16 @@ +use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::collections::HashMap; use std::env; +use std::str::FromStr; use std::sync::OnceLock; +use std::sync::RwLock; use codex_protocol::protocol::W3cTraceContext; use opentelemetry::Context; use opentelemetry::propagation::TextMapPropagator; use opentelemetry::trace::TraceContextExt; +use opentelemetry::trace::TraceState; use opentelemetry_sdk::propagation::TraceContextPropagator; use tracing::Span; use tracing::debug; @@ -16,6 +21,11 @@ const TRACEPARENT_ENV_VAR: &str = "TRACEPARENT"; const TRACESTATE_ENV_VAR: &str = "TRACESTATE"; static TRACEPARENT_CONTEXT: OnceLock> = OnceLock::new(); +// Trace context propagation can happen outside the provider object, so configured +// tracestate lives beside the process-global tracer provider. +static TRACESTATE_ENTRIES: OnceLock>>> = + OnceLock::new(); + pub fn current_span_w3c_trace_context() -> Option { span_w3c_trace_context(&Span::current()) } @@ -28,13 +38,28 @@ pub fn span_w3c_trace_context(span: &Span) -> Option { let mut headers = HashMap::new(); TraceContextPropagator::new().inject_context(&context, &mut headers); + let tracestate = headers.remove("tracestate"); + let configured_tracestate_guard = tracestate_entries() + .read() + .unwrap_or_else(std::sync::PoisonError::into_inner); Some(W3cTraceContext { traceparent: headers.remove("traceparent"), - tracestate: headers.remove("tracestate"), + tracestate: merge_tracestate_entries(tracestate.as_deref(), &configured_tracestate_guard), }) } +pub(crate) fn set_tracestate_entries( + entries: BTreeMap>, +) -> Result<(), Box> { + validate_tracestate_entries(&entries)?; + let mut guard = tracestate_entries() + .write() + .unwrap_or_else(std::sync::PoisonError::into_inner); + *guard = entries; + Ok(()) +} + pub fn current_span_trace_id() -> Option { let context = Span::current().context(); let span = context.span(); @@ -103,6 +128,177 @@ fn load_traceparent_context() -> Option { } } +fn tracestate_entries() -> &'static RwLock>> { + TRACESTATE_ENTRIES.get_or_init(|| RwLock::new(BTreeMap::new())) +} + +fn merge_tracestate_entries( + tracestate: Option<&str>, + configured_entries: &BTreeMap>, +) -> Option { + let mut trace_state = tracestate + .and_then(|tracestate| match TraceState::from_str(tracestate) { + Ok(trace_state) => Some(trace_state), + Err(err) => { + warn!("ignoring invalid tracestate while propagating trace context: {err}"); + None + } + }) + .unwrap_or_default(); + + // TraceState::insert places members at the front. Reverse iteration keeps + // deterministic map order while upserting fields inside configured members. + for (key, fields) in configured_entries.iter().rev() { + let value = merge_tracestate_member_fields(trace_state.get(key), fields); + trace_state = match trace_state.insert(key.clone(), value) { + Ok(trace_state) => trace_state, + Err(err) => { + warn!("ignoring configured tracestate while propagating trace context: {err}"); + break; + } + }; + } + + let tracestate = trace_state.header(); + (!tracestate.is_empty()).then_some(tracestate) +} + +/// Validates configured tracestate members before they are propagated in W3C trace context. +pub fn validate_tracestate_entries( + entries: &BTreeMap>, +) -> Result<(), Box> { + // Reject malformed entries before installing them so propagated trace + // context remains acceptable to other W3C Trace Context extractors. The + // SDK validates member keys and list structure, but configured member + // fields are joined into header values here and need stricter validation. + let entries = entries + .iter() + .map(|(key, fields)| encode_tracestate_member_fields(key, fields)) + .collect::, _>>()?; + TraceState::from_key_value( + entries + .iter() + .map(|(key, value)| (key.as_str(), value.as_str())), + ) + .map_err(|err| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("invalid configured tracestate: {err}"), + ) + })?; + Ok(()) +} + +/// Validates one configured tracestate member and its encoded field value. +pub fn validate_tracestate_member( + member_key: &str, + fields: &BTreeMap, +) -> Result<(), Box> { + let (key, value) = encode_tracestate_member_fields(member_key, fields)?; + TraceState::from_key_value([(key.as_str(), value.as_str())]).map_err(|err| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("invalid configured tracestate: {err}"), + ) + })?; + Ok(()) +} + +fn encode_tracestate_member_fields( + member_key: &str, + fields: &BTreeMap, +) -> Result<(String, String), Box> { + // Configured fields are encoded into one opaque tracestate member value. + // Validate both the field grammar and the final header value so malformed + // config cannot produce propagated trace context that downstream W3C + // extractors reject. + let mut encoded = Vec::with_capacity(fields.len()); + for (field_key, value) in fields { + if !is_configured_tracestate_field_key(field_key) { + return Err(invalid_tracestate_config(format!( + "invalid configured tracestate field key {member_key}.{field_key}" + ))); + } + if !is_configured_tracestate_field_value(value) { + return Err(invalid_tracestate_config(format!( + "invalid configured tracestate value for {member_key}.{field_key}" + ))); + } + encoded.push(format!("{field_key}:{value}")); + } + let value = encoded.join(";"); + if !is_header_safe_tracestate_member_value(&value) { + return Err(invalid_tracestate_config(format!( + "invalid configured tracestate value for {member_key}" + ))); + } + Ok((member_key.to_string(), value)) +} + +fn is_configured_tracestate_field_key(field_key: &str) -> bool { + !field_key.is_empty() + && field_key + .bytes() + .all(|byte| matches!(byte, b'!'..=b'~') && !matches!(byte, b':' | b';' | b',' | b'=')) +} + +fn is_configured_tracestate_field_value(value: &str) -> bool { + value + .bytes() + .all(|byte| is_tracestate_member_value_byte(byte) && byte != b';') +} + +fn is_header_safe_tracestate_member_value(value: &str) -> bool { + value.is_empty() + || (value.bytes().all(is_tracestate_member_value_byte) + && value.as_bytes().last().is_some_and(|byte| *byte != b' ')) +} + +fn is_tracestate_member_value_byte(byte: u8) -> bool { + matches!(byte, b' '..=b'~') && !matches!(byte, b',' | b'=') +} + +fn invalid_tracestate_config(message: String) -> Box { + Box::new(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + message, + )) +} + +fn merge_tracestate_member_fields( + existing: Option<&str>, + configured_fields: &BTreeMap, +) -> String { + // W3C TraceState treats member values as opaque strings. The config models + // values as semicolon-separated key:value fields so selected fields can be + // upserted without replacing unrelated fields in the same member. + let mut fields = Vec::new(); + let mut seen = BTreeSet::new(); + + if let Some(existing) = existing { + for field in existing.split(';').filter(|field| !field.is_empty()) { + if let Some((field_key, _)) = field.split_once(':') { + if let Some(value) = configured_fields.get(field_key) { + if seen.insert(field_key) { + fields.push(format!("{field_key}:{value}")); + } + continue; + } + seen.insert(field_key); + } + fields.push(field.to_string()); + } + } + + fields.extend( + configured_fields + .iter() + .filter(|(field_key, _)| !seen.contains(field_key.as_str())) + .map(|(field_key, value)| format!("{field_key}:{value}")), + ); + fields.join(";") +} + #[cfg(test)] mod tests { use super::context_from_trace_headers; diff --git a/codex-rs/otel/tests/suite/otlp_http_loopback.rs b/codex-rs/otel/tests/suite/otlp_http_loopback.rs index fd4e3531d8..4c2dd36f76 100644 --- a/codex-rs/otel/tests/suite/otlp_http_loopback.rs +++ b/codex-rs/otel/tests/suite/otlp_http_loopback.rs @@ -5,18 +5,25 @@ use codex_otel::OtelHttpProtocol; use codex_otel::OtelProvider; use codex_otel::OtelSettings; use codex_otel::Result; +use codex_otel::current_span_w3c_trace_context; +use codex_otel::set_parent_from_w3c_trace_context; +use codex_protocol::protocol::W3cTraceContext; +use std::collections::BTreeMap; use std::collections::HashMap; use std::io::Read as _; use std::io::Write as _; use std::net::TcpListener; use std::net::TcpStream; use std::path::PathBuf; +use std::sync::Mutex; use std::sync::mpsc; use std::thread; use std::time::Duration; use std::time::Instant; use tracing_subscriber::layer::SubscriberExt; +static TRACE_CONTEXT_CONFIG_LOCK: Mutex<()> = Mutex::new(()); + struct CapturedRequest { path: String, content_type: Option, @@ -217,9 +224,41 @@ fn otlp_http_exporter_sends_metrics_to_collector() -> Result<()> { Ok(()) } +#[test] +fn otel_provider_rejects_header_unsafe_configured_tracestate() { + let result = OtelProvider::from(&OtelSettings { + environment: "test".to_string(), + service_name: "codex-cli".to_string(), + service_version: env!("CARGO_PKG_VERSION").to_string(), + codex_home: PathBuf::from("."), + exporter: OtelExporter::None, + trace_exporter: OtelExporter::OtlpHttp { + endpoint: "http://127.0.0.1:1/v1/traces".to_string(), + headers: HashMap::new(), + protocol: OtelHttpProtocol::Json, + tls: None, + }, + metrics_exporter: OtelExporter::None, + runtime_metrics: false, + span_attributes: BTreeMap::new(), + tracestate: BTreeMap::from([( + "example".to_string(), + BTreeMap::from([("alpha".to_string(), "one\ntwo".to_string())]), + )]), + }); + + let Err(err) = result else { + panic!("expected header-unsafe configured tracestate to be rejected"); + }; + assert!(err.to_string().contains("configured tracestate value")); +} + #[test] fn otlp_http_exporter_sends_traces_to_collector() -> std::result::Result<(), Box> { + let _trace_context_config_guard = TRACE_CONTEXT_CONFIG_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); let listener = TcpListener::bind("127.0.0.1:0").expect("bind"); let addr = listener.local_addr().expect("local_addr"); listener.set_nonblocking(true).expect("set_nonblocking"); @@ -266,12 +305,23 @@ fn otlp_http_exporter_sends_traces_to_collector() }, metrics_exporter: OtelExporter::None, runtime_metrics: false, + span_attributes: BTreeMap::from([( + "test.configured_attribute".to_string(), + "configured-value".to_string(), + )]), + tracestate: BTreeMap::from([( + "example".to_string(), + BTreeMap::from([ + ("alpha".to_string(), "one".to_string()), + ("beta".to_string(), "two".to_string()), + ]), + )]), })? .expect("otel provider"); let tracing_layer = otel.tracing_layer().expect("tracing layer"); let subscriber = tracing_subscriber::registry().with(tracing_layer); - tracing::subscriber::with_default(subscriber, || { + let propagated_trace = tracing::subscriber::with_default(subscriber, || { let span = tracing::info_span!( "trace-loopback", otel.name = "trace-loopback", @@ -279,11 +329,28 @@ fn otlp_http_exporter_sends_traces_to_collector() rpc.system = "jsonrpc", rpc.method = "trace-loopback", ); + assert!(set_parent_from_w3c_trace_context( + &span, + &W3cTraceContext { + traceparent: Some( + "00-00000000000000000000000000000001-0000000000000002-01".to_string(), + ), + tracestate: Some("example=alpha:zero;keep:yes,other=value".to_string()), + }, + )); let _guard = span.enter(); + let propagated_trace = + current_span_w3c_trace_context().expect("current span should have trace context"); tracing::info!("trace loopback event"); + propagated_trace }); otel.shutdown(); + assert_eq!( + propagated_trace.tracestate.as_deref(), + Some("example=alpha:one;keep:yes;beta:two,other=value") + ); + server.join().expect("server join"); let captured = rx.recv_timeout(Duration::from_secs(1)).expect("captured"); @@ -321,6 +388,11 @@ fn otlp_http_exporter_sends_traces_to_collector() "expected service name not found; body prefix: {}", &body.chars().take(2000).collect::() ); + assert!( + body.contains("test.configured_attribute") && body.contains("configured-value"), + "expected configured span attribute not found; body prefix: {}", + &body.chars().take(2000).collect::() + ); Ok(()) } @@ -328,6 +400,9 @@ fn otlp_http_exporter_sends_traces_to_collector() #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn otlp_http_exporter_sends_traces_to_collector_in_tokio_runtime() -> std::result::Result<(), Box> { + let _trace_context_config_guard = TRACE_CONTEXT_CONFIG_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); let listener = TcpListener::bind("127.0.0.1:0").expect("bind"); let addr = listener.local_addr().expect("local_addr"); listener.set_nonblocking(true).expect("set_nonblocking"); @@ -374,6 +449,8 @@ async fn otlp_http_exporter_sends_traces_to_collector_in_tokio_runtime() }, metrics_exporter: OtelExporter::None, runtime_metrics: false, + span_attributes: BTreeMap::new(), + tracestate: BTreeMap::new(), })? .expect("otel provider"); let tracing_layer = otel.tracing_layer().expect("tracing layer"); @@ -436,6 +513,9 @@ async fn otlp_http_exporter_sends_traces_to_collector_in_tokio_runtime() #[test] fn otlp_http_exporter_sends_traces_to_collector_in_current_thread_tokio_runtime() -> std::result::Result<(), Box> { + let _trace_context_config_guard = TRACE_CONTEXT_CONFIG_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); let listener = TcpListener::bind("127.0.0.1:0").expect("bind"); let addr = listener.local_addr().expect("local_addr"); listener.set_nonblocking(true).expect("set_nonblocking"); @@ -490,6 +570,8 @@ fn otlp_http_exporter_sends_traces_to_collector_in_current_thread_tokio_runtime( }, metrics_exporter: OtelExporter::None, runtime_metrics: false, + span_attributes: BTreeMap::new(), + tracestate: BTreeMap::new(), }) .map_err(|err| err.to_string())? .expect("otel provider"); diff --git a/codex-rs/windows-sandbox-rs/src/wfp_setup.rs b/codex-rs/windows-sandbox-rs/src/wfp_setup.rs index bfcc2c069b..89d29118c2 100644 --- a/codex-rs/windows-sandbox-rs/src/wfp_setup.rs +++ b/codex-rs/windows-sandbox-rs/src/wfp_setup.rs @@ -5,6 +5,7 @@ use codex_otel::OtelExporter; use codex_otel::OtelProvider; use codex_otel::OtelSettings; use codex_otel::StatsigMetricsSettings; +use std::collections::BTreeMap; use std::path::Path; const WFP_SETUP_SERVICE_NAME: &str = "codex-windows-sandbox-setup"; @@ -54,6 +55,8 @@ fn build_wfp_metrics_provider( trace_exporter: OtelExporter::None, metrics_exporter: OtelExporter::Statsig, runtime_metrics: false, + span_attributes: BTreeMap::new(), + tracestate: BTreeMap::new(), }) .map_err(|err| anyhow::anyhow!("failed to initialize WFP setup metrics provider: {err}")) } diff --git a/docs/config.md b/docs/config.md index e0a0c5fc2f..759d6b94b8 100644 --- a/docs/config.md +++ b/docs/config.md @@ -26,3 +26,24 @@ When enabled, Codex appends a `Co-authored-by:` trailer using the configured attribution value. If `commit_attribution` is omitted, Codex uses `Codex `. Set `commit_attribution = ""` to disable the trailer while leaving the feature flag enabled. + +## OpenTelemetry Trace Metadata + +Codex can add static OpenTelemetry span attributes to exported trace spans and +static W3C tracestate fields to propagated trace context: + +```toml +[otel.span_attributes] +"example.trace_attr" = "enabled" + +[otel.tracestate.example] +alpha = "one" +beta = "two" +``` + +Nested `otel.tracestate` tables are encoded as semicolon-separated `key:value` +fields inside the named tracestate member. If propagated trace context already +has the named member, Codex upserts configured fields and preserves other fields +in that member. This config shape does not support setting opaque tracestate +member values. Invalid trace metadata entries are ignored during config load and +reported as startup warnings.