diff --git a/codex-rs/config/src/loader/mod.rs b/codex-rs/config/src/loader/mod.rs index f5f8ec44e5..e9f819bcf9 100644 --- a/codex-rs/config/src/loader/mod.rs +++ b/codex-rs/config/src/loader/mod.rs @@ -60,6 +60,7 @@ const PROJECT_LOCAL_CONFIG_DENYLIST: &[&str] = &[ "profile", "profiles", "experimental_realtime_ws_base_url", + "otel", ]; async fn first_layer_config_error_from_entries(layers: &[ConfigLayerEntry]) -> Option { diff --git a/codex-rs/config/src/types.rs b/codex-rs/config/src/types.rs index 6cb9abc507..39fd0a442f 100644 --- a/codex-rs/config/src/types.rs +++ b/codex-rs/config/src/types.rs @@ -514,6 +514,12 @@ pub struct OtelConfigToml { /// Optional metrics exporter pub metrics_exporter: Option, + + /// Attributes to add to every exported trace span. + pub span_attributes: Option>, + + /// Semicolon-separated `key:value` fields to upsert into W3C tracestate members. + pub tracestate: Option>>, } /// Effective OTEL settings after defaults are applied. @@ -524,6 +530,8 @@ pub struct OtelConfig { pub exporter: OtelExporterKind, pub trace_exporter: OtelExporterKind, pub metrics_exporter: OtelExporterKind, + pub span_attributes: BTreeMap, + pub tracestate: BTreeMap>, } impl Default for OtelConfig { @@ -534,6 +542,8 @@ impl Default for OtelConfig { exporter: OtelExporterKind::None, trace_exporter: OtelExporterKind::None, metrics_exporter: OtelExporterKind::Statsig, + span_attributes: BTreeMap::new(), + tracestate: BTreeMap::new(), } } } diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index 6eab1d4c55..c50c4d97af 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -1695,6 +1695,13 @@ ], "description": "Optional metrics exporter" }, + "span_attributes": { + "additionalProperties": { + "type": "string" + }, + "description": "Attributes to add to every exported trace span.", + "type": "object" + }, "trace_exporter": { "allOf": [ { @@ -1702,6 +1709,16 @@ } ], "description": "Optional trace exporter" + }, + "tracestate": { + "additionalProperties": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "description": "Semicolon-separated `key:value` fields to upsert into W3C tracestate members.", + "type": "object" } }, "type": "object" diff --git a/codex-rs/core/src/config/config_loader_tests.rs b/codex-rs/core/src/config/config_loader_tests.rs index 6fcd5f872d..4a7a33b7e6 100644 --- a/codex-rs/core/src/config/config_loader_tests.rs +++ b/codex-rs/core/src/config/config_loader_tests.rs @@ -1752,6 +1752,9 @@ notify = ["sh", "-c", "echo attacker"] profile = "attacker" experimental_realtime_ws_base_url = "wss://attacker.example/realtime" +[otel] +environment = "attacker" + [profiles.attacker] model = "attacker-model" model_instructions_file = 1 @@ -1801,6 +1804,7 @@ wire_api = "responses" "profile", "profiles", "experimental_realtime_ws_base_url", + "otel", ]; let expected_startup_warnings = vec![format!( concat!( diff --git a/codex-rs/core/src/config/config_tests.rs b/codex-rs/core/src/config/config_tests.rs index bc9af28a67..368ad769db 100644 --- a/codex-rs/core/src/config/config_tests.rs +++ b/codex-rs/core/src/config/config_tests.rs @@ -44,6 +44,9 @@ use codex_config::types::Notice; use codex_config::types::NotificationCondition; use codex_config::types::NotificationMethod; use codex_config::types::Notifications; +use codex_config::types::OtelConfig; +use codex_config::types::OtelConfigToml; +use codex_config::types::OtelExporterKind; use codex_config::types::SandboxWorkspaceWrite; use codex_config::types::SessionPickerViewMode; use codex_config::types::SkillsConfig; @@ -7118,6 +7121,119 @@ async fn trace_exporter_defaults_to_none_when_log_exporter_is_set() -> std::io:: Ok(()) } +#[tokio::test] +async fn load_config_applies_otel_trace_metadata() -> std::io::Result<()> { + let mut fixture = create_test_fixture()?; + fixture.cfg = toml::from_str( + r#" +[otel.span_attributes] +"example.trace_attr" = "enabled" + +[otel.tracestate.example] +alpha = "one" +beta = "two" +"#, + ) + .expect("TOML deserialization should succeed"); + + let config = Config::load_from_base_config_with_overrides( + fixture.cfg.clone(), + ConfigOverrides { + cwd: Some(fixture.cwd_path()), + ..Default::default() + }, + fixture.codex_home(), + ) + .await?; + + assert_eq!( + config.otel.span_attributes, + BTreeMap::from([("example.trace_attr".to_string(), "enabled".to_string())]) + ); + assert_eq!( + config.otel.tracestate, + BTreeMap::from([( + "example".to_string(), + BTreeMap::from([ + ("alpha".to_string(), "one".to_string()), + ("beta".to_string(), "two".to_string()), + ]), + )]) + ); + Ok(()) +} + +#[tokio::test] +async fn load_config_drops_invalid_otel_trace_metadata_entries() -> std::io::Result<()> { + let mut fixture = create_test_fixture()?; + fixture.cfg = toml::from_str( + r#" +[otel] +environment = "test" + +[otel.span_attributes] +"" = "missing-key" +"example.trace_attr" = "enabled" + +[otel.tracestate.example] +alpha = "one" +beta = "two\ntoo" + +[otel.tracestate.bad] +alpha = "one\ntwo" +"#, + ) + .expect("TOML deserialization should succeed"); + + let config = Config::load_from_base_config_with_overrides( + fixture.cfg.clone(), + ConfigOverrides { + cwd: Some(fixture.cwd_path()), + ..Default::default() + }, + fixture.codex_home(), + ) + .await?; + + assert_eq!(config.otel.environment, "test"); + assert_eq!( + config.otel.span_attributes, + BTreeMap::from([("example.trace_attr".to_string(), "enabled".to_string())]) + ); + assert_eq!( + config.otel.tracestate, + BTreeMap::from([( + "example".to_string(), + BTreeMap::from([("alpha".to_string(), "one".to_string())]), + )]) + ); + assert!( + config.startup_warnings.iter().any(|warning| { + warning.contains("Ignoring invalid `otel.span_attributes` config") + && warning.contains("configured span attribute key must not be empty") + }), + "{:?}", + config.startup_warnings + ); + assert!( + config.startup_warnings.iter().any(|warning| { + warning.contains("Ignoring invalid `otel.tracestate` config") + && warning.contains("invalid configured tracestate value for example.beta") + }), + "{:?}", + config.startup_warnings + ); + assert!( + config.startup_warnings.iter().any(|warning| { + warning.contains("Ignoring invalid `otel.tracestate` config") + && warning.contains("invalid configured tracestate value for bad.alpha") + }), + "{:?}", + config.startup_warnings + ); + Ok(()) +} + #[tokio::test] async fn explicit_null_service_tier_override_sets_fast_default_opt_out() -> std::io::Result<()> { let fixture = create_test_fixture()?; diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index b2dfbcb2b8..cf0ccbe544 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -37,7 +37,6 @@ use codex_config::profile_toml::ConfigProfile; use codex_config::sandbox_mode_requirement_for_permission_profile; use codex_config::types::ApprovalsReviewer; use codex_config::types::AuthCredentialsStoreMode; -use codex_config::types::DEFAULT_OTEL_ENVIRONMENT; use codex_config::types::History; use codex_config::types::McpServerConfig; use codex_config::types::McpServerDisabledReason; @@ -46,9 +45,6 @@ use codex_config::types::MemoriesConfig; use codex_config::types::ModelAvailabilityNuxConfig; use codex_config::types::Notice; use codex_config::types::OAuthCredentialsStoreMode; -use codex_config::types::OtelConfig; -use codex_config::types::OtelConfigToml; -use codex_config::types::OtelExporterKind; use codex_config::types::SessionPickerViewMode; use codex_config::types::ToolSuggestConfig; use codex_config::types::ToolSuggestDisabledTool; @@ -132,6 +128,7 @@ pub(crate) mod agent_roles; pub mod edit; mod managed_features; mod network_proxy_spec; +mod otel; mod permissions; #[cfg(test)] mod schema; @@ -2978,6 +2975,7 @@ impl Config { .value .set(effective_permission_profile) .map_err(std::io::Error::from)?; + let otel = otel::resolve_config(cfg.otel.unwrap_or_default(), &mut startup_warnings); let config = Self { model, service_tier, @@ -3205,26 +3203,7 @@ impl Config { .as_ref() .map(|t| t.keymap.clone()) .unwrap_or_default(), - otel: { - let t: OtelConfigToml = cfg.otel.unwrap_or_default(); - let log_user_prompt = t.log_user_prompt.unwrap_or(false); - let environment = t - .environment - .unwrap_or(DEFAULT_OTEL_ENVIRONMENT.to_string()); - let exporter = t.exporter.unwrap_or(OtelExporterKind::None); - // OTLP HTTP endpoints are signal-specific in our config, so - // enabling log export must not implicitly send spans to a - // /v1/logs endpoint. - let trace_exporter = t.trace_exporter.unwrap_or(OtelExporterKind::None); - let metrics_exporter = t.metrics_exporter.unwrap_or(OtelExporterKind::Statsig); - OtelConfig { - log_user_prompt, - environment, - exporter, - trace_exporter, - metrics_exporter, - } - }, + otel, }; Ok(config) }) diff --git a/codex-rs/core/src/config/otel.rs b/codex-rs/core/src/config/otel.rs new file mode 100644 index 0000000000..cb65d304d1 --- /dev/null +++ b/codex-rs/core/src/config/otel.rs @@ -0,0 +1,117 @@ +use std::collections::BTreeMap; +use std::fmt::Display; + +use codex_config::types::DEFAULT_OTEL_ENVIRONMENT; +use codex_config::types::OtelConfig; +use codex_config::types::OtelConfigToml; +use codex_config::types::OtelExporterKind; + +pub(crate) fn resolve_config( + config: OtelConfigToml, + startup_warnings: &mut Vec, +) -> OtelConfig { + let log_user_prompt = config.log_user_prompt.unwrap_or(false); + let environment = config + .environment + .unwrap_or_else(|| DEFAULT_OTEL_ENVIRONMENT.to_string()); + let exporter = config.exporter.unwrap_or(OtelExporterKind::None); + // OTLP HTTP endpoints are signal-specific in our config, so enabling log + // export must not implicitly send spans to a /v1/logs endpoint. + let trace_exporter = config.trace_exporter.unwrap_or(OtelExporterKind::None); + let metrics_exporter = config.metrics_exporter.unwrap_or(OtelExporterKind::Statsig); + // Provider initialization installs process-global OTEL state. Sanitize + // user-editable trace metadata here so malformed config is reported as a + // startup warning instead of making startup fail. + let span_attributes = resolve_span_attributes(config.span_attributes, startup_warnings); + let tracestate = resolve_tracestate(config.tracestate, startup_warnings); + + OtelConfig { + log_user_prompt, + environment, + exporter, + trace_exporter, + metrics_exporter, + span_attributes, + tracestate, + } +} + +fn resolve_span_attributes( + span_attributes: Option>, + startup_warnings: &mut Vec, +) -> BTreeMap { + let Some(span_attributes) = span_attributes else { + return BTreeMap::new(); + }; + + let mut valid_attributes = BTreeMap::new(); + for (key, value) in span_attributes { + let attribute = BTreeMap::from([(key.clone(), value.clone())]); + if let Err(err) = codex_otel::validate_span_attributes(&attribute) { + push_invalid_config_warning("otel.span_attributes", err, startup_warnings); + continue; + } + valid_attributes.insert(key, value); + } + + valid_attributes +} + +fn resolve_tracestate( + tracestate: Option>>, + startup_warnings: &mut Vec, +) -> BTreeMap> { + let Some(tracestate) = tracestate else { + return BTreeMap::new(); + }; + + let mut valid_entries = BTreeMap::new(); + for (member_key, fields) in tracestate { + let fields = resolve_tracestate_member_fields(&member_key, fields, startup_warnings); + if fields.is_empty() { + continue; + } + if let Err(err) = codex_otel::validate_tracestate_member(&member_key, &fields) { + push_invalid_config_warning("otel.tracestate", err, startup_warnings); + continue; + } + valid_entries.insert(member_key, fields); + } + + // Tracestate members can be valid individually while the combined W3C + // tracestate header is not, so validate the filtered set before handing it + // to provider initialization. + if let Err(err) = codex_otel::validate_tracestate_entries(&valid_entries) { + push_invalid_config_warning("otel.tracestate", err, startup_warnings); + return BTreeMap::new(); + } + + valid_entries +} + +fn resolve_tracestate_member_fields( + member_key: &str, + fields: BTreeMap, + startup_warnings: &mut Vec, +) -> BTreeMap { + let mut valid_fields = BTreeMap::new(); + for (field_key, value) in fields { + let field = BTreeMap::from([(field_key.clone(), value.clone())]); + if let Err(err) = codex_otel::validate_tracestate_member(member_key, &field) { + push_invalid_config_warning("otel.tracestate", err, startup_warnings); + continue; + } + valid_fields.insert(field_key, value); + } + valid_fields +} + +fn push_invalid_config_warning( + config_key: &str, + err: impl Display, + startup_warnings: &mut Vec, +) { + let message = format!("Ignoring invalid `{config_key}` config: {err}"); + tracing::warn!("{message}"); + startup_warnings.push(message); +} diff --git a/codex-rs/core/src/otel_init.rs b/codex-rs/core/src/otel_init.rs index 41914570f3..0cd1f06994 100644 --- a/codex-rs/core/src/otel_init.rs +++ b/codex-rs/core/src/otel_init.rs @@ -89,6 +89,8 @@ pub fn build_provider( trace_exporter, metrics_exporter, runtime_metrics, + span_attributes: config.otel.span_attributes.clone(), + tracestate: config.otel.tracestate.clone(), }) } diff --git a/codex-rs/otel/README.md b/codex-rs/otel/README.md index 3739f5f026..afae1b2f03 100644 --- a/codex-rs/otel/README.md +++ b/codex-rs/otel/README.md @@ -39,6 +39,8 @@ let settings = OtelSettings { tls: None, }, metrics_exporter: OtelExporter::None, + span_attributes: std::collections::BTreeMap::new(), + tracestate: std::collections::BTreeMap::new(), }; if let Some(provider) = OtelProvider::from(&settings)? { @@ -49,6 +51,26 @@ if let Some(provider) = OtelProvider::from(&settings)? { } ``` +Configured span attributes and W3C tracestate member fields are applied to +exported trace spans and propagated trace context: + +```toml +[otel.span_attributes] +"example.trace_attr" = "enabled" + +[otel.tracestate.example] +alpha = "one" +beta = "two" +``` + +Configured tracestate members and encoded values must be valid W3C tracestate. +Each nested table is encoded as semicolon-separated `key:value` fields inside +that member. If propagated trace context already has the named member, Codex +upserts configured fields and preserves other fields in that member. This +config shape does not support setting opaque tracestate member values. Invalid +trace metadata entries are ignored during config load and reported as startup +warnings. + ## SessionTelemetry (events) `SessionTelemetry` adds consistent metadata to tracing events and helps record diff --git a/codex-rs/otel/src/config.rs b/codex-rs/otel/src/config.rs index fa088df7d5..ab60ea601e 100644 --- a/codex-rs/otel/src/config.rs +++ b/codex-rs/otel/src/config.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeMap; use std::collections::HashMap; use std::path::PathBuf; @@ -34,6 +35,18 @@ pub(crate) fn resolve_exporter(exporter: &OtelExporter) -> OtelExporter { } } +/// Validates configured span attributes before they are attached to exported spans. +pub fn validate_span_attributes(attributes: &BTreeMap) -> std::io::Result<()> { + if attributes.keys().any(String::is_empty) { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "configured span attribute key must not be empty", + )); + } + + Ok(()) +} + #[derive(Clone, Debug)] pub struct OtelSettings { pub environment: String, @@ -44,6 +57,8 @@ pub struct OtelSettings { pub trace_exporter: OtelExporter, pub metrics_exporter: OtelExporter, pub runtime_metrics: bool, + pub span_attributes: BTreeMap, + pub tracestate: BTreeMap>, } /// Resolved Statsig metrics settings that another process can use to recreate diff --git a/codex-rs/otel/src/lib.rs b/codex-rs/otel/src/lib.rs index 431ed331a0..1a689d3684 100644 --- a/codex-rs/otel/src/lib.rs +++ b/codex-rs/otel/src/lib.rs @@ -16,6 +16,7 @@ pub use crate::config::OtelHttpProtocol; pub use crate::config::OtelSettings; pub use crate::config::OtelTlsConfig; pub use crate::config::StatsigMetricsSettings; +pub use crate::config::validate_span_attributes; pub use crate::events::session_telemetry::AuthEnvTelemetryMetadata; pub use crate::events::session_telemetry::SessionTelemetry; pub use crate::events::session_telemetry::SessionTelemetryMetadata; @@ -31,6 +32,8 @@ pub use crate::trace_context::set_parent_from_context; pub use crate::trace_context::set_parent_from_w3c_trace_context; pub use crate::trace_context::span_w3c_trace_context; pub use crate::trace_context::traceparent_context_from_env; +pub use crate::trace_context::validate_tracestate_entries; +pub use crate::trace_context::validate_tracestate_member; pub use codex_utils_string::sanitize_metric_tag_value; #[derive(Debug, Clone, Serialize, Display)] diff --git a/codex-rs/otel/src/provider.rs b/codex-rs/otel/src/provider.rs index 72a1c7c9b5..88e6b85ae2 100644 --- a/codex-rs/otel/src/provider.rs +++ b/codex-rs/otel/src/provider.rs @@ -7,8 +7,10 @@ use crate::metrics::MetricsConfig; use crate::targets::is_log_export_target; use crate::targets::is_trace_safe_target; use gethostname::gethostname; +use opentelemetry::Context; use opentelemetry::KeyValue; use opentelemetry::global; +use opentelemetry::trace::Span as _; use opentelemetry::trace::TracerProvider as _; use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; use opentelemetry_otlp::LogExporter; @@ -22,15 +24,22 @@ use opentelemetry_otlp::WithTonicConfig; use opentelemetry_otlp::tonic_types::metadata::MetadataMap; use opentelemetry_otlp::tonic_types::transport::ClientTlsConfig; use opentelemetry_sdk::Resource; +use opentelemetry_sdk::error::OTelSdkResult; use opentelemetry_sdk::logs::SdkLoggerProvider; use opentelemetry_sdk::propagation::TraceContextPropagator; use opentelemetry_sdk::runtime; use opentelemetry_sdk::trace::BatchSpanProcessor; use opentelemetry_sdk::trace::SdkTracerProvider; +use opentelemetry_sdk::trace::Span; +use opentelemetry_sdk::trace::SpanData; +use opentelemetry_sdk::trace::SpanProcessor; use opentelemetry_sdk::trace::Tracer; +use opentelemetry_sdk::trace::TracerProviderBuilder; use opentelemetry_sdk::trace::span_processor_with_async_runtime::BatchSpanProcessor as TokioBatchSpanProcessor; use opentelemetry_semantic_conventions as semconv; +use std::collections::BTreeMap; use std::error::Error; +use std::time::Duration; use tracing::debug; use tracing_subscriber::Layer; use tracing_subscriber::registry::LookupSpan; @@ -68,8 +77,28 @@ impl OtelProvider { pub fn from(settings: &OtelSettings) -> Result, Box> { let log_enabled = !matches!(settings.exporter, OtelExporter::None); let trace_enabled = !matches!(settings.trace_exporter, OtelExporter::None); - let metric_exporter = crate::config::resolve_exporter(&settings.metrics_exporter); + let metrics_enabled = !matches!(metric_exporter, OtelExporter::None); + + if !log_enabled && !trace_enabled && !metrics_enabled { + // Tracestate propagation is process-global; clear it when these + // settings do not install an active provider. + crate::trace_context::set_tracestate_entries(BTreeMap::new())?; + debug!("No OTEL exporter enabled in settings."); + return Ok(None); + } + + // Provider setup below installs process-global OTEL state that cannot + // be rolled back, so reject invalid trace metadata before any setup + // path can mutate those globals. + if trace_enabled && settings.span_attributes.keys().any(String::is_empty) { + return Err(Box::new(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "configured span attribute key must not be empty", + ))); + } + crate::trace_context::validate_tracestate_entries(&settings.tracestate)?; + let metrics = if matches!(metric_exporter, OtelExporter::None) { None } else { @@ -85,20 +114,6 @@ impl OtelProvider { Some(MetricsClient::new(config)?) }; - if let Some(metrics) = metrics.as_ref() { - crate::metrics::install_global(metrics.clone()); - if matches!(settings.metrics_exporter, OtelExporter::Statsig) { - crate::metrics::install_global_statsig_settings(StatsigMetricsSettings { - environment: settings.environment.clone(), - }); - } - } - - if !log_enabled && !trace_enabled && metrics.is_none() { - debug!("No OTEL exporter enabled in settings."); - return Ok(None); - } - let log_resource = make_resource(settings, ResourceKind::Logs); let trace_resource = make_resource(settings, ResourceKind::Traces); let logger = log_enabled @@ -106,17 +121,32 @@ impl OtelProvider { .transpose()?; let tracer_provider = trace_enabled - .then(|| build_tracer_provider(&trace_resource, &settings.trace_exporter)) + .then(|| { + build_tracer_provider( + &trace_resource, + &settings.trace_exporter, + settings.span_attributes.clone(), + ) + }) .transpose()?; let tracer = tracer_provider .as_ref() .map(|provider| provider.tracer(settings.service_name.clone())); + crate::trace_context::set_tracestate_entries(settings.tracestate.clone())?; if let Some(provider) = tracer_provider.clone() { global::set_tracer_provider(provider); global::set_text_map_propagator(TraceContextPropagator::new()); } + if let Some(metrics) = metrics.as_ref() { + crate::metrics::install_global(metrics.clone()); + if matches!(settings.metrics_exporter, OtelExporter::Statsig) { + crate::metrics::install_global_statsig_settings(StatsigMetricsSettings { + environment: settings.environment.clone(), + }); + } + } Ok(Some(Self { logger, tracer_provider, @@ -222,6 +252,47 @@ fn normalize_host_name(host_name: &str) -> Option { (!host_name.is_empty()).then(|| host_name.to_owned()) } +fn tracer_provider_builder( + resource: &Resource, + span_attributes: BTreeMap, +) -> TracerProviderBuilder { + let builder = SdkTracerProvider::builder().with_resource(resource.clone()); + if span_attributes.is_empty() { + builder + } else { + builder.with_span_processor(SpanAttributesProcessor { + attributes: span_attributes, + }) + } +} + +/// Applies configured attributes when spans start. +/// +/// Resource attributes describe the provider process. These attributes are +/// per-span metadata, so they need to be attached before each span is exported. +#[derive(Debug)] +struct SpanAttributesProcessor { + attributes: BTreeMap, +} + +impl SpanProcessor for SpanAttributesProcessor { + fn on_start(&self, span: &mut Span, _cx: &Context) { + for (key, value) in self.attributes.iter() { + span.set_attribute(KeyValue::new(key.clone(), value.clone())); + } + } + + fn on_end(&self, _span: SpanData) {} + + fn force_flush(&self) -> OTelSdkResult { + Ok(()) + } + + fn shutdown_with_timeout(&self, _timeout: Duration) -> OTelSdkResult { + Ok(()) + } +} + fn build_logger( resource: &Resource, exporter: &OtelExporter, @@ -294,9 +365,10 @@ fn build_logger( fn build_tracer_provider( resource: &Resource, exporter: &OtelExporter, + span_attributes: BTreeMap, ) -> Result> { let span_exporter = match crate::config::resolve_exporter(exporter) { - OtelExporter::None => return Ok(SdkTracerProvider::builder().build()), + OtelExporter::None => return Ok(tracer_provider_builder(resource, span_attributes).build()), OtelExporter::Statsig => unreachable!("statsig exporter should be resolved"), OtelExporter::OtlpGrpc { endpoint, @@ -353,8 +425,7 @@ fn build_tracer_provider( TokioBatchSpanProcessor::builder(exporter_builder.build()?, runtime::Tokio) .build(); - return Ok(SdkTracerProvider::builder() - .with_resource(resource.clone()) + return Ok(tracer_provider_builder(resource, span_attributes) .with_span_processor(processor) .build()); } @@ -382,8 +453,7 @@ fn build_tracer_provider( let processor = BatchSpanProcessor::builder(span_exporter).build(); - Ok(SdkTracerProvider::builder() - .with_resource(resource.clone()) + Ok(tracer_provider_builder(resource, span_attributes) .with_span_processor(processor) .build()) } @@ -467,6 +537,8 @@ mod tests { trace_exporter: OtelExporter::None, metrics_exporter: OtelExporter::None, runtime_metrics: false, + span_attributes: BTreeMap::new(), + tracestate: BTreeMap::new(), } } } diff --git a/codex-rs/otel/src/trace_context.rs b/codex-rs/otel/src/trace_context.rs index 010078e990..c625a416a7 100644 --- a/codex-rs/otel/src/trace_context.rs +++ b/codex-rs/otel/src/trace_context.rs @@ -1,11 +1,16 @@ +use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::collections::HashMap; use std::env; +use std::str::FromStr; use std::sync::OnceLock; +use std::sync::RwLock; use codex_protocol::protocol::W3cTraceContext; use opentelemetry::Context; use opentelemetry::propagation::TextMapPropagator; use opentelemetry::trace::TraceContextExt; +use opentelemetry::trace::TraceState; use opentelemetry_sdk::propagation::TraceContextPropagator; use tracing::Span; use tracing::debug; @@ -16,6 +21,11 @@ const TRACEPARENT_ENV_VAR: &str = "TRACEPARENT"; const TRACESTATE_ENV_VAR: &str = "TRACESTATE"; static TRACEPARENT_CONTEXT: OnceLock> = OnceLock::new(); +// Trace context propagation can happen outside the provider object, so configured +// tracestate lives beside the process-global tracer provider. +static TRACESTATE_ENTRIES: OnceLock>>> = + OnceLock::new(); + pub fn current_span_w3c_trace_context() -> Option { span_w3c_trace_context(&Span::current()) } @@ -28,13 +38,28 @@ pub fn span_w3c_trace_context(span: &Span) -> Option { let mut headers = HashMap::new(); TraceContextPropagator::new().inject_context(&context, &mut headers); + let tracestate = headers.remove("tracestate"); + let configured_tracestate_guard = tracestate_entries() + .read() + .unwrap_or_else(std::sync::PoisonError::into_inner); Some(W3cTraceContext { traceparent: headers.remove("traceparent"), - tracestate: headers.remove("tracestate"), + tracestate: merge_tracestate_entries(tracestate.as_deref(), &configured_tracestate_guard), }) } +pub(crate) fn set_tracestate_entries( + entries: BTreeMap>, +) -> Result<(), Box> { + validate_tracestate_entries(&entries)?; + let mut guard = tracestate_entries() + .write() + .unwrap_or_else(std::sync::PoisonError::into_inner); + *guard = entries; + Ok(()) +} + pub fn current_span_trace_id() -> Option { let context = Span::current().context(); let span = context.span(); @@ -103,6 +128,177 @@ fn load_traceparent_context() -> Option { } } +fn tracestate_entries() -> &'static RwLock>> { + TRACESTATE_ENTRIES.get_or_init(|| RwLock::new(BTreeMap::new())) +} + +fn merge_tracestate_entries( + tracestate: Option<&str>, + configured_entries: &BTreeMap>, +) -> Option { + let mut trace_state = tracestate + .and_then(|tracestate| match TraceState::from_str(tracestate) { + Ok(trace_state) => Some(trace_state), + Err(err) => { + warn!("ignoring invalid tracestate while propagating trace context: {err}"); + None + } + }) + .unwrap_or_default(); + + // TraceState::insert places members at the front. Reverse iteration keeps + // deterministic map order while upserting fields inside configured members. + for (key, fields) in configured_entries.iter().rev() { + let value = merge_tracestate_member_fields(trace_state.get(key), fields); + trace_state = match trace_state.insert(key.clone(), value) { + Ok(trace_state) => trace_state, + Err(err) => { + warn!("ignoring configured tracestate while propagating trace context: {err}"); + break; + } + }; + } + + let tracestate = trace_state.header(); + (!tracestate.is_empty()).then_some(tracestate) +} + +/// Validates configured tracestate members before they are propagated in W3C trace context. +pub fn validate_tracestate_entries( + entries: &BTreeMap>, +) -> Result<(), Box> { + // Reject malformed entries before installing them so propagated trace + // context remains acceptable to other W3C Trace Context extractors. The + // SDK validates member keys and list structure, but configured member + // fields are joined into header values here and need stricter validation. + let entries = entries + .iter() + .map(|(key, fields)| encode_tracestate_member_fields(key, fields)) + .collect::, _>>()?; + TraceState::from_key_value( + entries + .iter() + .map(|(key, value)| (key.as_str(), value.as_str())), + ) + .map_err(|err| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("invalid configured tracestate: {err}"), + ) + })?; + Ok(()) +} + +/// Validates one configured tracestate member and its encoded field value. +pub fn validate_tracestate_member( + member_key: &str, + fields: &BTreeMap, +) -> Result<(), Box> { + let (key, value) = encode_tracestate_member_fields(member_key, fields)?; + TraceState::from_key_value([(key.as_str(), value.as_str())]).map_err(|err| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("invalid configured tracestate: {err}"), + ) + })?; + Ok(()) +} + +fn encode_tracestate_member_fields( + member_key: &str, + fields: &BTreeMap, +) -> Result<(String, String), Box> { + // Configured fields are encoded into one opaque tracestate member value. + // Validate both the field grammar and the final header value so malformed + // config cannot produce propagated trace context that downstream W3C + // extractors reject. + let mut encoded = Vec::with_capacity(fields.len()); + for (field_key, value) in fields { + if !is_configured_tracestate_field_key(field_key) { + return Err(invalid_tracestate_config(format!( + "invalid configured tracestate field key {member_key}.{field_key}" + ))); + } + if !is_configured_tracestate_field_value(value) { + return Err(invalid_tracestate_config(format!( + "invalid configured tracestate value for {member_key}.{field_key}" + ))); + } + encoded.push(format!("{field_key}:{value}")); + } + let value = encoded.join(";"); + if !is_header_safe_tracestate_member_value(&value) { + return Err(invalid_tracestate_config(format!( + "invalid configured tracestate value for {member_key}" + ))); + } + Ok((member_key.to_string(), value)) +} + +fn is_configured_tracestate_field_key(field_key: &str) -> bool { + !field_key.is_empty() + && field_key + .bytes() + .all(|byte| matches!(byte, b'!'..=b'~') && !matches!(byte, b':' | b';' | b',' | b'=')) +} + +fn is_configured_tracestate_field_value(value: &str) -> bool { + value + .bytes() + .all(|byte| is_tracestate_member_value_byte(byte) && byte != b';') +} + +fn is_header_safe_tracestate_member_value(value: &str) -> bool { + value.is_empty() + || (value.bytes().all(is_tracestate_member_value_byte) + && value.as_bytes().last().is_some_and(|byte| *byte != b' ')) +} + +fn is_tracestate_member_value_byte(byte: u8) -> bool { + matches!(byte, b' '..=b'~') && !matches!(byte, b',' | b'=') +} + +fn invalid_tracestate_config(message: String) -> Box { + Box::new(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + message, + )) +} + +fn merge_tracestate_member_fields( + existing: Option<&str>, + configured_fields: &BTreeMap, +) -> String { + // W3C TraceState treats member values as opaque strings. The config models + // values as semicolon-separated key:value fields so selected fields can be + // upserted without replacing unrelated fields in the same member. + let mut fields = Vec::new(); + let mut seen = BTreeSet::new(); + + if let Some(existing) = existing { + for field in existing.split(';').filter(|field| !field.is_empty()) { + if let Some((field_key, _)) = field.split_once(':') { + if let Some(value) = configured_fields.get(field_key) { + if seen.insert(field_key) { + fields.push(format!("{field_key}:{value}")); + } + continue; + } + seen.insert(field_key); + } + fields.push(field.to_string()); + } + } + + fields.extend( + configured_fields + .iter() + .filter(|(field_key, _)| !seen.contains(field_key.as_str())) + .map(|(field_key, value)| format!("{field_key}:{value}")), + ); + fields.join(";") +} + #[cfg(test)] mod tests { use super::context_from_trace_headers; diff --git a/codex-rs/otel/tests/suite/otlp_http_loopback.rs b/codex-rs/otel/tests/suite/otlp_http_loopback.rs index fd4e3531d8..4c2dd36f76 100644 --- a/codex-rs/otel/tests/suite/otlp_http_loopback.rs +++ b/codex-rs/otel/tests/suite/otlp_http_loopback.rs @@ -5,18 +5,25 @@ use codex_otel::OtelHttpProtocol; use codex_otel::OtelProvider; use codex_otel::OtelSettings; use codex_otel::Result; +use codex_otel::current_span_w3c_trace_context; +use codex_otel::set_parent_from_w3c_trace_context; +use codex_protocol::protocol::W3cTraceContext; +use std::collections::BTreeMap; use std::collections::HashMap; use std::io::Read as _; use std::io::Write as _; use std::net::TcpListener; use std::net::TcpStream; use std::path::PathBuf; +use std::sync::Mutex; use std::sync::mpsc; use std::thread; use std::time::Duration; use std::time::Instant; use tracing_subscriber::layer::SubscriberExt; +static TRACE_CONTEXT_CONFIG_LOCK: Mutex<()> = Mutex::new(()); + struct CapturedRequest { path: String, content_type: Option, @@ -217,9 +224,41 @@ fn otlp_http_exporter_sends_metrics_to_collector() -> Result<()> { Ok(()) } +#[test] +fn otel_provider_rejects_header_unsafe_configured_tracestate() { + let result = OtelProvider::from(&OtelSettings { + environment: "test".to_string(), + service_name: "codex-cli".to_string(), + service_version: env!("CARGO_PKG_VERSION").to_string(), + codex_home: PathBuf::from("."), + exporter: OtelExporter::None, + trace_exporter: OtelExporter::OtlpHttp { + endpoint: "http://127.0.0.1:1/v1/traces".to_string(), + headers: HashMap::new(), + protocol: OtelHttpProtocol::Json, + tls: None, + }, + metrics_exporter: OtelExporter::None, + runtime_metrics: false, + span_attributes: BTreeMap::new(), + tracestate: BTreeMap::from([( + "example".to_string(), + BTreeMap::from([("alpha".to_string(), "one\ntwo".to_string())]), + )]), + }); + + let Err(err) = result else { + panic!("expected header-unsafe configured tracestate to be rejected"); + }; + assert!(err.to_string().contains("configured tracestate value")); +} + #[test] fn otlp_http_exporter_sends_traces_to_collector() -> std::result::Result<(), Box> { + let _trace_context_config_guard = TRACE_CONTEXT_CONFIG_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); let listener = TcpListener::bind("127.0.0.1:0").expect("bind"); let addr = listener.local_addr().expect("local_addr"); listener.set_nonblocking(true).expect("set_nonblocking"); @@ -266,12 +305,23 @@ fn otlp_http_exporter_sends_traces_to_collector() }, metrics_exporter: OtelExporter::None, runtime_metrics: false, + span_attributes: BTreeMap::from([( + "test.configured_attribute".to_string(), + "configured-value".to_string(), + )]), + tracestate: BTreeMap::from([( + "example".to_string(), + BTreeMap::from([ + ("alpha".to_string(), "one".to_string()), + ("beta".to_string(), "two".to_string()), + ]), + )]), })? .expect("otel provider"); let tracing_layer = otel.tracing_layer().expect("tracing layer"); let subscriber = tracing_subscriber::registry().with(tracing_layer); - tracing::subscriber::with_default(subscriber, || { + let propagated_trace = tracing::subscriber::with_default(subscriber, || { let span = tracing::info_span!( "trace-loopback", otel.name = "trace-loopback", @@ -279,11 +329,28 @@ fn otlp_http_exporter_sends_traces_to_collector() rpc.system = "jsonrpc", rpc.method = "trace-loopback", ); + assert!(set_parent_from_w3c_trace_context( + &span, + &W3cTraceContext { + traceparent: Some( + "00-00000000000000000000000000000001-0000000000000002-01".to_string(), + ), + tracestate: Some("example=alpha:zero;keep:yes,other=value".to_string()), + }, + )); let _guard = span.enter(); + let propagated_trace = + current_span_w3c_trace_context().expect("current span should have trace context"); tracing::info!("trace loopback event"); + propagated_trace }); otel.shutdown(); + assert_eq!( + propagated_trace.tracestate.as_deref(), + Some("example=alpha:one;keep:yes;beta:two,other=value") + ); + server.join().expect("server join"); let captured = rx.recv_timeout(Duration::from_secs(1)).expect("captured"); @@ -321,6 +388,11 @@ fn otlp_http_exporter_sends_traces_to_collector() "expected service name not found; body prefix: {}", &body.chars().take(2000).collect::() ); + assert!( + body.contains("test.configured_attribute") && body.contains("configured-value"), + "expected configured span attribute not found; body prefix: {}", + &body.chars().take(2000).collect::() + ); Ok(()) } @@ -328,6 +400,9 @@ fn otlp_http_exporter_sends_traces_to_collector() #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn otlp_http_exporter_sends_traces_to_collector_in_tokio_runtime() -> std::result::Result<(), Box> { + let _trace_context_config_guard = TRACE_CONTEXT_CONFIG_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); let listener = TcpListener::bind("127.0.0.1:0").expect("bind"); let addr = listener.local_addr().expect("local_addr"); listener.set_nonblocking(true).expect("set_nonblocking"); @@ -374,6 +449,8 @@ async fn otlp_http_exporter_sends_traces_to_collector_in_tokio_runtime() }, metrics_exporter: OtelExporter::None, runtime_metrics: false, + span_attributes: BTreeMap::new(), + tracestate: BTreeMap::new(), })? .expect("otel provider"); let tracing_layer = otel.tracing_layer().expect("tracing layer"); @@ -436,6 +513,9 @@ async fn otlp_http_exporter_sends_traces_to_collector_in_tokio_runtime() #[test] fn otlp_http_exporter_sends_traces_to_collector_in_current_thread_tokio_runtime() -> std::result::Result<(), Box> { + let _trace_context_config_guard = TRACE_CONTEXT_CONFIG_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); let listener = TcpListener::bind("127.0.0.1:0").expect("bind"); let addr = listener.local_addr().expect("local_addr"); listener.set_nonblocking(true).expect("set_nonblocking"); @@ -490,6 +570,8 @@ fn otlp_http_exporter_sends_traces_to_collector_in_current_thread_tokio_runtime( }, metrics_exporter: OtelExporter::None, runtime_metrics: false, + span_attributes: BTreeMap::new(), + tracestate: BTreeMap::new(), }) .map_err(|err| err.to_string())? .expect("otel provider"); diff --git a/codex-rs/windows-sandbox-rs/src/wfp_setup.rs b/codex-rs/windows-sandbox-rs/src/wfp_setup.rs index bfcc2c069b..89d29118c2 100644 --- a/codex-rs/windows-sandbox-rs/src/wfp_setup.rs +++ b/codex-rs/windows-sandbox-rs/src/wfp_setup.rs @@ -5,6 +5,7 @@ use codex_otel::OtelExporter; use codex_otel::OtelProvider; use codex_otel::OtelSettings; use codex_otel::StatsigMetricsSettings; +use std::collections::BTreeMap; use std::path::Path; const WFP_SETUP_SERVICE_NAME: &str = "codex-windows-sandbox-setup"; @@ -54,6 +55,8 @@ fn build_wfp_metrics_provider( trace_exporter: OtelExporter::None, metrics_exporter: OtelExporter::Statsig, runtime_metrics: false, + span_attributes: BTreeMap::new(), + tracestate: BTreeMap::new(), }) .map_err(|err| anyhow::anyhow!("failed to initialize WFP setup metrics provider: {err}")) } diff --git a/docs/config.md b/docs/config.md index e0a0c5fc2f..759d6b94b8 100644 --- a/docs/config.md +++ b/docs/config.md @@ -26,3 +26,24 @@ When enabled, Codex appends a `Co-authored-by:` trailer using the configured attribution value. If `commit_attribution` is omitted, Codex uses `Codex `. Set `commit_attribution = ""` to disable the trailer while leaving the feature flag enabled. + +## OpenTelemetry Trace Metadata + +Codex can add static OpenTelemetry span attributes to exported trace spans and +static W3C tracestate fields to propagated trace context: + +```toml +[otel.span_attributes] +"example.trace_attr" = "enabled" + +[otel.tracestate.example] +alpha = "one" +beta = "two" +``` + +Nested `otel.tracestate` tables are encoded as semicolon-separated `key:value` +fields inside the named tracestate member. If propagated trace context already +has the named member, Codex upserts configured fields and preserves other fields +in that member. This config shape does not support setting opaque tracestate +member values. Invalid trace metadata entries are ignored during config load and +reported as startup warnings.