Propagate the rollout-trace inference call id as a request header.

Summary of what this patch shows:
  * rollout-trace: `next_inference_call_id` now returns a UUID v4 string
    instead of `inference:{ordinal}` from a process-wide AtomicU64 counter.
  * rollout-trace: new `InferenceTraceAttempt::add_request_headers` inserts
    `x-codex-inference-call-id` into a `HeaderMap` when tracing is enabled.
    It is a no-op for disabled attempts and when the id cannot be converted
    into a `HeaderValue`, so tracing never fails the request.
  * core: `ModelClientSession` calls `add_request_headers` on
    `options.extra_headers` right before `record_started`.
  * Cargo: adds the `http` dependency and enables the `v4` feature of `uuid`.

NOTE(review): the call id format changes from `inference:N` to a bare UUID;
consumers of the old format are not visible in this patch -- confirm none
parse the `inference:` prefix.
NOTE(review): line breaks and indentation below were reconstructed from a
whitespace-collapsed copy of the patch using the hunk line counts; verify
context whitespace against the target tree before applying.

diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index e92dbb07da..efd7d58d5a 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -3514,6 +3514,7 @@ dependencies = [
  "anyhow",
  "codex-code-mode",
  "codex-protocol",
+ "http 1.4.0",
  "pretty_assertions",
  "serde",
  "serde_json",
diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs
index 9c0a7d742f..545d131c69 100644
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -1256,7 +1256,7 @@ impl ModelClientSession {
                 self.client.state.auth_env_telemetry.clone(),
             );
             let compression = self.responses_request_compression(client_setup.auth.as_ref());
-            let options = self
+            let mut options = self
                 .build_responses_options(turn_metadata_header, compression)
                 .await;
 
@@ -1269,6 +1269,7 @@ impl ModelClientSession {
                 service_tier.clone(),
             )?;
             let inference_trace_attempt = inference_trace.start_attempt();
+            inference_trace_attempt.add_request_headers(&mut options.extra_headers);
             inference_trace_attempt.record_started(&request);
             let client = ApiResponsesClient::new(
                 transport,
diff --git a/codex-rs/rollout-trace/Cargo.toml b/codex-rs/rollout-trace/Cargo.toml
index a743046115..b368c9acc5 100644
--- a/codex-rs/rollout-trace/Cargo.toml
+++ b/codex-rs/rollout-trace/Cargo.toml
@@ -16,10 +16,11 @@ workspace = true
 anyhow = { workspace = true }
 codex-code-mode = { workspace = true }
 codex-protocol = { workspace = true }
+http = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
 tracing = { workspace = true }
-uuid = { workspace = true }
+uuid = { workspace = true, features = ["v4"] }
 
 [dev-dependencies]
 pretty_assertions = { workspace = true }
diff --git a/codex-rs/rollout-trace/src/inference.rs b/codex-rs/rollout-trace/src/inference.rs
index b05f891e7b..20366c2c70 100644
--- a/codex-rs/rollout-trace/src/inference.rs
+++ b/codex-rs/rollout-trace/src/inference.rs
@@ -7,13 +7,15 @@
 use std::fmt::Display;
 use std::sync::Arc;
 use std::sync::atomic::AtomicBool;
-use std::sync::atomic::AtomicU64;
 use std::sync::atomic::Ordering;
 
 use codex_protocol::models::ResponseItem;
 use codex_protocol::protocol::TokenUsage;
+use http::HeaderMap;
+use http::HeaderValue;
 use serde::Serialize;
 use serde_json::Value as JsonValue;
+use uuid::Uuid;
 
 use crate::model::AgentThreadId;
 use crate::model::CodexTurnId;
@@ -23,7 +25,7 @@ use crate::raw_event::RawTraceEventContext;
 use crate::raw_event::RawTraceEventPayload;
 use crate::writer::TraceWriter;
 
-static NEXT_INFERENCE_ATTEMPT: AtomicU64 = AtomicU64::new(1);
+const INFERENCE_CALL_ID_HEADER: &str = "x-codex-inference-call-id";
 
 /// Turn-local inference tracing context.
 ///
@@ -140,6 +142,30 @@ impl InferenceTraceAttempt {
         }
     }
 
+    fn inference_call_id(&self) -> Option<&str> {
+        match &self.state {
+            InferenceTraceAttemptState::Disabled => None,
+            InferenceTraceAttemptState::Enabled(attempt) => {
+                Some(attempt.inference_call_id.as_str())
+            }
+        }
+    }
+
+    /// Adds rollout-trace propagation headers for this attempt when tracing is enabled.
+    pub fn add_request_headers(&self, headers: &mut HeaderMap) {
+        let Some(inference_call_id) = self.inference_call_id() else {
+            return;
+        };
+        let Ok(inference_call_id) = HeaderValue::from_str(inference_call_id) else {
+            // These IDs are generated internally as UUID strings, so rejection
+            // should be impossible in practice. Tracing remains best-effort,
+            // though, and must never make provider requests fail.
+            return;
+        };
+
+        headers.insert(INFERENCE_CALL_ID_HEADER, inference_call_id);
+    }
+
     /// Records the exact request object about to be sent to the model provider.
     pub fn record_started(&self, request: &impl Serialize) {
         let InferenceTraceAttemptState::Enabled(attempt) = &self.state else {
@@ -315,8 +341,7 @@ pub(crate) fn trace_response_item_json(item: &ResponseItem) -> JsonValue {
 }
 
 fn next_inference_call_id() -> InferenceCallId {
-    let ordinal = NEXT_INFERENCE_ATTEMPT.fetch_add(1, Ordering::Relaxed);
-    format!("inference:{ordinal}")
+    Uuid::new_v4().to_string()
 }
 
 fn write_json_payload_best_effort(
@@ -372,6 +397,44 @@ mod tests {
     use crate::model::ExecutionStatus;
     use crate::replay_bundle;
 
+    #[test]
+    fn disabled_attempt_adds_no_request_headers() {
+        let mut headers = HeaderMap::new();
+
+        InferenceTraceAttempt::disabled().add_request_headers(&mut headers);
+
+        assert!(headers.is_empty());
+    }
+
+    #[test]
+    fn enabled_attempt_adds_inference_request_header() -> anyhow::Result<()> {
+        let temp = TempDir::new()?;
+        let writer = Arc::new(TraceWriter::create(
+            temp.path(),
+            "trace-1".to_string(),
+            "rollout-1".to_string(),
+            "thread-root".to_string(),
+        )?);
+        let context = InferenceTraceContext::enabled(
+            writer,
+            "thread-root".to_string(),
+            "turn-1".to_string(),
+            "gpt-test".to_string(),
+            "test-provider".to_string(),
+        );
+        let attempt = context.start_attempt();
+        let mut headers = HeaderMap::new();
+
+        attempt.add_request_headers(&mut headers);
+
+        let header = headers
+            .get(INFERENCE_CALL_ID_HEADER)
+            .expect("inference header present");
+        assert_eq!(Some(header.to_str()?), attempt.inference_call_id());
+        assert!(Uuid::parse_str(header.to_str()?).is_ok());
+        Ok(())
+    }
+
     #[test]
     fn enabled_context_records_replayable_inference_attempt() -> anyhow::Result<()> {
         let temp = TempDir::new()?;