[rollout-trace] Add x-codex-inference-call-id header to inference calls. (#22311)

This allows us to attach call logs to inference requests in traces.
This commit is contained in:
cassirer-openai
2026-05-12 05:55:11 -07:00
committed by GitHub
parent d996f5366f
commit cb55b769d1
4 changed files with 72 additions and 6 deletions

1
codex-rs/Cargo.lock generated
View File

@@ -3514,6 +3514,7 @@ dependencies = [
"anyhow",
"codex-code-mode",
"codex-protocol",
"http 1.4.0",
"pretty_assertions",
"serde",
"serde_json",

View File

@@ -1256,7 +1256,7 @@ impl ModelClientSession {
self.client.state.auth_env_telemetry.clone(),
);
let compression = self.responses_request_compression(client_setup.auth.as_ref());
let options = self
let mut options = self
.build_responses_options(turn_metadata_header, compression)
.await;
@@ -1269,6 +1269,7 @@ impl ModelClientSession {
service_tier.clone(),
)?;
let inference_trace_attempt = inference_trace.start_attempt();
inference_trace_attempt.add_request_headers(&mut options.extra_headers);
inference_trace_attempt.record_started(&request);
let client = ApiResponsesClient::new(
transport,

View File

@@ -16,10 +16,11 @@ workspace = true
anyhow = { workspace = true }
codex-code-mode = { workspace = true }
codex-protocol = { workspace = true }
http = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true }
uuid = { workspace = true, features = ["v4"] }
[dev-dependencies]
pretty_assertions = { workspace = true }

View File

@@ -7,13 +7,15 @@
use std::fmt::Display;
use std::sync::Arc;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::TokenUsage;
use http::HeaderMap;
use http::HeaderValue;
use serde::Serialize;
use serde_json::Value as JsonValue;
use uuid::Uuid;
use crate::model::AgentThreadId;
use crate::model::CodexTurnId;
@@ -23,7 +25,7 @@ use crate::raw_event::RawTraceEventContext;
use crate::raw_event::RawTraceEventPayload;
use crate::writer::TraceWriter;
static NEXT_INFERENCE_ATTEMPT: AtomicU64 = AtomicU64::new(1);
const INFERENCE_CALL_ID_HEADER: &str = "x-codex-inference-call-id";
/// Turn-local inference tracing context.
///
@@ -140,6 +142,30 @@ impl InferenceTraceAttempt {
}
}
/// Returns this attempt's inference call ID, or `None` when the attempt is
/// in the disabled state.
fn inference_call_id(&self) -> Option<&str> {
    let InferenceTraceAttemptState::Enabled(enabled) = &self.state else {
        return None;
    };
    Some(enabled.inference_call_id.as_str())
}
/// Inserts the rollout-trace propagation header for this attempt.
///
/// `headers` is left untouched when the attempt has no inference call ID
/// (tracing disabled) or when the ID cannot be encoded as a header value.
pub fn add_request_headers(&self, headers: &mut HeaderMap) {
    // The IDs are internally generated UUID strings, so the header-value
    // conversion is not expected to fail. Tracing is best-effort, however,
    // and must never make provider requests fail, so a rejected value is
    // dropped silently instead of being surfaced as an error.
    let header_value = self
        .inference_call_id()
        .and_then(|call_id| HeaderValue::from_str(call_id).ok());

    if let Some(value) = header_value {
        headers.insert(INFERENCE_CALL_ID_HEADER, value);
    }
}
/// Records the exact request object about to be sent to the model provider.
pub fn record_started(&self, request: &impl Serialize) {
let InferenceTraceAttemptState::Enabled(attempt) = &self.state else {
@@ -315,8 +341,7 @@ pub(crate) fn trace_response_item_json(item: &ResponseItem) -> JsonValue {
}
/// Builds the `InferenceCallId` string for an inference attempt.
// NOTE(review): this span is a diff hunk rendered without +/- markers, so the
// counter-based lines (`fetch_add` + `format!`) and the `Uuid::new_v4()` line
// appear together. Presumably the UUID line is the replacement body and the
// two lines above it were removed — confirm against the repository.
fn next_inference_call_id() -> InferenceCallId {
let ordinal = NEXT_INFERENCE_ATTEMPT.fetch_add(1, Ordering::Relaxed);
format!("inference:{ordinal}")
Uuid::new_v4().to_string()
}
fn write_json_payload_best_effort(
@@ -372,6 +397,44 @@ mod tests {
use crate::model::ExecutionStatus;
use crate::replay_bundle;
/// A disabled attempt must leave the outgoing header map empty.
#[test]
fn disabled_attempt_adds_no_request_headers() {
    let attempt = InferenceTraceAttempt::disabled();
    let mut request_headers = HeaderMap::new();

    attempt.add_request_headers(&mut request_headers);

    assert!(request_headers.is_empty());
}
/// An enabled attempt must publish its inference call ID under
/// `INFERENCE_CALL_ID_HEADER`, and that ID must parse as a UUID.
#[test]
fn enabled_attempt_adds_inference_request_header() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let trace_writer = TraceWriter::create(
        temp.path(),
        "trace-1".to_string(),
        "rollout-1".to_string(),
        "thread-root".to_string(),
    )?;
    let context = InferenceTraceContext::enabled(
        Arc::new(trace_writer),
        "thread-root".to_string(),
        "turn-1".to_string(),
        "gpt-test".to_string(),
        "test-provider".to_string(),
    );
    let attempt = context.start_attempt();

    let mut headers = HeaderMap::new();
    attempt.add_request_headers(&mut headers);

    // Decode the header once and reuse the text for both checks.
    let header_text = headers
        .get(INFERENCE_CALL_ID_HEADER)
        .expect("inference header present")
        .to_str()?;

    assert_eq!(Some(header_text), attempt.inference_call_id());
    assert!(Uuid::parse_str(header_text).is_ok());
    Ok(())
}
#[test]
fn enabled_context_records_replayable_inference_attempt() -> anyhow::Result<()> {
let temp = TempDir::new()?;