mirror of
https://github.com/openai/codex.git
synced 2026-05-24 13:04:29 +00:00
[rollout-trace] Add x-codex-inference-call-id header to inference calls. (#22311)
This allows us to attach call logs to inference requests in traces.
This commit is contained in:
1
codex-rs/Cargo.lock
generated
1
codex-rs/Cargo.lock
generated
@@ -3514,6 +3514,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"codex-code-mode",
|
||||
"codex-protocol",
|
||||
"http 1.4.0",
|
||||
"pretty_assertions",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
||||
@@ -1256,7 +1256,7 @@ impl ModelClientSession {
|
||||
self.client.state.auth_env_telemetry.clone(),
|
||||
);
|
||||
let compression = self.responses_request_compression(client_setup.auth.as_ref());
|
||||
let options = self
|
||||
let mut options = self
|
||||
.build_responses_options(turn_metadata_header, compression)
|
||||
.await;
|
||||
|
||||
@@ -1269,6 +1269,7 @@ impl ModelClientSession {
|
||||
service_tier.clone(),
|
||||
)?;
|
||||
let inference_trace_attempt = inference_trace.start_attempt();
|
||||
inference_trace_attempt.add_request_headers(&mut options.extra_headers);
|
||||
inference_trace_attempt.record_started(&request);
|
||||
let client = ApiResponsesClient::new(
|
||||
transport,
|
||||
|
||||
@@ -16,10 +16,11 @@ workspace = true
|
||||
anyhow = { workspace = true }
|
||||
codex-code-mode = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
http = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
uuid = { workspace = true, features = ["v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = { workspace = true }
|
||||
|
||||
@@ -7,13 +7,15 @@
|
||||
use std::fmt::Display;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::TokenUsage;
|
||||
use http::HeaderMap;
|
||||
use http::HeaderValue;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value as JsonValue;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::model::AgentThreadId;
|
||||
use crate::model::CodexTurnId;
|
||||
@@ -23,7 +25,7 @@ use crate::raw_event::RawTraceEventContext;
|
||||
use crate::raw_event::RawTraceEventPayload;
|
||||
use crate::writer::TraceWriter;
|
||||
|
||||
static NEXT_INFERENCE_ATTEMPT: AtomicU64 = AtomicU64::new(1);
|
||||
const INFERENCE_CALL_ID_HEADER: &str = "x-codex-inference-call-id";
|
||||
|
||||
/// Turn-local inference tracing context.
|
||||
///
|
||||
@@ -140,6 +142,30 @@ impl InferenceTraceAttempt {
|
||||
}
|
||||
}
|
||||
|
||||
fn inference_call_id(&self) -> Option<&str> {
|
||||
match &self.state {
|
||||
InferenceTraceAttemptState::Disabled => None,
|
||||
InferenceTraceAttemptState::Enabled(attempt) => {
|
||||
Some(attempt.inference_call_id.as_str())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds rollout-trace propagation headers for this attempt when tracing is enabled.
|
||||
pub fn add_request_headers(&self, headers: &mut HeaderMap) {
|
||||
let Some(inference_call_id) = self.inference_call_id() else {
|
||||
return;
|
||||
};
|
||||
let Ok(inference_call_id) = HeaderValue::from_str(inference_call_id) else {
|
||||
// These IDs are generated internally as UUID strings, so rejection
|
||||
// should be impossible in practice. Tracing remains best-effort,
|
||||
// though, and must never make provider requests fail.
|
||||
return;
|
||||
};
|
||||
|
||||
headers.insert(INFERENCE_CALL_ID_HEADER, inference_call_id);
|
||||
}
|
||||
|
||||
/// Records the exact request object about to be sent to the model provider.
|
||||
pub fn record_started(&self, request: &impl Serialize) {
|
||||
let InferenceTraceAttemptState::Enabled(attempt) = &self.state else {
|
||||
@@ -315,8 +341,7 @@ pub(crate) fn trace_response_item_json(item: &ResponseItem) -> JsonValue {
|
||||
}
|
||||
|
||||
fn next_inference_call_id() -> InferenceCallId {
|
||||
let ordinal = NEXT_INFERENCE_ATTEMPT.fetch_add(1, Ordering::Relaxed);
|
||||
format!("inference:{ordinal}")
|
||||
Uuid::new_v4().to_string()
|
||||
}
|
||||
|
||||
fn write_json_payload_best_effort(
|
||||
@@ -372,6 +397,44 @@ mod tests {
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::replay_bundle;
|
||||
|
||||
#[test]
|
||||
fn disabled_attempt_adds_no_request_headers() {
|
||||
let mut headers = HeaderMap::new();
|
||||
|
||||
InferenceTraceAttempt::disabled().add_request_headers(&mut headers);
|
||||
|
||||
assert!(headers.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn enabled_attempt_adds_inference_request_header() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let writer = Arc::new(TraceWriter::create(
|
||||
temp.path(),
|
||||
"trace-1".to_string(),
|
||||
"rollout-1".to_string(),
|
||||
"thread-root".to_string(),
|
||||
)?);
|
||||
let context = InferenceTraceContext::enabled(
|
||||
writer,
|
||||
"thread-root".to_string(),
|
||||
"turn-1".to_string(),
|
||||
"gpt-test".to_string(),
|
||||
"test-provider".to_string(),
|
||||
);
|
||||
let attempt = context.start_attempt();
|
||||
let mut headers = HeaderMap::new();
|
||||
|
||||
attempt.add_request_headers(&mut headers);
|
||||
|
||||
let header = headers
|
||||
.get(INFERENCE_CALL_ID_HEADER)
|
||||
.expect("inference header present");
|
||||
assert_eq!(Some(header.to_str()?), attempt.inference_call_id());
|
||||
assert!(Uuid::parse_str(header.to_str()?).is_ok());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn enabled_context_records_replayable_inference_attempt() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
|
||||
Reference in New Issue
Block a user