Add auth env observability (#14905)

CXC-410 Emit Env Var Status with `/feedback` report

Adds further auth observability on top of #14611.

[Unset](https://openai.sentry.io/issues/7340419168/?project=4510195390611458&query=019cfa8d-c1ba-7002-96fa-e35fc340551d&referrer=issue-stream)

[Set](https://openai.sentry.io/issues/7340426331/?project=4510195390611458&query=019cfa91-aba1-7823-ab7e-762edfbc0ed4&referrer=issue-stream)
<img width="1063" height="610" alt="image"
src="https://github.com/user-attachments/assets/937ab026-1c2d-4757-81d5-5f31b853113e"
/>


###### Summary
- Adds auth-env telemetry that records whether the main auth-related env
overrides were present, on both the session-start and request paths.
- Threads those auth-env fields through `/responses`, websocket, and
`/models` telemetry and feedback metadata.
- Buckets custom provider `env_key` configuration to a safe
`"configured"` value instead of emitting raw config text.
- Keeps the slice observability-only: no raw token values or raw URLs
are emitted.

###### Rationale (from spec findings)
- 401 and auth-path debugging needs a way to distinguish env-driven auth
paths from sessions with no auth env override.
- Startup and model-refresh failures need the same auth-env diagnostics
as normal request failures.
- Feedback and Sentry tags need the same auth-env signal as OTel events
so reports can be triaged consistently.
- Custom provider config is user-controlled text, so the telemetry
contract must stay presence-only / bucketed.

###### Scope
- Adds a small `AuthEnvTelemetry` bundle for env presence collection and
threads it through the main request/session telemetry paths.
- Does not add endpoint/base-url/provider-header/geo routing attribution
or broader telemetry API redesign.

###### Trade-offs
- `provider_env_key_name` is bucketed to `"configured"` instead of
preserving the literal configured env var name.
- `/models` is included because startup/model-refresh auth failures need
the same diagnostics, but broader parity work remains out of scope.
- This slice keeps the existing telemetry APIs and layers auth-env
fields onto them rather than redesigning the metadata model.

###### Client follow-up
- Add the separate endpoint/base-url attribution slice if routing-source
diagnosis is still needed.
- Add provider-header or residency attribution only if auth-env presence
proves insufficient in real reports.
- Revisit whether any additional auth-related env inputs need safe
bucketing after more 401 triage data.

###### Testing
- `cargo test -p codex-core emit_feedback_request_tags -- --nocapture`
- `cargo test -p codex-core
collect_auth_env_telemetry_buckets_provider_env_key_name -- --nocapture`
- `cargo test -p codex-core
models_request_telemetry_emits_auth_env_feedback_tags_on_failure --
--nocapture`
- `cargo test -p codex-otel
otel_export_routing_policy_routes_api_request_auth_observability --
--nocapture`
- `cargo test -p codex-otel
otel_export_routing_policy_routes_websocket_connect_auth_observability
-- --nocapture`
- `cargo test -p codex-otel
otel_export_routing_policy_routes_websocket_request_transport_observability
-- --nocapture`
- `cargo test -p codex-core --no-run --message-format short`
- `cargo test -p codex-otel --no-run --message-format short`

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Colin Young
2026-03-17 14:26:27 -07:00
committed by GitHub
parent ee756eb80f
commit 0d2ff40a58
12 changed files with 770 additions and 161 deletions

View File

@@ -1,3 +1,4 @@
use codex_otel::AuthEnvTelemetryMetadata;
use codex_otel::OtelProvider;
use codex_otel::SessionTelemetry;
use codex_otel::TelemetryAuthMode;
@@ -18,6 +19,9 @@ use tracing_subscriber::filter::filter_fn;
use tracing_subscriber::layer::SubscriberExt;
use codex_protocol::ThreadId;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::SessionSource;
use codex_protocol::user_input::UserInput;
@@ -76,6 +80,17 @@ fn find_span_event_by_name_attr<'a>(
.unwrap_or_else(|| panic!("missing span event: {event_name}"))
}
/// Returns the fixed auth-env telemetry fixture that the tests in this file
/// attach to their session telemetry via `with_auth_env`.
///
/// The values are hard-coded so that assertions on the emitted `auth.env_*`
/// attributes are deterministic. The provider env key name is the bucketed
/// `"configured"` marker rather than a literal env var name.
fn auth_env_metadata() -> AuthEnvTelemetryMetadata {
    // Optional provider fields are bound first so the struct literal below
    // can use field-init shorthand.
    let provider_env_key_name = Some(String::from("configured"));
    let provider_env_key_present = Some(true);

    AuthEnvTelemetryMetadata {
        openai_api_key_env_present: true,
        codex_api_key_env_present: false,
        codex_api_key_env_enabled: true,
        provider_env_key_name,
        provider_env_key_present,
        refresh_token_url_override_present: true,
    }
}
#[test]
fn otel_export_routing_policy_routes_user_prompt_log_and_trace_events() {
let log_exporter = InMemoryLogExporter::default();
@@ -482,9 +497,21 @@ fn otel_export_routing_policy_routes_api_request_auth_observability() {
true,
"tty".to_string(),
SessionSource::Cli,
);
)
.with_auth_env(auth_env_metadata());
let root_span = tracing::info_span!("root");
let _root_guard = root_span.enter();
manager.conversation_starts(
"openai",
None,
ReasoningSummary::Auto,
None,
None,
AskForApproval::Never,
SandboxPolicy::DangerFullAccess,
Vec::new(),
None,
);
manager.record_api_request(
1,
Some(401),
@@ -507,6 +534,20 @@ fn otel_export_routing_policy_routes_api_request_auth_observability() {
tracer_provider.force_flush().expect("flush traces");
let logs = log_exporter.get_emitted_logs().expect("log export");
let conversation_log = find_log_by_event_name(&logs, "codex.conversation_starts");
let conversation_log_attrs = log_attributes(&conversation_log.record);
assert_eq!(
conversation_log_attrs
.get("auth.env_openai_api_key_present")
.map(String::as_str),
Some("true")
);
assert_eq!(
conversation_log_attrs
.get("auth.env_provider_key_name")
.map(String::as_str),
Some("configured")
);
let request_log = find_log_by_event_name(&logs, "codex.api_request");
let request_log_attrs = log_attributes(&request_log.record);
assert_eq!(
@@ -547,8 +588,29 @@ fn otel_export_routing_policy_routes_api_request_auth_observability() {
request_log_attrs.get("auth.error").map(String::as_str),
Some("missing_authorization_header")
);
assert_eq!(
request_log_attrs
.get("auth.env_codex_api_key_enabled")
.map(String::as_str),
Some("true")
);
assert_eq!(
request_log_attrs
.get("auth.env_refresh_token_url_override_present")
.map(String::as_str),
Some("true")
);
let spans = span_exporter.get_finished_spans().expect("span export");
let conversation_trace_event =
find_span_event_by_name_attr(&spans[0].events.events, "codex.conversation_starts");
let conversation_trace_attrs = span_event_attributes(conversation_trace_event);
assert_eq!(
conversation_trace_attrs
.get("auth.env_provider_key_present")
.map(String::as_str),
Some("true")
);
let request_trace_event =
find_span_event_by_name_attr(&spans[0].events.events, "codex.api_request");
let request_trace_attrs = span_event_attributes(request_trace_event);
@@ -574,6 +636,12 @@ fn otel_export_routing_policy_routes_api_request_auth_observability() {
request_trace_attrs.get("endpoint").map(String::as_str),
Some("/responses")
);
assert_eq!(
request_trace_attrs
.get("auth.env_openai_api_key_present")
.map(String::as_str),
Some("true")
);
}
#[test]
@@ -614,7 +682,8 @@ fn otel_export_routing_policy_routes_websocket_connect_auth_observability() {
true,
"tty".to_string(),
SessionSource::Cli,
);
)
.with_auth_env(auth_env_metadata());
let root_span = tracing::info_span!("root");
let _root_guard = root_span.enter();
manager.record_websocket_connect(
@@ -667,6 +736,12 @@ fn otel_export_routing_policy_routes_websocket_connect_auth_observability() {
.map(String::as_str),
Some("false")
);
assert_eq!(
connect_log_attrs
.get("auth.env_provider_key_name")
.map(String::as_str),
Some("configured")
);
let spans = span_exporter.get_finished_spans().expect("span export");
let connect_trace_event =
@@ -678,6 +753,12 @@ fn otel_export_routing_policy_routes_websocket_connect_auth_observability() {
.map(String::as_str),
Some("reload")
);
assert_eq!(
connect_trace_attrs
.get("auth.env_refresh_token_url_override_present")
.map(String::as_str),
Some("true")
);
}
#[test]
@@ -718,7 +799,8 @@ fn otel_export_routing_policy_routes_websocket_request_transport_observability()
true,
"tty".to_string(),
SessionSource::Cli,
);
)
.with_auth_env(auth_env_metadata());
let root_span = tracing::info_span!("root");
let _root_guard = root_span.enter();
manager.record_websocket_request(
@@ -744,6 +826,12 @@ fn otel_export_routing_policy_routes_websocket_request_transport_observability()
request_log_attrs.get("error.message").map(String::as_str),
Some("stream error")
);
assert_eq!(
request_log_attrs
.get("auth.env_openai_api_key_present")
.map(String::as_str),
Some("true")
);
let spans = span_exporter.get_finished_spans().expect("span export");
let request_trace_event =
@@ -755,4 +843,10 @@ fn otel_export_routing_policy_routes_websocket_request_transport_observability()
.map(String::as_str),
Some("true")
);
assert_eq!(
request_trace_attrs
.get("auth.env_provider_key_present")
.map(String::as_str),
Some("true")
);
}