Files
codex/codex-rs/otel/tests/suite/runtime_summary.rs
Colin Young d692b74007 Add auth 401 observability to client bug reports (#14611)
CXC-392

  [With 401](https://openai.sentry.io/issues/7333870443/?project=4510195390611458&query=019ce8f8-560c-7f10-a00a-c59553740674&referrer=issue-stream)
  <img width="1909" height="555" alt="401 auth tags in Sentry"
  src="https://github.com/user-attachments/assets/412ea950-61c4-4780-9697-15c270971ee3"
  />


  - auth_401_*: preserved facts from the latest unauthorized response snapshot
  - auth_*: auth-related facts from the most recent request attempt
  - auth_recovery_*: unauthorized recovery state and follow-up result


  Without 401
  <img width="1917" height="522" alt="happy-path auth tags in Sentry"
  src="https://github.com/user-attachments/assets/3381ed28-8022-43b0-b6c0-623a630e679f"
  />

  ###### Summary
  - Add client-visible 401 diagnostics for auth attachment, upstream auth classification, and 401 request id / cf-ray correlation.
  - Record unauthorized recovery mode, phase, outcome, and retry/follow-up status without changing auth behavior.
  - Surface the highest-signal auth and recovery fields on uploaded client bug reports so they are usable in Sentry.
  - Preserve original unauthorized evidence under `auth_401_*` while keeping follow-up result tags separate.

  ###### Rationale (from spec findings)
  - The dominant bucket needed proof of whether the client attached auth before send or upstream still classified the request as missing auth.
  - Client uploads needed to show whether unauthorized recovery ran and what the client tried next.
  - Request id and cf-ray needed to be preserved on the unauthorized response so server-side correlation is immediate.
  - The bug-report path needed the same auth evidence as the request telemetry path, otherwise the observability would not be operationally useful.

  ###### Scope
  - Add auth 401 and unauthorized-recovery observability in `codex-rs/core`, `codex-rs/codex-api`, and `codex-rs/otel`, including feedback-tag surfacing.
  - Keep auth semantics, refresh behavior, retry behavior, endpoint classification, and geo-denial follow-up work out of this PR.

  ###### Trade-offs
  - This exports only safe auth evidence: header presence/name, upstream auth classification, request ids, and recovery state. It does not export token values or raw upstream bodies.
  - This keeps websocket connection reuse as a transport clue because it can help distinguish stale reused sessions from fresh reconnects.
  - Misroute/base-url classification and geo-denial are intentionally deferred to a separate follow-up PR so this review stays focused on the dominant auth 401 bucket.

  ###### Client follow-up
  - PR 2 will add misroute/provider and geo-denial observability plus the matching feedback-tag surfacing.
  - A separate host/app-server PR should log auth-decision inputs so pre-send host auth state can be correlated with client request evidence.
  - `device_id` remains intentionally separate until there is a safe existing source on the feedback upload path.

  ###### Testing
  - `cargo test -p codex-core refresh_available_models_sorts_by_priority`
  - `cargo test -p codex-core emit_feedback_request_tags_`
  - `cargo test -p codex-core emit_feedback_auth_recovery_tags_`
  - `cargo test -p codex-core auth_request_telemetry_context_tracks_attached_auth_and_retry_phase`
  - `cargo test -p codex-core extract_response_debug_context_decodes_identity_headers`
  - `cargo test -p codex-core identity_auth_details`
  - `cargo test -p codex-core telemetry_error_messages_preserve_non_http_details`
  - `cargo test -p codex-core --all-features --no-run`
  - `cargo test -p codex-otel otel_export_routing_policy_routes_api_request_auth_observability`
  - `cargo test -p codex-otel otel_export_routing_policy_routes_websocket_connect_auth_observability`
  - `cargo test -p codex-otel otel_export_routing_policy_routes_websocket_request_transport_observability`
2026-03-14 15:38:51 -07:00

140 lines
4.4 KiB
Rust

use codex_otel::RuntimeMetricTotals;
use codex_otel::RuntimeMetricsSummary;
use codex_otel::SessionTelemetry;
use codex_otel::TelemetryAuthMode;
use codex_otel::metrics::MetricsClient;
use codex_otel::metrics::MetricsConfig;
use codex_otel::metrics::Result;
use codex_protocol::ThreadId;
use codex_protocol::protocol::SessionSource;
use eventsource_stream::Event as StreamEvent;
use opentelemetry_sdk::metrics::InMemoryMetricExporter;
use pretty_assertions::assert_eq;
use std::time::Duration;
use tokio_tungstenite::tungstenite::Message;
/// Checks that `runtime_metrics_summary` reports the totals recorded on a
/// single `SessionTelemetry` instance.
///
/// The test records one tool result, one API request, one websocket request,
/// one SSE event, two websocket events (the second carrying a
/// `responsesapi.websocket_timing` payload), and two turn-latency durations,
/// then compares the resulting summary with the expected value.
#[test]
fn runtime_metrics_summary_collects_tool_api_and_streaming_metrics() -> Result<()> {
    // Shape of the value passed to `record_websocket_event` below.
    type WebsocketPoll = std::result::Result<
        Option<std::result::Result<Message, tokio_tungstenite::tungstenite::Error>>,
        codex_api::ApiError,
    >;

    // Expected summary: each field mirrors a sample recorded further down.
    let expected = RuntimeMetricsSummary {
        tool_calls: RuntimeMetricTotals {
            count: 1,
            duration_ms: 250,
        },
        api_calls: RuntimeMetricTotals {
            count: 1,
            duration_ms: 300,
        },
        streaming_events: RuntimeMetricTotals {
            count: 1,
            duration_ms: 120,
        },
        websocket_calls: RuntimeMetricTotals {
            count: 1,
            duration_ms: 400,
        },
        // Two websocket events are recorded: 80 ms + 20 ms.
        websocket_events: RuntimeMetricTotals {
            count: 2,
            duration_ms: 100,
        },
        // The `responses_api_*` values match the numbers embedded in the
        // `timing_metrics` payload of the second websocket event.
        responses_api_overhead_ms: 124,
        responses_api_inference_time_ms: 457,
        responses_api_engine_iapi_ttft_ms: 211,
        responses_api_engine_service_ttft_ms: 233,
        responses_api_engine_iapi_tbt_ms: 377,
        responses_api_engine_service_tbt_ms: 399,
        turn_ttft_ms: 95,
        turn_ttfm_ms: 180,
    };

    // In-memory metrics client with the runtime reader enabled.
    let metrics_client = MetricsClient::new(
        MetricsConfig::in_memory(
            "test",
            "codex-cli",
            env!("CARGO_PKG_VERSION"),
            InMemoryMetricExporter::default(),
        )
        .with_runtime_reader(),
    )?;

    let telemetry = SessionTelemetry::new(
        ThreadId::new(),
        "gpt-5.1",
        "gpt-5.1",
        Some("account-id".to_string()),
        None,
        Some(TelemetryAuthMode::ApiKey),
        "test_originator".to_string(),
        true,
        "tty".to_string(),
        SessionSource::Cli,
    )
    .with_metrics(metrics_client);

    // Reset the runtime metrics before recording this test's samples.
    telemetry.reset_runtime_metrics();

    // One tool call.
    let tool_elapsed = Duration::from_millis(250);
    telemetry.tool_result_with_tags(
        "shell",
        "call-1",
        "{\"cmd\":\"echo\"}",
        tool_elapsed,
        true,
        "ok",
        &[],
        None,
        None,
    );

    // One API request against `/responses` with status 200.
    let api_elapsed = Duration::from_millis(300);
    telemetry.record_api_request(
        1,
        Some(200),
        None,
        api_elapsed,
        false,
        None,
        false,
        None,
        None,
        "/responses",
        None,
        None,
        None,
        None,
    );

    // One websocket request.
    let websocket_elapsed = Duration::from_millis(400);
    telemetry.record_websocket_request(websocket_elapsed, None, false);

    // One SSE event.
    let created_sse_event = StreamEvent {
        event: "response.created".to_string(),
        data: "{}".to_string(),
        id: String::new(),
        retry: None,
    };
    let sse_poll: std::result::Result<
        Option<std::result::Result<StreamEvent, eventsource_stream::EventStreamError<&str>>>,
        tokio::time::error::Elapsed,
    > = Ok(Some(Ok(created_sse_event)));
    telemetry.log_sse_event(&sse_poll, Duration::from_millis(120));

    // First websocket event: a plain `response.created` message.
    let created_ws_poll: WebsocketPoll = Ok(Some(Ok(Message::Text(
        r#"{"type":"response.created"}"#.into(),
    ))));
    telemetry.record_websocket_event(&created_ws_poll, Duration::from_millis(80));

    // Second websocket event: a timing message carrying `timing_metrics`.
    let timing_ws_poll: WebsocketPoll = Ok(Some(Ok(Message::Text(
        r#"{"type":"responsesapi.websocket_timing","timing_metrics":{"responses_duration_excl_engine_and_client_tool_time_ms":124,"engine_service_total_ms":457,"engine_iapi_ttft_total_ms":211,"engine_service_ttft_total_ms":233,"engine_iapi_tbt_across_engine_calls_ms":377,"engine_service_tbt_across_engine_calls_ms":399}}"#
            .into(),
    ))));
    telemetry.record_websocket_event(&timing_ws_poll, Duration::from_millis(20));

    // Turn-level latency durations recorded under explicit metric names.
    let turn_ttft = Duration::from_millis(95);
    let turn_ttfm = Duration::from_millis(180);
    telemetry.record_duration("codex.turn.ttft.duration_ms", turn_ttft, &[]);
    telemetry.record_duration("codex.turn.ttfm.duration_ms", turn_ttfm, &[]);

    let summary = telemetry
        .runtime_metrics_summary()
        .expect("runtime metrics summary should be available");

    assert_eq!(summary, expected);
    Ok(())
}