Files
codex/codex-rs/core/src/util.rs
Colin Young d692b74007 Add auth 401 observability to client bug reports (#14611)
CXC-392

  [With
  401](https://openai.sentry.io/issues/7333870443/?project=4510195390611458&query=019ce8f8-560c-7f10-a00a-c59553740674&referrer=issue-stream)
  <img width="1909" height="555" alt="401 auth tags in Sentry"
  src="https://github.com/user-attachments/assets/412ea950-61c4-4780-9697-15c270971ee3"
  />


  - auth_401_*: preserved facts from the latest unauthorized response snapshot
  - auth_*: latest auth-related facts from the latest request attempt
  - auth_recovery_*: unauthorized recovery state and follow-up result


  Without 401
  <img width="1917" height="522" alt="happy-path auth tags in Sentry"
  src="https://github.com/user-attachments/assets/3381ed28-8022-43b0-b6c0-623a630e679f"
  />

  ###### Summary
  - Add client-visible 401 diagnostics for auth attachment, upstream auth classification, and 401 request id / cf-ray correlation.
  - Record unauthorized recovery mode, phase, outcome, and retry/follow-up status without changing auth behavior.
  - Surface the highest-signal auth and recovery fields on uploaded client bug reports so they are usable in Sentry.
  - Preserve original unauthorized evidence under `auth_401_*` while keeping follow-up result tags separate.

  ###### Rationale (from spec findings)
  - The dominant bucket needed proof of whether the client attached auth before send or upstream still classified the request as missing auth.
  - Client uploads needed to show whether unauthorized recovery ran and what the client tried next.
  - Request id and cf-ray needed to be preserved on the unauthorized response so server-side correlation is immediate.
  - The bug-report path needed the same auth evidence as the request telemetry path, otherwise the observability would not be operationally useful.

  ###### Scope
  - Add auth 401 and unauthorized-recovery observability in `codex-rs/core`, `codex-rs/codex-api`, and `codex-rs/otel`, including feedback-tag surfacing.
  - Keep auth semantics, refresh behavior, retry behavior, endpoint classification, and geo-denial follow-up work out of this PR.

  ###### Trade-offs
  - This exports only safe auth evidence: header presence/name, upstream auth classification, request ids, and recovery state. It does not export token values or raw upstream bodies.
  - This keeps websocket connection reuse as a transport clue because it can help distinguish stale reused sessions from fresh reconnects.
  - Misroute/base-url classification and geo-denial are intentionally deferred to a separate follow-up PR so this review stays focused on the dominant auth 401 bucket.

  ###### Client follow-up
  - PR 2 will add misroute/provider and geo-denial observability plus the matching feedback-tag surfacing.
  - A separate host/app-server PR should log auth-decision inputs so pre-send host auth state can be correlated with client request evidence.
  - `device_id` remains intentionally separate until there is a safe existing source on the feedback upload path.

  ###### Testing
  - `cargo test -p codex-core refresh_available_models_sorts_by_priority`
  - `cargo test -p codex-core emit_feedback_request_tags_`
  - `cargo test -p codex-core emit_feedback_auth_recovery_tags_`
  - `cargo test -p codex-core auth_request_telemetry_context_tracks_attached_auth_and_retry_phase`
  - `cargo test -p codex-core extract_response_debug_context_decodes_identity_headers`
  - `cargo test -p codex-core identity_auth_details`
  - `cargo test -p codex-core telemetry_error_messages_preserve_non_http_details`
  - `cargo test -p codex-core --all-features --no-run`
  - `cargo test -p codex-otel otel_export_routing_policy_routes_api_request_auth_observability`
  - `cargo test -p codex-otel otel_export_routing_policy_routes_websocket_connect_auth_observability`
  - `cargo test -p codex-otel otel_export_routing_policy_routes_websocket_request_transport_observability`
2026-03-14 15:38:51 -07:00

212 lines
6.8 KiB
Rust

use std::path::Path;
use std::path::PathBuf;
use std::time::Duration;
use codex_protocol::ThreadId;
use rand::Rng;
use tracing::debug;
use tracing::error;
use crate::parse_command::shlex_join;
const INITIAL_DELAY_MS: u64 = 200;
const BACKOFF_FACTOR: f64 = 2.0;
/// Emit structured feedback metadata as key/value pairs.
///
/// This logs a tracing event with `target: "feedback_tags"`. If
/// `codex_feedback::CodexFeedback::metadata_layer()` is installed, these fields are captured and
/// later attached as tags when feedback is uploaded.
///
/// Values are wrapped with [`tracing::field::DebugValue`], so the expression only needs to
/// implement [`std::fmt::Debug`].
///
/// Example:
///
/// ```rust
/// codex_core::feedback_tags!(model = "gpt-5", cached = true);
/// codex_core::feedback_tags!(provider = provider_id, request_id = request_id);
/// ```
#[macro_export]
macro_rules! feedback_tags {
($( $key:ident = $value:expr ),+ $(,)?) => {
::tracing::info!(
target: "feedback_tags",
$( $key = ::tracing::field::debug(&$value) ),+
);
};
}
pub(crate) struct FeedbackRequestTags<'a> {
pub endpoint: &'a str,
pub auth_header_attached: bool,
pub auth_header_name: Option<&'a str>,
pub auth_mode: Option<&'a str>,
pub auth_retry_after_unauthorized: Option<bool>,
pub auth_recovery_mode: Option<&'a str>,
pub auth_recovery_phase: Option<&'a str>,
pub auth_connection_reused: Option<bool>,
pub auth_request_id: Option<&'a str>,
pub auth_cf_ray: Option<&'a str>,
pub auth_error: Option<&'a str>,
pub auth_error_code: Option<&'a str>,
pub auth_recovery_followup_success: Option<bool>,
pub auth_recovery_followup_status: Option<u16>,
}
struct Auth401FeedbackSnapshot<'a> {
request_id: &'a str,
cf_ray: &'a str,
error: &'a str,
error_code: &'a str,
}
impl<'a> Auth401FeedbackSnapshot<'a> {
fn from_optional_fields(
request_id: Option<&'a str>,
cf_ray: Option<&'a str>,
error: Option<&'a str>,
error_code: Option<&'a str>,
) -> Self {
Self {
request_id: request_id.unwrap_or(""),
cf_ray: cf_ray.unwrap_or(""),
error: error.unwrap_or(""),
error_code: error_code.unwrap_or(""),
}
}
}
pub(crate) fn emit_feedback_request_tags(tags: &FeedbackRequestTags<'_>) {
let auth_header_name = tags.auth_header_name.unwrap_or("");
let auth_mode = tags.auth_mode.unwrap_or("");
let auth_retry_after_unauthorized = tags
.auth_retry_after_unauthorized
.map_or_else(String::new, |value| value.to_string());
let auth_recovery_mode = tags.auth_recovery_mode.unwrap_or("");
let auth_recovery_phase = tags.auth_recovery_phase.unwrap_or("");
let auth_connection_reused = tags
.auth_connection_reused
.map_or_else(String::new, |value| value.to_string());
let auth_request_id = tags.auth_request_id.unwrap_or("");
let auth_cf_ray = tags.auth_cf_ray.unwrap_or("");
let auth_error = tags.auth_error.unwrap_or("");
let auth_error_code = tags.auth_error_code.unwrap_or("");
let auth_recovery_followup_success = tags
.auth_recovery_followup_success
.map_or_else(String::new, |value| value.to_string());
let auth_recovery_followup_status = tags
.auth_recovery_followup_status
.map_or_else(String::new, |value| value.to_string());
feedback_tags!(
endpoint = tags.endpoint,
auth_header_attached = tags.auth_header_attached,
auth_header_name = auth_header_name,
auth_mode = auth_mode,
auth_retry_after_unauthorized = auth_retry_after_unauthorized,
auth_recovery_mode = auth_recovery_mode,
auth_recovery_phase = auth_recovery_phase,
auth_connection_reused = auth_connection_reused,
auth_request_id = auth_request_id,
auth_cf_ray = auth_cf_ray,
auth_error = auth_error,
auth_error_code = auth_error_code,
auth_recovery_followup_success = auth_recovery_followup_success,
auth_recovery_followup_status = auth_recovery_followup_status
);
}
pub(crate) fn emit_feedback_auth_recovery_tags(
auth_recovery_mode: &str,
auth_recovery_phase: &str,
auth_recovery_outcome: &str,
auth_request_id: Option<&str>,
auth_cf_ray: Option<&str>,
auth_error: Option<&str>,
auth_error_code: Option<&str>,
) {
let auth_401 = Auth401FeedbackSnapshot::from_optional_fields(
auth_request_id,
auth_cf_ray,
auth_error,
auth_error_code,
);
feedback_tags!(
auth_recovery_mode = auth_recovery_mode,
auth_recovery_phase = auth_recovery_phase,
auth_recovery_outcome = auth_recovery_outcome,
auth_401_request_id = auth_401.request_id,
auth_401_cf_ray = auth_401.cf_ray,
auth_401_error = auth_401.error,
auth_401_error_code = auth_401.error_code
);
}
pub fn backoff(attempt: u64) -> Duration {
let exp = BACKOFF_FACTOR.powi(attempt.saturating_sub(1) as i32);
let base = (INITIAL_DELAY_MS as f64 * exp) as u64;
let jitter = rand::rng().random_range(0.9..1.1);
Duration::from_millis((base as f64 * jitter) as u64)
}
pub(crate) fn error_or_panic(message: impl std::string::ToString) {
if cfg!(debug_assertions) {
panic!("{}", message.to_string());
} else {
error!("{}", message.to_string());
}
}
pub(crate) fn try_parse_error_message(text: &str) -> String {
debug!("Parsing server error response: {}", text);
let json = serde_json::from_str::<serde_json::Value>(text).unwrap_or_default();
if let Some(error) = json.get("error")
&& let Some(message) = error.get("message")
&& let Some(message_str) = message.as_str()
{
return message_str.to_string();
}
if text.is_empty() {
return "Unknown error".to_string();
}
text.to_string()
}
pub fn resolve_path(base: &Path, path: &PathBuf) -> PathBuf {
if path.is_absolute() {
path.clone()
} else {
base.join(path)
}
}
/// Trim a thread name and return `None` if it is empty after trimming.
pub fn normalize_thread_name(name: &str) -> Option<String> {
let trimmed = name.trim();
if trimmed.is_empty() {
None
} else {
Some(trimmed.to_string())
}
}
pub fn resume_command(thread_name: Option<&str>, thread_id: Option<ThreadId>) -> Option<String> {
let resume_target = thread_name
.filter(|name| !name.is_empty())
.map(str::to_string)
.or_else(|| thread_id.map(|thread_id| thread_id.to_string()));
resume_target.map(|target| {
let needs_double_dash = target.starts_with('-');
let escaped = shlex_join(&[target]);
if needs_double_dash {
format!("codex resume -- {escaped}")
} else {
format!("codex resume {escaped}")
}
})
}
#[cfg(test)]
#[path = "util_tests.rs"]
mod tests;