mirror of
https://github.com/openai/codex.git
synced 2026-05-17 01:32:32 +00:00
CXC-392 [With 401](https://openai.sentry.io/issues/7333870443/?project=4510195390611458&query=019ce8f8-560c-7f10-a00a-c59553740674&referrer=issue-stream) <img width="1909" height="555" alt="401 auth tags in Sentry" src="https://github.com/user-attachments/assets/412ea950-61c4-4780-9697-15c270971ee3" /> - auth_401_*: preserved facts from the latest unauthorized response snapshot - auth_*: latest auth-related facts from the latest request attempt - auth_recovery_*: unauthorized recovery state and follow-up result Without 401 <img width="1917" height="522" alt="happy-path auth tags in Sentry" src="https://github.com/user-attachments/assets/3381ed28-8022-43b0-b6c0-623a630e679f" /> ###### Summary - Add client-visible 401 diagnostics for auth attachment, upstream auth classification, and 401 request id / cf-ray correlation. - Record unauthorized recovery mode, phase, outcome, and retry/follow-up status without changing auth behavior. - Surface the highest-signal auth and recovery fields on uploaded client bug reports so they are usable in Sentry. - Preserve original unauthorized evidence under `auth_401_*` while keeping follow-up result tags separate. ###### Rationale (from spec findings) - The dominant bucket needed proof of whether the client attached auth before send or upstream still classified the request as missing auth. - Client uploads needed to show whether unauthorized recovery ran and what the client tried next. - Request id and cf-ray needed to be preserved on the unauthorized response so server-side correlation is immediate. - The bug-report path needed the same auth evidence as the request telemetry path, otherwise the observability would not be operationally useful. ###### Scope - Add auth 401 and unauthorized-recovery observability in `codex-rs/core`, `codex-rs/codex-api`, and `codex-rs/otel`, including feedback-tag surfacing. - Keep auth semantics, refresh behavior, retry behavior, endpoint classification, and geo-denial follow-up work out of this PR. 
###### Trade-offs - This exports only safe auth evidence: header presence/name, upstream auth classification, request ids, and recovery state. It does not export token values or raw upstream bodies. - This keeps websocket connection reuse as a transport clue because it can help distinguish stale reused sessions from fresh reconnects. - Misroute/base-url classification and geo-denial are intentionally deferred to a separate follow-up PR so this review stays focused on the dominant auth 401 bucket. ###### Client follow-up - PR 2 will add misroute/provider and geo-denial observability plus the matching feedback-tag surfacing. - A separate host/app-server PR should log auth-decision inputs so pre-send host auth state can be correlated with client request evidence. - `device_id` remains intentionally separate until there is a safe existing source on the feedback upload path. ###### Testing - `cargo test -p codex-core refresh_available_models_sorts_by_priority` - `cargo test -p codex-core emit_feedback_request_tags_` - `cargo test -p codex-core emit_feedback_auth_recovery_tags_` - `cargo test -p codex-core auth_request_telemetry_context_tracks_attached_auth_and_retry_phase` - `cargo test -p codex-core extract_response_debug_context_decodes_identity_headers` - `cargo test -p codex-core identity_auth_details` - `cargo test -p codex-core telemetry_error_messages_preserve_non_http_details` - `cargo test -p codex-core --all-features --no-run` - `cargo test -p codex-otel otel_export_routing_policy_routes_api_request_auth_observability` - `cargo test -p codex-otel otel_export_routing_policy_routes_websocket_connect_auth_observability` - `cargo test -p codex-otel otel_export_routing_policy_routes_websocket_request_transport_observability`
212 lines
6.8 KiB
Rust
212 lines
6.8 KiB
Rust
use std::path::Path;
|
|
use std::path::PathBuf;
|
|
use std::time::Duration;
|
|
|
|
use codex_protocol::ThreadId;
|
|
use rand::Rng;
|
|
use tracing::debug;
|
|
use tracing::error;
|
|
|
|
use crate::parse_command::shlex_join;
|
|
|
|
/// Delay in milliseconds that `backoff` uses as the base of its exponential schedule.
const INITIAL_DELAY_MS: u64 = 200;
|
|
/// Multiplier that `backoff` raises to the power `attempt - 1` to scale the base delay.
const BACKOFF_FACTOR: f64 = 2.0;
|
|
|
|
/// Record feedback metadata as structured `key = value` pairs.
///
/// Expands to a single [`tracing::info!`] event whose target is `"feedback_tags"`.
/// If `codex_feedback::CodexFeedback::metadata_layer()` is installed, the recorded
/// fields are captured and later attached as tags when feedback is uploaded.
///
/// Every value is borrowed and passed through [`tracing::field::debug`], so the
/// expression only needs a type that implements [`std::fmt::Debug`]. At least one
/// pair is required and a trailing comma is accepted.
///
/// # Examples
///
/// ```rust
/// codex_core::feedback_tags!(model = "gpt-5", cached = true);
/// codex_core::feedback_tags!(provider = provider_id, request_id = request_id);
/// ```
#[macro_export]
macro_rules! feedback_tags {
    ($($name:ident = $val:expr),+ $(,)?) => {
        ::tracing::info!(target: "feedback_tags", $($name = ::tracing::field::debug(&$val)),+);
    };
}
|
|
|
|
/// Auth-related facts about a single request attempt, surfaced as feedback tags.
///
/// `emit_feedback_request_tags` emits every field under a tag of the same name and
/// renders an absent (`None`) value as an empty string.
pub(crate) struct FeedbackRequestTags<'a> {
    /// Endpoint label, emitted as the `endpoint` tag.
    pub endpoint: &'a str,
    /// Whether an auth header was attached to the request.
    pub auth_header_attached: bool,
    /// Name of the attached auth header, if any (never the header value).
    pub auth_header_name: Option<&'a str>,
    /// Auth mode label for the request, if known.
    pub auth_mode: Option<&'a str>,
    /// Whether this attempt was a retry following an unauthorized response, if known.
    pub auth_retry_after_unauthorized: Option<bool>,
    /// Unauthorized-recovery mode label, if recovery is in play.
    pub auth_recovery_mode: Option<&'a str>,
    /// Unauthorized-recovery phase label, if recovery is in play.
    pub auth_recovery_phase: Option<&'a str>,
    /// Whether an existing connection was reused for the attempt, if known.
    pub auth_connection_reused: Option<bool>,
    /// Request id associated with the attempt, if one was observed.
    pub auth_request_id: Option<&'a str>,
    /// `cf-ray` identifier associated with the attempt, if one was observed.
    pub auth_cf_ray: Option<&'a str>,
    /// Auth error text reported for the attempt, if any.
    pub auth_error: Option<&'a str>,
    /// Auth error code reported for the attempt, if any.
    pub auth_error_code: Option<&'a str>,
    /// Whether the follow-up request after recovery succeeded, if one was made.
    pub auth_recovery_followup_success: Option<bool>,
    /// Status code of the follow-up request after recovery, if one was made.
    pub auth_recovery_followup_status: Option<u16>,
}
|
|
|
|
/// Borrowed request id, `cf-ray`, error and error code emitted under the `auth_401_*`
/// feedback tags, with every missing value normalised to the empty string.
struct Auth401FeedbackSnapshot<'a> {
    request_id: &'a str,
    cf_ray: &'a str,
    error: &'a str,
    error_code: &'a str,
}

impl<'a> Auth401FeedbackSnapshot<'a> {
    /// Build a snapshot from optional fields, substituting `""` for each `None`.
    fn from_optional_fields(
        request_id: Option<&'a str>,
        cf_ray: Option<&'a str>,
        error: Option<&'a str>,
        error_code: Option<&'a str>,
    ) -> Self {
        // `<&str as Default>::default()` is the empty string.
        Self {
            request_id: request_id.unwrap_or_default(),
            cf_ray: cf_ray.unwrap_or_default(),
            error: error.unwrap_or_default(),
            error_code: error_code.unwrap_or_default(),
        }
    }
}
|
|
|
|
pub(crate) fn emit_feedback_request_tags(tags: &FeedbackRequestTags<'_>) {
|
|
let auth_header_name = tags.auth_header_name.unwrap_or("");
|
|
let auth_mode = tags.auth_mode.unwrap_or("");
|
|
let auth_retry_after_unauthorized = tags
|
|
.auth_retry_after_unauthorized
|
|
.map_or_else(String::new, |value| value.to_string());
|
|
let auth_recovery_mode = tags.auth_recovery_mode.unwrap_or("");
|
|
let auth_recovery_phase = tags.auth_recovery_phase.unwrap_or("");
|
|
let auth_connection_reused = tags
|
|
.auth_connection_reused
|
|
.map_or_else(String::new, |value| value.to_string());
|
|
let auth_request_id = tags.auth_request_id.unwrap_or("");
|
|
let auth_cf_ray = tags.auth_cf_ray.unwrap_or("");
|
|
let auth_error = tags.auth_error.unwrap_or("");
|
|
let auth_error_code = tags.auth_error_code.unwrap_or("");
|
|
let auth_recovery_followup_success = tags
|
|
.auth_recovery_followup_success
|
|
.map_or_else(String::new, |value| value.to_string());
|
|
let auth_recovery_followup_status = tags
|
|
.auth_recovery_followup_status
|
|
.map_or_else(String::new, |value| value.to_string());
|
|
feedback_tags!(
|
|
endpoint = tags.endpoint,
|
|
auth_header_attached = tags.auth_header_attached,
|
|
auth_header_name = auth_header_name,
|
|
auth_mode = auth_mode,
|
|
auth_retry_after_unauthorized = auth_retry_after_unauthorized,
|
|
auth_recovery_mode = auth_recovery_mode,
|
|
auth_recovery_phase = auth_recovery_phase,
|
|
auth_connection_reused = auth_connection_reused,
|
|
auth_request_id = auth_request_id,
|
|
auth_cf_ray = auth_cf_ray,
|
|
auth_error = auth_error,
|
|
auth_error_code = auth_error_code,
|
|
auth_recovery_followup_success = auth_recovery_followup_success,
|
|
auth_recovery_followup_status = auth_recovery_followup_status
|
|
);
|
|
}
|
|
|
|
pub(crate) fn emit_feedback_auth_recovery_tags(
|
|
auth_recovery_mode: &str,
|
|
auth_recovery_phase: &str,
|
|
auth_recovery_outcome: &str,
|
|
auth_request_id: Option<&str>,
|
|
auth_cf_ray: Option<&str>,
|
|
auth_error: Option<&str>,
|
|
auth_error_code: Option<&str>,
|
|
) {
|
|
let auth_401 = Auth401FeedbackSnapshot::from_optional_fields(
|
|
auth_request_id,
|
|
auth_cf_ray,
|
|
auth_error,
|
|
auth_error_code,
|
|
);
|
|
feedback_tags!(
|
|
auth_recovery_mode = auth_recovery_mode,
|
|
auth_recovery_phase = auth_recovery_phase,
|
|
auth_recovery_outcome = auth_recovery_outcome,
|
|
auth_401_request_id = auth_401.request_id,
|
|
auth_401_cf_ray = auth_401.cf_ray,
|
|
auth_401_error = auth_401.error,
|
|
auth_401_error_code = auth_401.error_code
|
|
);
|
|
}
|
|
|
|
pub fn backoff(attempt: u64) -> Duration {
|
|
let exp = BACKOFF_FACTOR.powi(attempt.saturating_sub(1) as i32);
|
|
let base = (INITIAL_DELAY_MS as f64 * exp) as u64;
|
|
let jitter = rand::rng().random_range(0.9..1.1);
|
|
Duration::from_millis((base as f64 * jitter) as u64)
|
|
}
|
|
|
|
pub(crate) fn error_or_panic(message: impl std::string::ToString) {
|
|
if cfg!(debug_assertions) {
|
|
panic!("{}", message.to_string());
|
|
} else {
|
|
error!("{}", message.to_string());
|
|
}
|
|
}
|
|
|
|
pub(crate) fn try_parse_error_message(text: &str) -> String {
|
|
debug!("Parsing server error response: {}", text);
|
|
let json = serde_json::from_str::<serde_json::Value>(text).unwrap_or_default();
|
|
if let Some(error) = json.get("error")
|
|
&& let Some(message) = error.get("message")
|
|
&& let Some(message_str) = message.as_str()
|
|
{
|
|
return message_str.to_string();
|
|
}
|
|
if text.is_empty() {
|
|
return "Unknown error".to_string();
|
|
}
|
|
text.to_string()
|
|
}
|
|
|
|
/// Resolve `path` against `base`.
///
/// An absolute `path` is returned as an owned copy; a relative `path` is joined
/// onto `base`.
pub fn resolve_path(base: &Path, path: &PathBuf) -> PathBuf {
    if !path.is_absolute() {
        return base.join(path);
    }
    path.to_path_buf()
}
|
|
|
|
/// Strip surrounding whitespace from a thread name.
///
/// Returns `None` when nothing remains after trimming, otherwise the trimmed name
/// as an owned `String`.
pub fn normalize_thread_name(name: &str) -> Option<String> {
    Some(name.trim())
        .filter(|trimmed| !trimmed.is_empty())
        .map(str::to_owned)
}
|
|
|
|
pub fn resume_command(thread_name: Option<&str>, thread_id: Option<ThreadId>) -> Option<String> {
|
|
let resume_target = thread_name
|
|
.filter(|name| !name.is_empty())
|
|
.map(str::to_string)
|
|
.or_else(|| thread_id.map(|thread_id| thread_id.to_string()));
|
|
resume_target.map(|target| {
|
|
let needs_double_dash = target.starts_with('-');
|
|
let escaped = shlex_join(&[target]);
|
|
if needs_double_dash {
|
|
format!("codex resume -- {escaped}")
|
|
} else {
|
|
format!("codex resume {escaped}")
|
|
}
|
|
})
|
|
}
|
|
|
|
#[cfg(test)]
|
|
#[path = "util_tests.rs"]
|
|
mod tests;
|