mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
feat: drop discrepancy metrics (#13753)
This commit is contained in:
@@ -1203,7 +1203,9 @@ async fn find_thread_path_by_id_str_in_subdir(
|
||||
"state db returned stale rollout path for thread {id_str}: {}",
|
||||
db_path.display()
|
||||
);
|
||||
state_db::record_discrepancy("find_thread_path_by_id_str_in_subdir", "stale_db_path");
|
||||
tracing::warn!(
|
||||
"state db discrepancy during find_thread_path_by_id_str_in_subdir: stale_db_path"
|
||||
);
|
||||
}
|
||||
|
||||
let mut root = codex_home.to_path_buf();
|
||||
@@ -1227,7 +1229,9 @@ async fn find_thread_path_by_id_str_in_subdir(
|
||||
let found = results.matches.into_iter().next().map(|m| m.full_path());
|
||||
if let Some(found_path) = found.as_ref() {
|
||||
tracing::debug!("state db missing rollout path for thread {id_str}");
|
||||
state_db::record_discrepancy("find_thread_path_by_id_str_in_subdir", "falling_back");
|
||||
tracing::warn!(
|
||||
"state db discrepancy during find_thread_path_by_id_str_in_subdir: falling_back"
|
||||
);
|
||||
state_db::read_repair_rollout_path(
|
||||
state_db_ctx.as_deref(),
|
||||
thread_id,
|
||||
|
||||
@@ -290,7 +290,7 @@ impl RolloutRecorder {
|
||||
}
|
||||
// If SQLite listing still fails, return the filesystem page rather than failing the list.
|
||||
tracing::error!("Falling back on rollout system");
|
||||
state_db::record_discrepancy("list_threads_with_db_fallback", "falling_back");
|
||||
tracing::warn!("state db discrepancy during list_threads_with_db_fallback: falling_back");
|
||||
Ok(truncate_fs_page(fs_page, page_size, sort_key))
|
||||
}
|
||||
|
||||
|
||||
@@ -12,9 +12,7 @@ use codex_protocol::ThreadId;
|
||||
use codex_protocol::dynamic_tools::DynamicToolSpec;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_state::DB_METRIC_COMPARE_ERROR;
|
||||
pub use codex_state::LogEntry;
|
||||
use codex_state::STATE_DB_VERSION;
|
||||
use codex_state::ThreadMetadataBuilder;
|
||||
use serde_json::Value;
|
||||
use std::path::Path;
|
||||
@@ -267,7 +265,7 @@ pub async fn list_threads_db(
|
||||
item.id,
|
||||
item.rollout_path.display()
|
||||
);
|
||||
record_discrepancy("list_threads_db", "stale_db_path_dropped");
|
||||
warn!("state db discrepancy during list_threads_db: stale_db_path_dropped");
|
||||
let _ = ctx.delete_thread(item.id).await;
|
||||
}
|
||||
}
|
||||
@@ -459,7 +457,7 @@ pub async fn read_repair_rollout_path(
|
||||
if repaired == metadata {
|
||||
return;
|
||||
}
|
||||
record_discrepancy("read_repair_rollout_path", "upsert_needed");
|
||||
warn!("state db discrepancy during read_repair_rollout_path: upsert_needed (fast path)");
|
||||
if let Err(err) = ctx.upsert_thread(&repaired).await {
|
||||
warn!(
|
||||
"state db read-repair upsert failed for {}: {err}",
|
||||
@@ -473,7 +471,7 @@ pub async fn read_repair_rollout_path(
|
||||
// Slow path: when the row is missing/unreadable (or direct upsert failed),
|
||||
// rebuild metadata from rollout contents and reconcile it into SQLite.
|
||||
if !saw_existing_metadata {
|
||||
record_discrepancy("read_repair_rollout_path", "upsert_needed");
|
||||
warn!("state db discrepancy during read_repair_rollout_path: upsert_needed (slow path)");
|
||||
}
|
||||
let default_provider = crate::rollout::list::read_session_meta_line(rollout_path)
|
||||
.await
|
||||
@@ -514,7 +512,7 @@ pub async fn apply_rollout_items(
|
||||
"state db apply_rollout_items missing builder during {stage}: {}",
|
||||
rollout_path.display()
|
||||
);
|
||||
record_discrepancy(stage, "missing_builder");
|
||||
warn!("state db discrepancy during apply_rollout_items: {stage}, missing_builder");
|
||||
return;
|
||||
}
|
||||
},
|
||||
@@ -532,24 +530,6 @@ pub async fn apply_rollout_items(
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a state discrepancy metric with a stage and reason tag.
|
||||
pub fn record_discrepancy(stage: &str, reason: &str) {
|
||||
// We access the global metric because the call sites might not have access to the broader
|
||||
// OtelManager.
|
||||
tracing::warn!("state db record_discrepancy: {stage}, {reason}");
|
||||
if let Some(metric) = codex_otel::metrics::global() {
|
||||
let _ = metric.counter(
|
||||
DB_METRIC_COMPARE_ERROR,
|
||||
1,
|
||||
&[
|
||||
("stage", stage),
|
||||
("reason", reason),
|
||||
("version", &STATE_DB_VERSION.to_string()),
|
||||
],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -60,5 +60,3 @@ pub const DB_ERROR_METRIC: &str = "codex.db.error";
|
||||
pub const DB_METRIC_BACKFILL: &str = "codex.db.backfill";
|
||||
/// Metrics on backfill duration. Tags: [status]
|
||||
pub const DB_METRIC_BACKFILL_DURATION_MS: &str = "codex.db.backfill.duration_ms";
|
||||
/// Metrics on errors during comparison between DB and rollout file. Tags: [stage]
|
||||
pub const DB_METRIC_COMPARE_ERROR: &str = "codex.db.compare_error";
|
||||
|
||||
Reference in New Issue
Block a user