Add process-scoped SQLite telemetry (#22154)

## Summary
- Add SQLite init, backfill-gate, and fallback telemetry without
  introducing a cross-cutting state-db access wrapper.
- Install one process-scoped telemetry sink after OTEL startup and let
  low-level state/rollout paths emit through it directly.
- Add process-start metrics for the process owners that initialize
  SQLite.

---------

Co-authored-by: Owen Lin <owen@openai.com>
This commit is contained in:
jif-oai
2026-05-11 20:32:40 +02:00
committed by GitHub
parent cf6342b75b
commit b401666ca5
22 changed files with 771 additions and 134 deletions

View File

@@ -104,6 +104,7 @@ pub use crate::transport::AppServerTransport;
pub use crate::transport::app_server_control_socket_path;
const LOG_FORMAT_ENV_VAR: &str = "LOG_FORMAT";
const OTEL_SERVICE_NAME: &str = "codex-app-server";
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum LogFormat {
@@ -503,6 +504,20 @@ pub async fn run_main_with_transport_options(
}
};
let otel = codex_core::otel_init::build_provider(
&config,
env!("CARGO_PKG_VERSION"),
Some(OTEL_SERVICE_NAME),
default_analytics_enabled,
)
.map_err(|e| {
std::io::Error::new(
ErrorKind::InvalidData,
format!("error loading otel config: {e}"),
)
})?;
codex_core::otel_init::record_process_start(otel.as_ref(), OTEL_SERVICE_NAME);
codex_core::otel_init::install_sqlite_telemetry(otel.as_ref(), OTEL_SERVICE_NAME);
let state_db_result = rollout_state_db::try_init(&config).await;
let state_db_init_error = state_db_result.as_ref().err().map(ToString::to_string);
let state_db = state_db_result.ok();
@@ -582,19 +597,6 @@ pub async fn run_main_with_transport_options(
let feedback = CodexFeedback::new();
let otel = codex_core::otel_init::build_provider(
&config,
env!("CARGO_PKG_VERSION"),
Some("codex-app-server"),
default_analytics_enabled,
)
.map_err(|e| {
std::io::Error::new(
ErrorKind::InvalidData,
format!("error loading otel config: {e}"),
)
})?;
// Install a simple subscriber so `tracing` output is visible. Users can
// control the log level with `RUST_LOG` and switch to JSON logs with
// `LOG_FORMAT=json`.

View File

@@ -65,7 +65,6 @@ use codex_arg0::Arg0DispatchPaths;
use codex_chatgpt::workspace_settings;
use codex_core::ThreadManager;
use codex_core::config::Config;
use codex_core::thread_store_from_config;
use codex_exec_server::EnvironmentManager;
use codex_feedback::CodexFeedback;
use codex_login::AuthManager;
@@ -298,7 +297,7 @@ impl MessageProcessor {
// The thread store is intentionally process-scoped. Config reloads can
// affect per-thread behavior, but they must not move newly started,
// resumed, or forked threads to a different persistence backend/root.
let thread_store = thread_store_from_config(config.as_ref(), state_db.clone());
let thread_store = codex_core::thread_store_from_config(config.as_ref(), state_db.clone());
let thread_manager = Arc::new(ThreadManager::new(
config.as_ref(),
auth_manager.clone(),

View File

@@ -277,7 +277,6 @@ use codex_core::exec::ExecCapturePolicy;
use codex_core::exec::ExecExpiration;
use codex_core::exec::ExecParams;
use codex_core::exec_env::create_env;
use codex_core::find_thread_path_by_id_str;
use codex_core::path_utils;
#[cfg(test)]
use codex_core::read_head_for_summary;

View File

@@ -107,7 +107,7 @@ impl ThreadGoalRequestProcessor {
"ephemeral thread does not support goals: {thread_id}"
))
})?,
None => find_thread_path_by_id_str(
None => codex_rollout::find_thread_path_by_id_str(
&self.config.codex_home,
&thread_id.to_string(),
self.state_db.as_deref(),
@@ -271,7 +271,7 @@ impl ThreadGoalRequestProcessor {
"ephemeral thread does not support goals: {thread_id}"
))
})?,
None => find_thread_path_by_id_str(
None => codex_rollout::find_thread_path_by_id_str(
&self.config.codex_home,
&thread_id.to_string(),
self.state_db.as_deref(),
@@ -335,7 +335,7 @@ impl ThreadGoalRequestProcessor {
return Ok(state_db);
}
} else {
find_thread_path_by_id_str(
codex_rollout::find_thread_path_by_id_str(
&self.config.codex_home,
&thread_id.to_string(),
self.state_db.as_deref(),