Files
codex/codex-rs/rollout/src/state_db.rs
Ruslan Nigmatullin 4d201e340e state: pass state db handles through consumers (#20561)
## Why

SQLite state was still being opened from consumer paths, including lazy
`OnceCell`-backed thread-store call sites. That let one process
construct multiple state DB connections for the same Codex home, which
makes SQLite lock contention and `database is locked` failures much
easier to hit.

State DB lifetime should be chosen by main-like entrypoints and tests,
then passed through explicitly. Consumers should use the supplied
`Option<StateDbHandle>` or `StateDbHandle` and keep their existing
filesystem fallback or error behavior when no handle is available.

The startup path also needs to keep the rollout crate in charge of
SQLite state initialization. Opening `codex_state::StateRuntime`
directly bypasses rollout metadata backfill, so entrypoints should
initialize through `codex_rollout::state_db` and receive a handle only
after required rollout backfills have completed.

## What Changed

- Initialize the state DB in main-like entrypoints for CLI, TUI,
app-server, exec, MCP server, and the thread-manager sample.
- Pass `Option<StateDbHandle>` through `ThreadManager`,
`LocalThreadStore`, app-server processors, TUI app wiring, rollout
listing/recording, personality migration, shell snapshot cleanup,
session-name lookup, and memory/device-key consumers.
- Remove the lazy local state DB wrapper from the thread store so
non-test consumers use only the supplied handle or their existing
fallback path.
- Make `codex_rollout::state_db::init` the local state startup path: it
opens/migrates SQLite, runs rollout metadata backfill when needed, waits
for concurrent backfill workers up to a bounded timeout, verifies
completion, and then returns the initialized handle.
- Keep optional/non-owning SQLite helpers, such as remote TUI local
reads, as open-only paths that do not run startup backfill.
- Switch app-server startup from direct
`codex_state::StateRuntime::init` to the rollout state initializer so
app-server cannot skip rollout backfill.
- Collapse split rollout lookup/list APIs so callers use the normal
methods with an optional state handle instead of `_with_state_db`
variants.
- Restore `getConversationSummary(ThreadId)` to delegate through
`ThreadStore::read_thread` instead of a LocalThreadStore-specific
rollout path special case.
- Keep DB-backed rollout path lookup keyed on the DB row and file
existence, without imposing the filesystem filename convention on
existing DB rows.
- Verify readable DB-backed rollout paths against `session_meta.id`
before returning them, so a stale SQLite row that points at another
thread's JSONL falls back to filesystem search and read-repairs the DB
row.
- Keep `debug prompt-input` filesystem-only so a one-off debug command
does not initialize or backfill SQLite state just to print prompt input.
- Keep goal-session test Codex homes alive only in the goal-specific
helper, rather than leaking tempdirs from the shared session test
helper.
- Update tests and call sites to pass explicit state handles where DB
behavior is expected and explicit `None` where filesystem-only behavior
is intended.

## Validation

- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo check -p
codex-rollout -p codex-thread-store -p codex-app-server -p codex-core -p
codex-tui -p codex-exec -p codex-cli --tests`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p
codex-rollout state_db_`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p
codex-rollout find_thread_path`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p
codex-rollout find_thread_path -- --nocapture`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p
codex-rollout try_init_ -- --nocapture`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p
codex-rollout`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo clippy -p
codex-rollout --lib -- -D warnings`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p
codex-thread-store
read_thread_falls_back_when_sqlite_path_points_to_another_thread --
--nocapture`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p
codex-thread-store`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p codex-core
shell_snapshot`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p codex-core
--test all personality_migration`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p codex-core
--test all rollout_list_find`
- `RUST_MIN_STACK=8388608 CODEX_SKIP_VENDORED_BWRAP=1
CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p codex-core
--test all rollout_list_find::find_prefers_sqlite_path_by_id --
--nocapture`
- `RUST_MIN_STACK=8388608 CODEX_SKIP_VENDORED_BWRAP=1
CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p codex-core
--test all rollout_list_find -- --nocapture`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p codex-core
interrupt_accounts_active_goal_before_pausing`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p
codex-app-server get_auth_status -- --test-threads=1`
- `CODEX_SKIP_VENDORED_BWRAP=1
CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo test -p
codex-app-server --lib`
- `CODEX_SKIP_VENDORED_BWRAP=1
CARGO_TARGET_DIR=/tmp/codex-target-state-db cargo check -p codex-rollout
-p codex-app-server --tests`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db just fix -p codex-rollout
-p codex-thread-store -p codex-core -p codex-app-server -p codex-tui -p
codex-exec -p codex-cli`
- `CODEX_SKIP_VENDORED_BWRAP=1
CARGO_TARGET_DIR=/tmp/codex-target-state-db just fix -p codex-rollout -p
codex-app-server`
- `CARGO_TARGET_DIR=/tmp/codex-target-state-db just fix -p
codex-rollout`
- `CODEX_SKIP_VENDORED_BWRAP=1
CARGO_TARGET_DIR=/tmp/codex-target-state-db just fix -p codex-core`
- `just argument-comment-lint -p codex-core`
- `just argument-comment-lint -p codex-rollout`

Focused coverage added in `codex-rollout`:

- `recorder::tests::state_db_init_backfills_before_returning` verifies
the rollout metadata row exists before startup init returns.
- `state_db::tests::try_init_waits_for_concurrent_startup_backfill`
verifies startup waits for another worker to finish backfill instead of
disabling the handle for the process.
-
`state_db::tests::try_init_times_out_waiting_for_stuck_startup_backfill`
verifies startup does not hang indefinitely on a stuck backfill lease.
-
`tests::find_thread_path_accepts_existing_state_db_path_without_canonical_filename`
verifies DB-backed lookup accepts valid existing rollout paths even when
the filename does not include the thread UUID.
-
`tests::find_thread_path_falls_back_when_db_path_points_to_another_thread`
verifies DB-backed lookup ignores a stale row whose existing path
belongs to another thread and read-repairs the row after filesystem
fallback.

Focused coverage updated in `codex-core`:

- `rollout_list_find::find_prefers_sqlite_path_by_id` now uses a
DB-preferred rollout file with matching `session_meta.id`, so it still
verifies that valid SQLite paths win without depending on stale/empty
rollout contents.

`cargo test -p codex-app-server thread_list_respects_search_term_filter
-- --test-threads=1 --nocapture` was attempted locally but timed out
waiting for the app-server test harness `initialize` response before
reaching the changed thread-list code path.

`bazel test //codex-rs/thread-store:thread-store-unit-tests
--test_output=errors` was attempted locally after the thread-store fix,
but this container failed before target analysis while fetching `v8+`
through BuildBuddy/direct GitHub. The equivalent local crate coverage,
including `cargo test -p codex-thread-store`, passes.

A plain local `cargo check -p codex-rollout -p codex-app-server --tests`
also requires system `libcap.pc` for `codex-linux-sandbox`; the
follow-up app-server check above used `CODEX_SKIP_VENDORED_BWRAP=1` in
this container.
2026-05-04 11:46:03 -07:00

669 lines
21 KiB
Rust

use crate::config::RolloutConfig;
use crate::config::RolloutConfigView;
use crate::list::Cursor;
use crate::list::SortDirection;
use crate::list::ThreadSortKey;
use crate::metadata;
use chrono::DateTime;
use chrono::Utc;
use codex_protocol::ThreadId;
use codex_protocol::dynamic_tools::DynamicToolSpec;
use codex_protocol::protocol::RolloutItem;
use codex_protocol::protocol::SessionSource;
pub use codex_state::LogEntry;
use codex_state::ThreadMetadataBuilder;
use codex_utils_path::normalize_for_path_comparison;
use serde_json::Value;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use std::time::Instant;
use tracing::info;
use tracing::warn;
/// Core-facing handle to the SQLite-backed state runtime.
///
/// The handle is an [`Arc`], so cloning it shares one
/// [`codex_state::StateRuntime`] rather than creating another.
pub type StateDbHandle = Arc<codex_state::StateRuntime>;
/// Sleep between backfill-state polls while startup initialization waits
/// for the backfill to complete (non-test builds).
#[cfg(not(test))]
const STARTUP_BACKFILL_POLL_INTERVAL: Duration = Duration::from_secs(1);
/// Shorter poll interval used when the crate is compiled with `cfg(test)`.
#[cfg(test)]
const STARTUP_BACKFILL_POLL_INTERVAL: Duration = Duration::from_millis(10);
/// Upper bound on how long startup initialization keeps retrying before it
/// gives up with a timeout error (non-test builds).
#[cfg(not(test))]
const STARTUP_BACKFILL_WAIT_TIMEOUT: Duration = Duration::from_secs(30);
/// Shorter wait bound used when the crate is compiled with `cfg(test)`.
#[cfg(test)]
const STARTUP_BACKFILL_WAIT_TIMEOUT: Duration = Duration::from_secs(2);
/// Initialize the state runtime for thread state persistence.
///
/// This is the process entry point for local state: it opens the SQLite-backed
/// runtime, applies rollout metadata backfills as needed, and returns the
/// initialized handle.
pub async fn init(config: &impl RolloutConfigView) -> Option<StateDbHandle> {
let config = RolloutConfig::from_view(config);
match try_init_with_roots(
config.codex_home,
config.sqlite_home,
config.model_provider_id,
)
.await
{
Ok(runtime) => Some(runtime),
Err(err) => {
emit_startup_warning(&format!("failed to initialize state runtime: {err}"));
None
}
}
}
/// Fallible variant of [`init`]: initialize the state runtime and hand any
/// initialization error back to the caller.
///
/// Prefer [`init`] unless the caller needs to surface the exact failure after
/// tracing or UI setup has completed.
pub async fn try_init(config: &impl RolloutConfigView) -> anyhow::Result<StateDbHandle> {
    let roots = RolloutConfig::from_view(config);
    let (codex_home, sqlite_home, provider_id) = (
        roots.codex_home,
        roots.sqlite_home,
        roots.model_provider_id,
    );
    try_init_with_roots(codex_home, sqlite_home, provider_id).await
}
/// Initialize the state runtime from explicit roots without a lease override.
///
/// Passing `None` as the lease makes the inner initializer call
/// `metadata::backfill_sessions` rather than the explicit-lease variant.
async fn try_init_with_roots(
    codex_home: PathBuf,
    sqlite_home: PathBuf,
    default_model_provider_id: String,
) -> anyhow::Result<StateDbHandle> {
    let backfill_lease_seconds: Option<i64> = None;
    try_init_with_roots_inner(
        codex_home,
        sqlite_home,
        default_model_provider_id,
        backfill_lease_seconds,
    )
    .await
}
/// Test-only initializer that forwards an explicit backfill lease duration
/// (in seconds) to the inner startup routine.
#[cfg(test)]
async fn try_init_with_roots_and_backfill_lease(
    codex_home: PathBuf,
    sqlite_home: PathBuf,
    default_model_provider_id: String,
    backfill_lease_seconds: i64,
) -> anyhow::Result<StateDbHandle> {
    let lease_override = Some(backfill_lease_seconds);
    try_init_with_roots_inner(
        codex_home,
        sqlite_home,
        default_model_provider_id,
        lease_override,
    )
    .await
}
/// Open the state runtime and keep retrying the rollout metadata backfill
/// until it reports `Complete` or the startup wait budget is exhausted.
///
/// * `codex_home` - root passed to the backfill and shown in log/error text.
/// * `sqlite_home` - directory handed to `StateRuntime::init`.
/// * `default_model_provider_id` - passed to both runtime init and backfill.
/// * `backfill_lease_seconds` - `Some` selects
///   `metadata::backfill_sessions_with_lease`; `None` selects
///   `metadata::backfill_sessions`.
///
/// # Errors
///
/// Returns an error when the runtime cannot be opened, when the backfill
/// state cannot be read, or when the backfill is still not complete once
/// `STARTUP_BACKFILL_WAIT_TIMEOUT` has elapsed.
async fn try_init_with_roots_inner(
codex_home: PathBuf,
sqlite_home: PathBuf,
default_model_provider_id: String,
backfill_lease_seconds: Option<i64>,
) -> anyhow::Result<StateDbHandle> {
let runtime =
codex_state::StateRuntime::init(sqlite_home.clone(), default_model_provider_id.clone())
.await
.map_err(|err| {
anyhow::anyhow!(
"failed to initialize state runtime at {}: {err}",
sqlite_home.display()
)
})?;
// The timeout clock starts after the runtime is open, so it covers only
// the backfill attempts and the sleeps between them.
let wait_started = Instant::now();
// Tracks whether the first "waiting" notice was already emitted.
let mut reported_wait = false;
loop {
// Check first: when the backfill is already complete no backfill call is made.
let backfill_state = runtime.get_backfill_state().await.map_err(|err| {
anyhow::anyhow!(
"failed to read backfill state at {}: {err}",
codex_home.display()
)
})?;
if backfill_state.status == codex_state::BackfillStatus::Complete {
return Ok(runtime);
}
// Neither backfill call returns a value that is inspected here; the
// outcome is observed by re-reading the backfill state below.
if let Some(backfill_lease_seconds) = backfill_lease_seconds {
metadata::backfill_sessions_with_lease(
runtime.as_ref(),
codex_home.as_path(),
default_model_provider_id.as_str(),
backfill_lease_seconds,
)
.await;
} else {
metadata::backfill_sessions(
runtime.as_ref(),
codex_home.as_path(),
default_model_provider_id.as_str(),
)
.await;
}
let backfill_state = runtime.get_backfill_state().await.map_err(|err| {
anyhow::anyhow!(
"failed to read backfill state at {} after startup backfill: {err}",
codex_home.display()
)
})?;
if backfill_state.status == codex_state::BackfillStatus::Complete {
return Ok(runtime);
}
// The deadline is only checked after a backfill attempt, so the total
// wait can exceed the timeout by the duration of that attempt.
if wait_started.elapsed() >= STARTUP_BACKFILL_WAIT_TIMEOUT {
return Err(anyhow::anyhow!(
"timed out waiting for state db backfill at {} after {:?} (status: {})",
codex_home.display(),
STARTUP_BACKFILL_WAIT_TIMEOUT,
backfill_state.status.as_str()
));
}
let message = format!(
"state db backfill is {} at {}; waiting up to {:?} before retrying startup initialization",
backfill_state.status.as_str(),
codex_home.display(),
STARTUP_BACKFILL_WAIT_TIMEOUT,
);
// Only the first notice goes through the startup-warning path; later
// repeats are logged at info level.
if reported_wait {
info!("{message}");
} else {
emit_startup_warning(&message);
reported_wait = true;
}
tokio::time::sleep(STARTUP_BACKFILL_POLL_INTERVAL).await;
}
}
/// Report a startup warning through `tracing`, and additionally on stderr
/// when no tracing dispatcher has been set yet.
fn emit_startup_warning(message: &str) {
    warn!("{message}");
    if tracing::dispatcher::has_been_set() {
        return;
    }
    #[allow(clippy::print_stderr)]
    {
        eprintln!("{message}");
    }
}
/// Open an existing state DB, but only hand it out when its startup backfill
/// has already completed.
///
/// Unlike [`init`], this helper does not run rollout backfill. It is for
/// optional local reads from non-owning contexts such as remote app-server mode.
/// Returns `None` when the DB file is absent (or its existence cannot be
/// checked), when the runtime fails to open, or when the backfill is not complete.
pub async fn get_state_db(config: &impl RolloutConfigView) -> Option<StateDbHandle> {
    let sqlite_home = config.sqlite_home();
    let db_file = codex_state::state_db_path(sqlite_home);
    let db_present = tokio::fs::try_exists(&db_file).await.unwrap_or(false);
    if !db_present {
        return None;
    }
    let opened = codex_state::StateRuntime::init(
        sqlite_home.to_path_buf(),
        config.model_provider_id().to_string(),
    )
    .await;
    let runtime = opened.ok()?;
    require_backfill_complete(runtime, sqlite_home).await
}
/// Pass `runtime` through only when its backfill status is `Complete`.
///
/// An incomplete backfill, or a failure to read the backfill state, is logged
/// with `codex_home` for context and turns into `None`.
async fn require_backfill_complete(
    runtime: StateDbHandle,
    codex_home: &Path,
) -> Option<StateDbHandle> {
    let state = match runtime.get_backfill_state().await {
        Ok(state) => state,
        Err(err) => {
            warn!(
                "failed to read backfill state at {}: {err}",
                codex_home.display()
            );
            return None;
        }
    };
    if state.status == codex_state::BackfillStatus::Complete {
        return Some(runtime);
    }
    warn!(
        "state db backfill not complete at {} (status: {})",
        codex_home.display(),
        state.status.as_str()
    );
    None
}
/// Convert an optional pagination cursor into a state-DB anchor.
///
/// The cursor timestamp is truncated to whole milliseconds. `None` is returned
/// when there is no cursor, when the millisecond value does not fit in an
/// `i64`, or when chrono cannot represent it.
fn cursor_to_anchor(cursor: Option<&Cursor>) -> Option<codex_state::Anchor> {
    let nanos = cursor?.timestamp().unix_timestamp_nanos();
    let millis = i64::try_from(nanos / 1_000_000).ok()?;
    chrono::DateTime::<Utc>::from_timestamp_millis(millis).map(|ts| codex_state::Anchor { ts })
}
/// Normalize a working directory before it is stored in or compared against
/// the state DB.
///
/// When `normalize_for_path_comparison` fails, the input path is returned
/// unchanged as an owned `PathBuf`.
pub fn normalize_cwd_for_state_db(cwd: &Path) -> PathBuf {
    match normalize_for_path_comparison(cwd) {
        Ok(normalized) => normalized,
        Err(_) => cwd.to_path_buf(),
    }
}
/// Fetch one page of thread ids straight from SQLite for parity checks,
/// without scanning rollout files.
///
/// Returns `None` when `context` is absent or when the query fails (the
/// failure is logged together with `stage`). A `codex_home` that differs from
/// the runtime's own only produces a warning; the query still runs.
#[allow(clippy::too_many_arguments)]
pub async fn list_thread_ids_db(
    context: Option<&codex_state::StateRuntime>,
    codex_home: &Path,
    page_size: usize,
    cursor: Option<&Cursor>,
    sort_key: ThreadSortKey,
    allowed_sources: &[SessionSource],
    model_providers: Option<&[String]>,
    archived_only: bool,
    stage: &str,
) -> Option<Vec<ThreadId>> {
    let runtime = context?;
    let runtime_home = runtime.codex_home();
    if runtime_home != codex_home {
        warn!(
            "state db codex_home mismatch: expected {}, got {}",
            runtime_home.display(),
            codex_home.display()
        );
    }
    let anchor = cursor_to_anchor(cursor);
    // Sources are matched by their serialized form: plain JSON strings are
    // used verbatim, any other JSON value is rendered as JSON text, and a
    // serialization failure becomes an empty string.
    let mut source_names: Vec<String> = Vec::with_capacity(allowed_sources.len());
    for source in allowed_sources {
        let name = match serde_json::to_value(source) {
            Ok(Value::String(text)) => text,
            Ok(other) => other.to_string(),
            Err(_) => String::new(),
        };
        source_names.push(name);
    }
    let provider_filter: Option<Vec<String>> =
        model_providers.map(|providers| providers.to_vec());
    let db_sort_key = match sort_key {
        ThreadSortKey::UpdatedAt => codex_state::SortKey::UpdatedAt,
        ThreadSortKey::CreatedAt => codex_state::SortKey::CreatedAt,
    };
    let listed = runtime
        .list_thread_ids(
            page_size,
            anchor.as_ref(),
            db_sort_key,
            source_names.as_slice(),
            provider_filter.as_deref(),
            archived_only,
        )
        .await;
    listed
        .inspect_err(|err| warn!("state db list_thread_ids failed during {stage}: {err}"))
        .ok()
}
/// List thread metadata from SQLite without rollout directory traversal.
///
/// Returns `None` when `context` is absent or when the query fails (the
/// failure is logged). A `codex_home` that differs from the runtime's own only
/// produces a warning; the query still runs.
///
/// `cwd_filters` are normalized with [`normalize_cwd_for_state_db`] before
/// they are sent to the DB. Allowed sources are matched by their serialized
/// form: plain JSON strings are used verbatim, other JSON values are rendered
/// as JSON text, and serialization failures become empty strings.
///
/// Rows whose `rollout_path` is not found on disk (including when the
/// existence check itself errors) are dropped from the returned page and
/// deleted from the DB on a best-effort basis. A failed delete is logged and
/// does not fail the listing, so the page may hold fewer than `page_size` items.
#[allow(clippy::too_many_arguments)]
pub async fn list_threads_db(
    context: Option<&codex_state::StateRuntime>,
    codex_home: &Path,
    page_size: usize,
    cursor: Option<&Cursor>,
    sort_key: ThreadSortKey,
    sort_direction: SortDirection,
    allowed_sources: &[SessionSource],
    model_providers: Option<&[String]>,
    cwd_filters: Option<&[PathBuf]>,
    archived: bool,
    search_term: Option<&str>,
) -> Option<codex_state::ThreadsPage> {
    let ctx = context?;
    if ctx.codex_home() != codex_home {
        warn!(
            "state db codex_home mismatch: expected {}, got {}",
            ctx.codex_home().display(),
            codex_home.display()
        );
    }
    let anchor = cursor_to_anchor(cursor);
    let allowed_sources: Vec<String> = allowed_sources
        .iter()
        .map(|value| match serde_json::to_value(value) {
            Ok(Value::String(s)) => s,
            Ok(other) => other.to_string(),
            Err(_) => String::new(),
        })
        .collect();
    let model_providers = model_providers.map(<[String]>::to_vec);
    let normalized_cwd_filters = cwd_filters.map(|filters| {
        filters
            .iter()
            .map(|cwd| normalize_cwd_for_state_db(cwd))
            .collect::<Vec<_>>()
    });
    match ctx
        .list_threads(
            page_size,
            codex_state::ThreadFilterOptions {
                archived_only: archived,
                allowed_sources: allowed_sources.as_slice(),
                model_providers: model_providers.as_deref(),
                cwd_filters: normalized_cwd_filters.as_deref(),
                anchor: anchor.as_ref(),
                sort_key: match sort_key {
                    ThreadSortKey::CreatedAt => codex_state::SortKey::CreatedAt,
                    ThreadSortKey::UpdatedAt => codex_state::SortKey::UpdatedAt,
                },
                sort_direction: match sort_direction {
                    SortDirection::Asc => codex_state::SortDirection::Asc,
                    SortDirection::Desc => codex_state::SortDirection::Desc,
                },
                search_term,
            },
        )
        .await
    {
        Ok(mut page) => {
            let mut valid_items = Vec::with_capacity(page.items.len());
            for item in page.items {
                if tokio::fs::try_exists(&item.rollout_path)
                    .await
                    .unwrap_or(false)
                {
                    valid_items.push(item);
                } else {
                    warn!(
                        "state db list_threads returned stale rollout path for thread {}: {}",
                        item.id,
                        item.rollout_path.display()
                    );
                    warn!("state db discrepancy during list_threads_db: stale_db_path_dropped");
                    // Cleanup stays best-effort, but a failure is logged so a
                    // stale row that keeps reappearing can be diagnosed.
                    if let Err(err) = ctx.delete_thread(item.id).await {
                        warn!(
                            "state db list_threads failed to delete stale thread {}: {err}",
                            item.id
                        );
                    }
                }
            }
            page.items = valid_items;
            Some(page)
        }
        Err(err) => {
            warn!("state db list_threads failed: {err}");
            None
        }
    }
}
/// Resolve a thread id to its rollout path via SQLite.
///
/// Returns `None` when `context` is absent, when the runtime reports no path,
/// or when the lookup fails (the failure is logged together with `stage`).
pub async fn find_rollout_path_by_id(
    context: Option<&codex_state::StateRuntime>,
    thread_id: ThreadId,
    archived_only: Option<bool>,
    stage: &str,
) -> Option<PathBuf> {
    let runtime = context?;
    match runtime
        .find_rollout_path_by_id(thread_id, archived_only)
        .await
    {
        Ok(found) => found,
        Err(err) => {
            warn!("state db find_rollout_path_by_id failed during {stage}: {err}");
            None
        }
    }
}
/// Read the dynamic tools stored for a thread id in SQLite.
///
/// Returns `None` when `context` is absent, when the runtime returns no
/// tools, or when the read fails (the failure is logged together with `stage`).
pub async fn get_dynamic_tools(
    context: Option<&codex_state::StateRuntime>,
    thread_id: ThreadId,
    stage: &str,
) -> Option<Vec<DynamicToolSpec>> {
    let runtime = context?;
    runtime
        .get_dynamic_tools(thread_id)
        .await
        .unwrap_or_else(|err| {
            warn!("state db get_dynamic_tools failed during {stage}: {err}");
            None
        })
}
/// Persist dynamic tools for a thread id using SQLite, if none exist yet.
///
/// Does nothing when `context` is absent. A failed write is logged together
/// with `stage` and is not returned to the caller.
pub async fn persist_dynamic_tools(
    context: Option<&codex_state::StateRuntime>,
    thread_id: ThreadId,
    tools: Option<&[DynamicToolSpec]>,
    stage: &str,
) {
    let Some(runtime) = context else {
        return;
    };
    let outcome = runtime.persist_dynamic_tools(thread_id, tools).await;
    if let Err(err) = outcome {
        warn!("state db persist_dynamic_tools failed during {stage}: {err}");
    }
}
/// Forward to `StateRuntime::mark_thread_memory_mode_polluted` for `thread_id`.
///
/// Does nothing when `context` is absent. A failure is logged together with
/// `stage` and is not returned to the caller.
pub async fn mark_thread_memory_mode_polluted(
    context: Option<&codex_state::StateRuntime>,
    thread_id: ThreadId,
    stage: &str,
) {
    let Some(runtime) = context else {
        return;
    };
    let marked = runtime.mark_thread_memory_mode_polluted(thread_id).await;
    if let Err(err) = marked {
        warn!("state db mark_thread_memory_mode_polluted failed during {stage}: {err}");
    }
}
/// Reconcile rollout items into SQLite, falling back to scanning the rollout file.
///
/// Does nothing when `context` is absent. There are two paths:
///
/// * When `builder` is provided or `items` is non-empty, the work is handed to
///   [`apply_rollout_items`] and the function returns. `archived_only` is not
///   consulted on this path.
/// * Otherwise metadata is extracted from the file at `rollout_path`, adjusted
///   (normalized cwd, existing git info, archive flag), upserted, and followed
///   by a memory-mode update and a dynamic-tools persist.
///
/// Every failure is logged and ends the function early; nothing is returned
/// to the caller.
pub async fn reconcile_rollout(
context: Option<&codex_state::StateRuntime>,
rollout_path: &Path,
default_provider: &str,
builder: Option<&ThreadMetadataBuilder>,
items: &[RolloutItem],
archived_only: Option<bool>,
new_thread_memory_mode: Option<&str>,
) {
let Some(ctx) = context else {
return;
};
// Incremental path: the caller already has a builder and/or in-memory items.
if builder.is_some() || !items.is_empty() {
apply_rollout_items(
Some(ctx),
rollout_path,
default_provider,
builder,
items,
"reconcile_rollout",
new_thread_memory_mode,
/*updated_at_override*/ None,
)
.await;
return;
}
// File-scan path: rebuild the metadata from the rollout file itself.
let outcome =
match metadata::extract_metadata_from_rollout(rollout_path, default_provider).await {
Ok(outcome) => outcome,
Err(err) => {
warn!(
"state db reconcile_rollout extraction failed {}: {err}",
rollout_path.display()
);
return;
}
};
let mut metadata = outcome.metadata;
// A rollout without a recorded memory mode is stored as "enabled".
let memory_mode = outcome.memory_mode.unwrap_or_else(|| "enabled".to_string());
metadata.cwd = normalize_cwd_for_state_db(&metadata.cwd);
// A missing or unreadable existing row is skipped silently here.
// NOTE(review): presumably keeps git info already stored in the row — confirm
// against `prefer_existing_git_info`.
if let Ok(Some(existing_metadata)) = ctx.get_thread(metadata.id).await {
metadata.prefer_existing_git_info(&existing_metadata);
}
// `Some(true)` stamps a missing archive time with `updated_at`, `Some(false)`
// clears it, and `None` (or an already-archived row) leaves it untouched.
match archived_only {
Some(true) if metadata.archived_at.is_none() => {
metadata.archived_at = Some(metadata.updated_at);
}
Some(false) => {
metadata.archived_at = None;
}
Some(true) | None => {}
}
if let Err(err) = ctx.upsert_thread(&metadata).await {
warn!(
"state db reconcile_rollout upsert failed {}: {err}",
rollout_path.display()
);
return;
}
if let Err(err) = ctx
.set_thread_memory_mode(metadata.id, memory_mode.as_str())
.await
{
warn!(
"state db reconcile_rollout memory_mode update failed {}: {err}",
rollout_path.display()
);
return;
}
// Dynamic tools come from the session meta line, which is read separately
// from the metadata extraction above.
if let Ok(meta_line) = crate::list::read_session_meta_line(rollout_path).await {
persist_dynamic_tools(
Some(ctx),
meta_line.meta.id,
meta_line.meta.dynamic_tools.as_deref(),
"reconcile_rollout",
)
.await;
} else {
warn!(
"state db reconcile_rollout missing session meta {}",
rollout_path.display()
);
}
}
/// Repair a thread's rollout path after filesystem fallback succeeds.
///
/// Does nothing when `context` is absent. When `thread_id` is given and its
/// row can be read, the row is updated in place (fast path). Otherwise, or
/// when that in-place upsert fails, the metadata is rebuilt from the rollout
/// file through [`reconcile_rollout`] (slow path).
///
/// `archived_only` adjusts the archive flag: `Some(true)` stamps a missing
/// archive time with `updated_at`, `Some(false)` clears it, and `None` leaves
/// it untouched. Failures are logged and never returned to the caller.
pub async fn read_repair_rollout_path(
context: Option<&codex_state::StateRuntime>,
thread_id: Option<ThreadId>,
archived_only: Option<bool>,
rollout_path: &Path,
) {
let Some(ctx) = context else {
return;
};
// Fast path: update an existing metadata row in place, but avoid writes when
// read-repair computes no effective change.
let mut saw_existing_metadata = false;
// A `get_thread` error is treated the same as a missing row: both skip the
// fast path.
if let Some(thread_id) = thread_id
&& let Ok(Some(metadata)) = ctx.get_thread(thread_id).await
{
saw_existing_metadata = true;
let mut repaired = metadata.clone();
repaired.rollout_path = rollout_path.to_path_buf();
repaired.cwd = normalize_cwd_for_state_db(&repaired.cwd);
match archived_only {
Some(true) if repaired.archived_at.is_none() => {
repaired.archived_at = Some(repaired.updated_at);
}
Some(false) => {
repaired.archived_at = None;
}
Some(true) | None => {}
}
// Nothing changed, so the row is already correct and no write is needed.
if repaired == metadata {
return;
}
warn!("state db discrepancy during read_repair_rollout_path: upsert_needed (fast path)");
if let Err(err) = ctx.upsert_thread(&repaired).await {
warn!(
"state db read-repair upsert failed for {}: {err}",
rollout_path.display()
);
} else {
return;
}
}
// Slow path: when the row is missing/unreadable (or direct upsert failed),
// rebuild metadata from rollout contents and reconcile it into SQLite.
// The slow-path discrepancy is only logged when the fast path never ran; a
// failed fast-path upsert has already logged its own warning.
if !saw_existing_metadata {
warn!("state db discrepancy during read_repair_rollout_path: upsert_needed (slow path)");
}
// The provider comes from the rollout's own session meta; an unreadable
// meta line or a missing provider yields an empty string.
let default_provider = crate::list::read_session_meta_line(rollout_path)
.await
.ok()
.and_then(|meta| meta.meta.model_provider)
.unwrap_or_default();
// No builder and no items, so `reconcile_rollout` takes its file-scan path.
reconcile_rollout(
Some(ctx),
rollout_path,
default_provider.as_str(),
/*builder*/ None,
&[],
archived_only,
/*new_thread_memory_mode*/ None,
)
.await;
}
/// Apply rollout items incrementally to SQLite.
///
/// Does nothing when `context` is absent. The metadata builder is a clone of
/// `builder` when one is supplied; otherwise it is derived from `items` and
/// `rollout_path`. If neither yields a builder, the discrepancy is logged
/// with `stage` and nothing is written.
///
/// Before the write, the builder's rollout path is set to `rollout_path`, its
/// cwd is normalized, and a missing model provider is filled in from
/// `default_provider`. A failed write is logged and not returned.
#[allow(clippy::too_many_arguments)]
pub async fn apply_rollout_items(
    context: Option<&codex_state::StateRuntime>,
    rollout_path: &Path,
    default_provider: &str,
    builder: Option<&ThreadMetadataBuilder>,
    items: &[RolloutItem],
    stage: &str,
    new_thread_memory_mode: Option<&str>,
    updated_at_override: Option<DateTime<Utc>>,
) {
    let Some(runtime) = context else {
        return;
    };
    let resolved = builder
        .cloned()
        .or_else(|| metadata::builder_from_items(items, rollout_path));
    let Some(mut builder) = resolved else {
        warn!(
            "state db apply_rollout_items missing builder during {stage}: {}",
            rollout_path.display()
        );
        warn!("state db discrepancy during apply_rollout_items: {stage}, missing_builder");
        return;
    };
    if builder.model_provider.is_none() {
        builder.model_provider = Some(default_provider.to_string());
    }
    builder.rollout_path = rollout_path.to_path_buf();
    builder.cwd = normalize_cwd_for_state_db(&builder.cwd);
    let applied = runtime
        .apply_rollout_items(&builder, items, new_thread_memory_mode, updated_at_override)
        .await;
    if let Err(err) = applied {
        warn!(
            "state db apply_rollout_items failed during {stage} for {}: {err}",
            rollout_path.display()
        );
    }
}
/// Forward an `updated_at` touch for a thread to the state runtime.
///
/// Returns the runtime's own boolean result. Returns `false` when `context`
/// or `thread_id` is absent, or when the update fails (the failure is logged
/// together with `stage` and the thread id).
pub async fn touch_thread_updated_at(
    context: Option<&codex_state::StateRuntime>,
    thread_id: Option<ThreadId>,
    updated_at: DateTime<Utc>,
    stage: &str,
) -> bool {
    let (Some(runtime), Some(thread_id)) = (context, thread_id) else {
        return false;
    };
    match runtime.touch_thread_updated_at(thread_id, updated_at).await {
        Ok(touched) => touched,
        Err(err) => {
            warn!("state db touch_thread_updated_at failed during {stage} for {thread_id}: {err}");
            false
        }
    }
}
// Unit tests for this module live in the sibling `state_db_tests.rs` file and
// are only compiled for test builds.
#[cfg(test)]
#[path = "state_db_tests.rs"]
mod tests;