Compare commits

...

6 Commits

Author SHA1 Message Date
David de Regt
43f76b7ec5 PR feedback 2026-05-28 13:21:52 -07:00
David de Regt
90ca356b53 add ability to recover page 0 corruptions 2026-05-28 13:21:52 -07:00
David de Regt
03f1c6f260 pr feedback 2026-05-28 13:21:51 -07:00
David de Regt
b47eef3115 build fixes, maybe 2026-05-28 13:21:51 -07:00
David de Regt
2cdc1e191d PR feedback 2026-05-28 13:21:51 -07:00
David de Regt
cab26c24e2 Automatic SQLite DB recovery
We're seeing increasing user reports of corrupted SQLite databases, and the vast majority of those have been recoverable with sqlite's built-in recovery process.  Unfortunately, the rust crate we're using doesn't have the recovery code compiled in, so we have to compile the recovery functions specifically ourselves, so we vendor those files into the repo here and run it if we detect a malformed DB during sqlite startup.  If the recovery attempt fails, it reverts to the original logic where it asks if the user wants to blow away their current database and start over.
2026-05-28 13:21:51 -07:00
19 changed files with 7939 additions and 5 deletions

2
.cargo/config.toml Normal file
View File

@@ -0,0 +1,2 @@
[env]
LIBSQLITE3_FLAGS = "SQLITE_ENABLE_DBPAGE_VTAB"

View File

@@ -275,6 +275,14 @@ crate.annotation(
inject_repo(crate, "zlib")
crate.annotation(
build_script_env = {
"LIBSQLITE3_FLAGS": "SQLITE_ENABLE_DBPAGE_VTAB",
},
crate = "libsqlite3-sys",
gen_build_script = "on",
)
bazel_dep(name = "xz", version = "5.4.5.bcr.8")
single_version_override(
module_name = "xz",

2
codex-rs/Cargo.lock generated
View File

@@ -3723,11 +3723,13 @@ name = "codex-state"
version = "0.0.0"
dependencies = [
"anyhow",
"cc",
"chrono",
"clap",
"codex-git-utils",
"codex-protocol",
"dirs",
"libsqlite3-sys",
"log",
"owo-colors",
"pretty_assertions",

View File

@@ -263,6 +263,7 @@ axum = { version = "0.8", default-features = false }
base64 = "0.22.1"
bm25 = "2.3.2"
bytes = "1.10.1"
cc = "1.2.55"
chardetng = "0.1.17"
chrono = "0.4.43"
clap = "4"
@@ -311,6 +312,7 @@ keyring = { version = "3.6", default-features = false }
landlock = "0.4.4"
lazy_static = "1"
libc = "0.2.182"
libsqlite3-sys = "0.30.1"
log = "0.4"
lru = "0.16.3"
maplit = "1.0.2"

View File

@@ -18,7 +18,7 @@ pub(crate) fn is_locked(detail: &str) -> bool {
pub(crate) fn confirm_repair(startup_error: &LocalStateDbStartupError) -> std::io::Result<bool> {
eprintln!("Codex couldn't start because its local database appears to be damaged.");
eprintln!("Codex can try a safe repair by backing up those files and rebuilding them.");
eprintln!("Codex can try to repair by backing up and rebuilding those files.");
print_technical_details(startup_error);
crate::confirm("Repair Codex local data now? [y/N]: ")
}

View File

@@ -1,12 +1,31 @@
load("@rules_cc//cc:defs.bzl", "cc_library")
load("//:defs.bzl", "codex_rust_crate")
codex_rust_crate(
name = "state",
crate_name = "codex_state",
# Cargo uses build.rs to compile the SQLite recovery extension. Bazel wires
# the same C sources directly so the native archive is a normal dependency.
build_script_enabled = False,
compile_data = glob([
"goals_migrations/**",
"logs_migrations/**",
"memory_migrations/**",
"migrations/**",
]),
deps_extra = [":sqlite-recover"],
)
cc_library(
name = "sqlite-recover",
srcs = [
"vendor/sqlite-recover/dbdata.c",
"vendor/sqlite-recover/sqlite3recover.c",
],
hdrs = [
"vendor/sqlite-recover/sqlite3.h",
"vendor/sqlite-recover/sqlite3recover.h",
],
includes = ["vendor/sqlite-recover"],
visibility = ["//visibility:private"],
)

View File

@@ -3,6 +3,7 @@ name = "codex-state"
version.workspace = true
edition.workspace = true
license.workspace = true
build = "build.rs"
[dependencies]
anyhow = { workspace = true }
@@ -10,6 +11,7 @@ chrono = { workspace = true }
clap = { workspace = true, features = ["derive", "env"] }
codex-protocol = { workspace = true }
dirs = { workspace = true }
libsqlite3-sys = { workspace = true }
log = { workspace = true }
owo-colors = { workspace = true }
serde = { workspace = true, features = ["derive"] }
@@ -25,5 +27,8 @@ uuid = { workspace = true }
codex-git-utils = { workspace = true }
pretty_assertions = { workspace = true }
[build-dependencies]
cc = { workspace = true }
[lints]
workspace = true

13
codex-rs/state/build.rs Normal file
View File

@@ -0,0 +1,13 @@
fn main() {
println!("cargo:rerun-if-changed=vendor/sqlite-recover/dbdata.c");
println!("cargo:rerun-if-changed=vendor/sqlite-recover/sqlite3.h");
println!("cargo:rerun-if-changed=vendor/sqlite-recover/sqlite3recover.c");
println!("cargo:rerun-if-changed=vendor/sqlite-recover/sqlite3recover.h");
cc::Build::new()
.file("vendor/sqlite-recover/dbdata.c")
.file("vendor/sqlite-recover/sqlite3recover.c")
.include("vendor/sqlite-recover")
.warnings(false)
.compile("codex_sqlite_recover");
}

View File

@@ -95,3 +95,7 @@ pub const DB_INIT_METRIC: &str = "codex.sqlite.init.count";
pub const DB_INIT_DURATION_METRIC: &str = "codex.sqlite.init.duration_ms";
/// Rollout fallback attempts. Tags: [caller, reason]
pub const DB_FALLBACK_METRIC: &str = "codex.sqlite.fallback.count";
/// SQLite automatic recovery attempts. Tags: [status, db, error, trigger_error]
pub const DB_RECOVERY_METRIC: &str = "codex.sqlite.recovery.count";
/// SQLite automatic recovery latency. Tags: [status, db, error, trigger_error]
pub const DB_RECOVERY_DURATION_METRIC: &str = "codex.sqlite.recovery.duration_ms";

View File

@@ -62,6 +62,7 @@ mod backfill;
mod goals;
mod logs;
mod memories;
mod recovery;
mod remote_control;
#[cfg(test)]
mod test_support;
@@ -354,6 +355,37 @@ async fn open_sqlite(
telemetry_override: Option<&dyn DbTelemetry>,
) -> anyhow::Result<SqlitePool> {
let options = base_sqlite_options(path).auto_vacuum(SqliteAutoVacuum::Incremental);
let pool = match connect_sqlite(options.clone(), spec, telemetry_override).await {
Ok(pool) => pool,
Err(err) => {
if !recovery::is_malformed_sqlite_error(&err) {
return Err(err);
}
recovery::recover_database(path, spec, migrator, &err, telemetry_override).await?;
connect_sqlite(options.clone(), spec, telemetry_override).await?
}
};
match migrate_sqlite(&pool, migrator, spec, telemetry_override).await {
Ok(()) => Ok(pool),
Err(err) => {
if !recovery::is_malformed_sqlite_error(&err) {
return Err(err);
}
pool.close().await;
recovery::recover_database(path, spec, migrator, &err, telemetry_override).await?;
let pool = connect_sqlite(options, spec, telemetry_override).await?;
migrate_sqlite(&pool, migrator, spec, telemetry_override).await?;
Ok(pool)
}
}
}
async fn connect_sqlite(
options: SqliteConnectOptions,
spec: RuntimeDbSpec,
telemetry_override: Option<&dyn DbTelemetry>,
) -> anyhow::Result<SqlitePool> {
let started = Instant::now();
let pool_result = SqlitePoolOptions::new()
.max_connections(5)
@@ -367,9 +399,17 @@ async fn open_sqlite(
started.elapsed(),
&pool_result,
);
let pool = pool_result?;
pool_result
}
async fn migrate_sqlite(
pool: &SqlitePool,
migrator: &Migrator,
spec: RuntimeDbSpec,
telemetry_override: Option<&dyn DbTelemetry>,
) -> anyhow::Result<()> {
let started = Instant::now();
let migrate_result = migrator.run(&pool).await.map_err(anyhow::Error::from);
let migrate_result = migrator.run(pool).await.map_err(anyhow::Error::from);
crate::telemetry::record_init_result(
telemetry_override,
spec.kind,
@@ -377,8 +417,7 @@ async fn open_sqlite(
started.elapsed(),
&migrate_result,
);
migrate_result?;
Ok(pool)
migrate_result
}
pub(super) async fn ensure_backfill_state_row_in_pool(
@@ -468,6 +507,7 @@ mod tests {
use super::state_db_path;
use super::test_support::unique_temp_dir;
use crate::DB_INIT_METRIC;
use crate::DB_RECOVERY_METRIC;
use crate::DbTelemetry;
use crate::migrations::STATE_MIGRATOR;
use pretty_assertions::assert_eq;
@@ -476,6 +516,10 @@ mod tests {
use sqlx::sqlite::SqliteConnectOptions;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::fs::OpenOptions;
use std::io::Seek;
use std::io::SeekFrom;
use std::io::Write;
use std::path::Path;
use std::sync::Mutex;
@@ -620,6 +664,102 @@ mod tests {
let _ = tokio::fs::remove_dir_all(codex_home).await;
}
#[tokio::test]
async fn open_state_sqlite_recovers_malformed_database_on_startup() {
let codex_home = unique_temp_dir();
tokio::fs::create_dir_all(&codex_home)
.await
.expect("create codex home");
let state_path = state_db_path(codex_home.as_path());
let pool = SqlitePool::connect_with(
SqliteConnectOptions::new()
.filename(&state_path)
.create_if_missing(true),
)
.await
.expect("open state db");
STATE_MIGRATOR
.run(&pool)
.await
.expect("apply current state schema");
let thread_id = "00000000-0000-0000-0000-000000000123";
sqlx::query(
r#"
INSERT INTO threads (
id,
rollout_path,
created_at,
updated_at,
source,
model_provider,
cwd,
title,
sandbox_policy,
approval_mode
) VALUES (?, ?, 1, 1, 'cli', 'test-provider', ?, 'startup recovery', 'read-only', 'on-request')
"#,
)
.bind(thread_id)
.bind(codex_home.join("session.jsonl").display().to_string())
.bind(codex_home.as_path().display().to_string())
.execute(&pool)
.await
.expect("insert thread");
let page_size: i64 = sqlx::query_scalar("PRAGMA page_size")
.fetch_one(&pool)
.await
.expect("read page size");
let migration_root_page: i64 = sqlx::query_scalar(
"SELECT rootpage FROM sqlite_schema WHERE name = '_sqlx_migrations'",
)
.fetch_one(&pool)
.await
.expect("read migration root page");
pool.close().await;
corrupt_page(
state_path.as_path(),
page_size.try_into().expect("page size should fit u64"),
migration_root_page
.try_into()
.expect("root page should fit u64"),
)
.expect("corrupt migration table page");
let telemetry = TestTelemetry::default();
let tolerant_migrator = runtime_state_migrator();
let recovered_pool =
open_state_sqlite(state_path.as_path(), &tolerant_migrator, Some(&telemetry))
.await
.expect("startup should recover malformed state db");
let title: String = sqlx::query_scalar("SELECT title FROM threads WHERE id = ?")
.bind(thread_id)
.fetch_one(&recovered_pool)
.await
.expect("recovered thread should exist");
recovered_pool.close().await;
assert_eq!(title, "startup recovery");
let integrity = sqlite_integrity_check(state_path.as_path())
.await
.expect("integrity check should run");
assert_eq!(integrity, vec!["ok".to_string()]);
let recovery_event = telemetry
.counters()
.into_iter()
.find(|event| event.name == DB_RECOVERY_METRIC)
.expect("recovery metric should be recorded");
assert_eq!(
recovery_event.tags,
BTreeMap::from([
("db".to_string(), "state".to_string()),
("error".to_string(), "none".to_string()),
("status".to_string(), "success".to_string()),
("trigger_error".to_string(), "corrupt".to_string()),
])
);
let _ = tokio::fs::remove_dir_all(codex_home).await;
}
#[tokio::test]
async fn init_records_successful_sqlite_init_phases_to_explicit_telemetry() {
let codex_home = unique_temp_dir();
@@ -661,4 +801,14 @@ mod tests {
runtime.logs_pool.close().await;
let _ = tokio::fs::remove_dir_all(codex_home).await;
}
fn corrupt_page(path: &Path, page_size: u64, page_number: u64) -> std::io::Result<()> {
let offset = page_size
.checked_mul(page_number.saturating_sub(1))
.ok_or_else(|| std::io::Error::other("corrupt page offset overflowed"))?;
let mut file = OpenOptions::new().write(true).open(path)?;
file.seek(SeekFrom::Start(offset))?;
file.write_all(&[0; 16])?;
Ok(())
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,230 @@
//! Rebuild recovered SQLite tables when the schema root page is gone.
//!
//! If page 1 is destroyed, SQLite's recovery extension cannot discover the
//! sqlite_schema root. It can still recover orphaned sqlite_schema rows into
//! lost_and_found, so this module rebuilds tables from those rows and then
//! copies matching lost_and_found record groups back into the recreated tables.
use super::quote_identifier;
use anyhow::Context;
use anyhow::Result;
use sqlx::AssertSqlSafe;
use sqlx::Row;
use sqlx::SqlitePool;
use tracing::warn;
#[derive(Debug)]
struct SchemaObject {
object_type: String,
name: String,
rootpage: i64,
sql: String,
}
#[derive(Debug)]
struct TableColumn {
cid: i64,
name: String,
column_type: String,
pk: i64,
hidden: i64,
}
pub(super) async fn rebuild_from_recovered_schema_if_needed(pool: &SqlitePool) -> Result<bool> {
if !only_lost_and_found_table_exists(pool).await? {
return Ok(false);
}
let schema = recovered_schema_objects(pool).await?;
if schema
.iter()
.filter(|object| object.object_type == "table")
.count()
== 0
{
return Ok(false);
}
warn!(
"SQLite recovery produced only lost_and_found rows; rebuilding tables from recovered schema rows"
);
sqlx::query("PRAGMA foreign_keys = OFF")
.execute(pool)
.await?;
for object in schema.iter().filter(|object| object.object_type == "table") {
// Recovered sqlite_schema SQL is local database metadata, not
// interpolated user input.
sqlx::query(AssertSqlSafe(object.sql.clone()))
.execute(pool)
.await
.with_context(|| format!("failed to recreate recovered table {}", object.name))?;
}
let value_column_count = lost_and_found_value_column_count(pool).await?;
for object in schema.iter().filter(|object| object.object_type == "table") {
copy_lost_and_found_rows(pool, object, value_column_count).await?;
}
for object in schema.iter().filter(|object| object.object_type != "table") {
// Recovered sqlite_schema SQL is local database metadata, not
// interpolated user input.
if let Err(err) = sqlx::query(AssertSqlSafe(object.sql.clone()))
.execute(pool)
.await
{
warn!(
"skipping recovered {} {} during lost_and_found rebuild: {err}",
object.object_type, object.name
);
}
}
Ok(true)
}
async fn only_lost_and_found_table_exists(pool: &SqlitePool) -> Result<bool> {
let tables = sqlx::query_scalar::<_, String>(
r#"
SELECT name
FROM sqlite_schema
WHERE type = 'table'
AND name NOT LIKE 'sqlite_%'
AND name != '_sqlx_migrations'
ORDER BY name
"#,
)
.fetch_all(pool)
.await?;
Ok(tables.len() == 1 && tables[0] == "lost_and_found")
}
async fn recovered_schema_objects(pool: &SqlitePool) -> Result<Vec<SchemaObject>> {
let rows = sqlx::query(
r#"
SELECT
CAST(c0 AS TEXT) AS object_type,
CAST(c1 AS TEXT) AS name,
CAST(c3 AS INTEGER) AS rootpage,
CAST(c4 AS TEXT) AS sql
FROM lost_and_found
WHERE nfield = 5
AND c0 IN ('table', 'index', 'trigger', 'view')
AND typeof(c1) = 'text'
AND typeof(c4) = 'text'
AND c1 NOT LIKE 'sqlite_%'
ORDER BY
CASE c0
WHEN 'table' THEN 0
WHEN 'view' THEN 1
WHEN 'index' THEN 2
WHEN 'trigger' THEN 3
ELSE 4
END,
id
"#,
)
.fetch_all(pool)
.await?;
rows.into_iter()
.map(|row| {
Ok(SchemaObject {
object_type: row.try_get("object_type")?,
name: row.try_get("name")?,
rootpage: row.try_get("rootpage")?,
sql: row.try_get("sql")?,
})
})
.collect()
}
async fn lost_and_found_value_column_count(pool: &SqlitePool) -> Result<i64> {
let rows = sqlx::query("SELECT name FROM pragma_table_xinfo('lost_and_found')")
.fetch_all(pool)
.await?;
Ok(rows
.iter()
.filter_map(|row| row.try_get::<String, _>("name").ok())
.filter_map(|name| name.strip_prefix('c')?.parse::<i64>().ok())
.max()
.map_or(0, |max_column| max_column + 1))
}
async fn copy_lost_and_found_rows(
pool: &SqlitePool,
table: &SchemaObject,
value_column_count: i64,
) -> Result<()> {
if table.rootpage <= 0 {
return Ok(());
}
let columns = table_columns(pool, table.name.as_str()).await?;
let mut column_names = Vec::new();
let mut value_expressions = Vec::new();
for column in columns.into_iter().filter(|column| column.hidden == 0) {
column_names.push(quote_identifier(column.name.as_str()));
if is_integer_primary_key(&column) {
// INTEGER PRIMARY KEY values live in the b-tree rowid, not in the
// record body, so sqlite_dbdata exposes them through lost_and_found.id.
value_expressions.push("id".to_string());
} else {
if column.cid >= value_column_count {
anyhow::bail!(
"lost_and_found table has {value_column_count} value columns, but recovered table {} needs c{}",
table.name,
column.cid
);
}
value_expressions.push(format!("c{}", column.cid));
}
}
if column_names.is_empty() {
return Ok(());
}
let table_name = quote_identifier(table.name.as_str());
let sql = format!(
"INSERT OR REPLACE INTO main.{table_name} ({}) SELECT {} FROM lost_and_found WHERE rootpgno = ?",
column_names.join(", "),
value_expressions.join(", ")
);
// Dynamic identifiers are quoted with quote_identifier before interpolation.
sqlx::query(AssertSqlSafe(sql))
.bind(table.rootpage)
.execute(pool)
.await
.with_context(|| format!("failed to copy lost_and_found rows into {}", table.name))?;
Ok(())
}
async fn table_columns(pool: &SqlitePool, table: &str) -> Result<Vec<TableColumn>> {
let rows = sqlx::query(
r#"
SELECT cid, name, type, pk, hidden
FROM pragma_table_xinfo(?)
ORDER BY cid
"#,
)
.bind(table)
.fetch_all(pool)
.await?;
rows.into_iter()
.map(|row| {
Ok(TableColumn {
cid: row.try_get("cid")?,
name: row.try_get("name")?,
column_type: row.try_get("type")?,
pk: row.try_get("pk")?,
hidden: row.try_get("hidden")?,
})
})
.collect()
}
fn is_integer_primary_key(column: &TableColumn) -> bool {
column.pk > 0 && column.column_type.eq_ignore_ascii_case("INTEGER")
}

View File

@@ -0,0 +1,415 @@
use anyhow::Context;
use anyhow::Result;
use libsqlite3_sys as ffi;
use std::ffi::CStr;
use std::ffi::CString;
use std::ffi::c_char;
use std::ffi::c_int;
use std::ffi::c_void;
use std::fs::File;
use std::fs::OpenOptions;
use std::io::Read;
use std::io::Seek;
use std::io::SeekFrom;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;
use std::ptr;
use std::time::SystemTime;
use std::time::UNIX_EPOCH;
const SQLITE_RECOVER_LOST_AND_FOUND: c_int = 1;
const SQLITE_HEADER: &[u8; 16] = b"SQLite format 3\0";
const SQLITE_UTF8_ENCODING: u32 = 1;
const SQLITE_SCHEMA_FORMAT_4: u32 = 4;
const SQLITE_MAX_U16_PAGE_SIZE_SENTINEL: u32 = 1;
const SQLITE_MAX_PAGE_SIZE: usize = 65_536;
const SQLITE_MIN_PAGE_SIZE: usize = 512;
const SQLITE_PAGE1_BTREE_HEADER_OFFSET: usize = 100;
const SQLITE_LEAF_TABLE_PAGE: u8 = 0x0d;
const HEADER_REPAIR_SCAN_PAGES: usize = 64;
#[repr(C)]
struct SqliteRecover {
_private: [u8; 0],
}
unsafe extern "C" {
fn sqlite3_recover_init(
db: *mut ffi::sqlite3,
z_db: *const c_char,
z_uri: *const c_char,
) -> *mut SqliteRecover;
fn sqlite3_recover_config(recover: *mut SqliteRecover, op: c_int, arg: *mut c_void) -> c_int;
fn sqlite3_recover_run(recover: *mut SqliteRecover) -> c_int;
fn sqlite3_recover_errmsg(recover: *mut SqliteRecover) -> *const c_char;
fn sqlite3_recover_errcode(recover: *mut SqliteRecover) -> c_int;
fn sqlite3_recover_finish(recover: *mut SqliteRecover) -> c_int;
}
pub(super) fn recover(path: &Path, recovered_path: &Path) -> Result<()> {
match recover_inner(path, recovered_path) {
Ok(()) => Ok(()),
Err(err) if is_notadb_error(&err) => {
let repaired = HeaderRepairedInput::create(path).with_context(|| {
format!(
"failed to prepare header-repaired recovery input for {}",
path.display()
)
})?;
recover_inner(repaired.path(), recovered_path).with_context(|| {
format!(
"SQLite recovery with a synthesized header failed after direct recovery failed: {err}"
)
})
}
Err(err) => Err(err),
}
}
fn recover_inner(path: &Path, recovered_path: &Path) -> Result<()> {
let db = SqliteHandle::open(path)?;
let recovered_path = path_to_cstring(recovered_path)?;
let mut recovery = RecoveryHandle::new(db.as_ptr(), recovered_path.as_c_str())?;
recovery.configure_lost_and_found()?;
recovery.run()?;
recovery.finish()
}
fn is_notadb_error(err: &anyhow::Error) -> bool {
err.chain().any(|cause| {
let message = cause.to_string();
message.contains("(26)") || message.contains("file is not a database")
})
}
struct HeaderRepairedInput {
path: PathBuf,
}
impl HeaderRepairedInput {
fn create(path: &Path) -> Result<Self> {
let mut input =
File::open(path).with_context(|| format!("failed to open {}", path.display()))?;
let file_len = input.metadata()?.len();
let page_size = detect_page_size(&mut input, file_len)?;
let page_count = file_len
.checked_div(page_size as u64)
.context("database page count overflowed")?;
let page_count = u32::try_from(page_count)
.context("database is too large to synthesize a SQLite header")?;
let mut output = None;
for sequence in 0..1000 {
let repaired_path = header_repair_path(path, sequence)?;
match OpenOptions::new()
.write(true)
.create_new(true)
.open(repaired_path.as_path())
{
Ok(file) => {
output = Some((repaired_path, file));
break;
}
Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {}
Err(err) => return Err(err.into()),
}
}
let (repaired_path, mut output) =
output.context("failed to allocate a unique header-repaired recovery path")?;
let header = synthesized_header_page(page_size, page_count)?;
output.write_all(header.as_slice())?;
input.seek(SeekFrom::Start(page_size as u64))?;
std::io::copy(&mut input, &mut output)?;
Ok(Self {
path: repaired_path,
})
}
fn path(&self) -> &Path {
self.path.as_path()
}
}
impl Drop for HeaderRepairedInput {
fn drop(&mut self) {
let _ = std::fs::remove_file(self.path.as_path());
}
}
fn header_repair_path(path: &Path, sequence: u32) -> Result<PathBuf> {
let file_name = path.file_name().ok_or_else(|| {
anyhow::anyhow!(
"cannot create a header-repaired recovery file name for {}",
path.display()
)
})?;
let timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map_or(0, |duration| duration.as_nanos());
let mut candidate = file_name.to_os_string();
candidate.push(format!(
".codex-recovery-header-repair.{}.{timestamp}.{sequence}.sqlite",
std::process::id()
));
Ok(path.with_file_name(candidate))
}
fn detect_page_size(input: &mut File, file_len: u64) -> Result<usize> {
if file_len < SQLITE_MIN_PAGE_SIZE as u64 {
anyhow::bail!("database file is too small to repair its SQLite header");
}
input.seek(SeekFrom::Start(0))?;
let scan_len = file_len.min((SQLITE_MAX_PAGE_SIZE * 4) as u64) as usize;
let mut prefix = vec![0; scan_len];
input.read_exact(prefix.as_mut_slice())?;
let mut best_page_size = None;
let mut best_valid_pages = 0;
let mut page_size = SQLITE_MIN_PAGE_SIZE;
while page_size <= SQLITE_MAX_PAGE_SIZE {
if file_len.is_multiple_of(page_size as u64) {
let valid_pages = count_valid_btree_pages(prefix.as_slice(), page_size);
if valid_pages > best_valid_pages {
best_valid_pages = valid_pages;
best_page_size = Some(page_size);
}
}
page_size *= 2;
}
best_page_size.context("failed to infer SQLite page size for header repair")
}
fn count_valid_btree_pages(prefix: &[u8], page_size: usize) -> usize {
prefix
.chunks_exact(page_size)
.take(HEADER_REPAIR_SCAN_PAGES)
.filter(|page| is_plausible_btree_page(page))
.count()
}
fn is_plausible_btree_page(page: &[u8]) -> bool {
let page_type = page[0];
let header_size = match page_type {
0x02 | 0x05 => 12,
0x0a | 0x0d => 8,
_ => return false,
};
let first_freeblock = read_u16(&page[1..3]) as usize;
let cell_count = read_u16(&page[3..5]) as usize;
let cell_content_start = read_u16(&page[5..7]) as usize;
let cell_content_start = if cell_content_start == 0 && page.len() == SQLITE_MAX_PAGE_SIZE {
SQLITE_MAX_PAGE_SIZE
} else {
cell_content_start
};
let pointer_array_end = header_size + cell_count.saturating_mul(2);
if first_freeblock >= page.len()
|| cell_content_start > page.len()
|| pointer_array_end > page.len()
|| cell_count > (page.len() - header_size) / 2
{
return false;
}
for index in 0..cell_count {
let offset = header_size + index * 2;
let cell_offset = read_u16(&page[offset..offset + 2]) as usize;
if cell_offset < pointer_array_end || cell_offset >= page.len() {
return false;
}
}
true
}
fn synthesized_header_page(page_size: usize, page_count: u32) -> Result<Vec<u8>> {
if !(SQLITE_MIN_PAGE_SIZE..=SQLITE_MAX_PAGE_SIZE).contains(&page_size)
|| !page_size.is_power_of_two()
{
anyhow::bail!("invalid SQLite page size inferred for header repair: {page_size}");
}
let mut page = vec![0; page_size];
page[0..16].copy_from_slice(SQLITE_HEADER);
let header_page_size = if page_size == SQLITE_MAX_PAGE_SIZE {
SQLITE_MAX_U16_PAGE_SIZE_SENTINEL
} else {
page_size as u32
};
write_u16(&mut page[16..18], header_page_size);
page[18] = 1;
page[19] = 1;
page[20] = 0;
page[21] = 64;
page[22] = 32;
page[23] = 32;
write_u32(&mut page[28..32], page_count);
write_u32(&mut page[44..48], SQLITE_SCHEMA_FORMAT_4);
write_u32(&mut page[56..60], SQLITE_UTF8_ENCODING);
// Use an empty sqlite_schema b-tree. The damaged page 1 cannot be trusted,
// and orphaned schema rows will be recovered through lost_and_found.
page[SQLITE_PAGE1_BTREE_HEADER_OFFSET] = SQLITE_LEAF_TABLE_PAGE;
let btree_content_start = if page_size == SQLITE_MAX_PAGE_SIZE {
0
} else {
page_size as u32
};
write_u16(
&mut page[SQLITE_PAGE1_BTREE_HEADER_OFFSET + 5..SQLITE_PAGE1_BTREE_HEADER_OFFSET + 7],
btree_content_start,
);
Ok(page)
}
fn read_u16(bytes: &[u8]) -> u16 {
u16::from_be_bytes([bytes[0], bytes[1]])
}
fn write_u16(bytes: &mut [u8], value: u32) {
bytes.copy_from_slice(&(value as u16).to_be_bytes());
}
fn write_u32(bytes: &mut [u8], value: u32) {
bytes.copy_from_slice(&value.to_be_bytes());
}
struct SqliteHandle {
db: *mut ffi::sqlite3,
}
impl SqliteHandle {
fn open(path: &Path) -> Result<Self> {
let path = path_to_cstring(path)?;
let mut db = ptr::null_mut();
let flags = ffi::SQLITE_OPEN_READWRITE | ffi::SQLITE_OPEN_URI;
// The recovery API reads pages through sqlite_dbpage on this handle.
// It does not depend on SQLx because the database may be malformed.
let rc = unsafe { ffi::sqlite3_open_v2(path.as_ptr(), &mut db, flags, ptr::null()) };
if rc != ffi::SQLITE_OK {
let message = sqlite_error_message(db);
if !db.is_null() {
let _ = unsafe { ffi::sqlite3_close(db) };
}
anyhow::bail!("failed to open malformed database for recovery ({rc}): {message}");
}
Ok(Self { db })
}
fn as_ptr(&self) -> *mut ffi::sqlite3 {
self.db
}
}
impl Drop for SqliteHandle {
fn drop(&mut self) {
if !self.db.is_null() {
let _ = unsafe { ffi::sqlite3_close(self.db) };
}
}
}
struct RecoveryHandle {
recover: *mut SqliteRecover,
}
impl RecoveryHandle {
fn new(db: *mut ffi::sqlite3, recovered_path: &CStr) -> Result<Self> {
let recover =
unsafe { sqlite3_recover_init(db, c"main".as_ptr(), recovered_path.as_ptr()) };
if recover.is_null() {
anyhow::bail!("failed to initialize SQLite recovery: out of memory");
}
Ok(Self { recover })
}
fn configure_lost_and_found(&mut self) -> Result<()> {
// Match sqlite3 shell recovery behavior by keeping orphaned rows in a
// table instead of discarding pages not reachable from recovered schema.
let table_name = c"lost_and_found";
self.configure(
SQLITE_RECOVER_LOST_AND_FOUND,
table_name.as_ptr().cast_mut().cast(),
)
}
fn configure(&mut self, op: c_int, arg: *mut c_void) -> Result<()> {
let rc = unsafe { sqlite3_recover_config(self.recover, op, arg) };
if rc != ffi::SQLITE_OK {
anyhow::bail!(
"failed to configure SQLite recovery ({rc}): {}",
self.error_message()
);
}
Ok(())
}
fn run(&mut self) -> Result<()> {
let rc = unsafe { sqlite3_recover_run(self.recover) };
if rc != ffi::SQLITE_OK {
anyhow::bail!("SQLite recovery failed ({rc}): {}", self.error_message());
}
Ok(())
}
fn finish(mut self) -> Result<()> {
let rc = unsafe { sqlite3_recover_finish(self.recover) };
self.recover = ptr::null_mut();
if rc != ffi::SQLITE_OK {
anyhow::bail!("SQLite recovery cleanup failed ({rc})");
}
Ok(())
}
fn error_message(&self) -> String {
let errcode = unsafe { sqlite3_recover_errcode(self.recover) };
let message = unsafe { sqlite3_recover_errmsg(self.recover) };
format!("{errcode}: {}", c_string_lossy(message))
}
}
impl Drop for RecoveryHandle {
fn drop(&mut self) {
if !self.recover.is_null() {
let _ = unsafe { sqlite3_recover_finish(self.recover) };
}
}
}
fn sqlite_error_message(db: *mut ffi::sqlite3) -> String {
if db.is_null() {
return "out of memory".to_string();
}
c_string_lossy(unsafe { ffi::sqlite3_errmsg(db) })
}
fn c_string_lossy(message: *const c_char) -> String {
if message.is_null() {
return "unknown error".to_string();
}
unsafe { CStr::from_ptr(message) }
.to_string_lossy()
.into_owned()
}
#[cfg(unix)]
fn path_to_cstring(path: &Path) -> Result<CString> {
use std::os::unix::ffi::OsStrExt;
CString::new(path.as_os_str().as_bytes())
.with_context(|| format!("path contains a NUL byte: {}", path.display()))
}
#[cfg(not(unix))]
fn path_to_cstring(path: &Path) -> Result<CString> {
let path_str = path
.to_str()
.with_context(|| format!("path is not valid UTF-8: {}", path.display()))?;
CString::new(path_str).with_context(|| format!("path contains a NUL byte: {}", path.display()))
}

View File

@@ -6,6 +6,8 @@ use std::time::Duration;
use crate::DB_FALLBACK_METRIC;
use crate::DB_INIT_DURATION_METRIC;
use crate::DB_INIT_METRIC;
use crate::DB_RECOVERY_DURATION_METRIC;
use crate::DB_RECOVERY_METRIC;
use tracing::debug;
/// Low-cardinality sink for SQLite startup and fallback telemetry.
@@ -92,6 +94,24 @@ pub fn record_fallback(
);
}
pub(crate) fn record_recovery_result<T>(
telemetry: Option<&dyn DbTelemetry>,
db: DbKind,
duration: Duration,
trigger_error: &anyhow::Error,
result: &anyhow::Result<T>,
) {
let outcome = DbOutcomeTags::from_result(result);
let tags = [
("status", outcome.status),
("db", db.as_str()),
("error", outcome.error),
("trigger_error", classify_error(trigger_error)),
];
record_counter(telemetry, DB_RECOVERY_METRIC, &tags);
record_duration(telemetry, DB_RECOVERY_DURATION_METRIC, duration, &tags);
}
fn record_counter(telemetry: Option<&dyn DbTelemetry>, name: &str, tags: &[(&str, &str)]) {
if let Some(telemetry) = resolve_telemetry(telemetry) {
telemetry.counter(name, /*inc*/ 1, tags);
@@ -138,6 +158,9 @@ fn classify_error(err: &anyhow::Error) -> &'static str {
if let Some(sqlx_err) = cause.downcast_ref::<sqlx::Error>() {
return classify_sqlx_error(sqlx_err);
}
if error_message_is_corrupt(cause.to_string().as_str()) {
return "corrupt";
}
if cause
.downcast_ref::<sqlx::migrate::MigrateError>()
.is_some()
@@ -189,6 +212,15 @@ fn classify_sqlite_code(code: &str) -> &'static str {
}
}
fn error_message_is_corrupt(message: &str) -> bool {
let message = message.to_ascii_lowercase();
message.contains("database disk image is malformed")
|| message.contains("file is not a database")
|| message.contains("database schema is malformed")
|| message.contains("database is corrupt")
|| (message.contains("database") && message.contains("malformed"))
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -0,0 +1,22 @@
Minimal SQLite recovery sources vendored from upstream SQLite:
- `ext/recover/sqlite3recover.c`
- `ext/recover/sqlite3recover.h`
- `ext/recover/dbdata.c`
`sqlite3.h` is a comment-stripped copy from the `libsqlite3-sys` 0.37.0
bundled SQLite 3.51.3 source (`SQLITE_SOURCE_ID`
`2026-03-13 10:38:09 737ae4a34738ffa0c3ff7f9bb18df914dd1cad163f28fd6b6e114a344fe6d618`)
so Cargo and Bazel compile these extension files with matching public SQLite
declarations without adding a build-dependency that would compile SQLite a
second time.
These files implement SQLite's recover extension without invoking the
`sqlite3` command-line shell. They are compiled into `codex-state` and link
against the same `libsqlite3-sys` library that SQLx uses.
The recovery API is not exposed by `libsqlite3-sys`, and no published Rust
crate in the dependency graph provides this extension without also building a
second SQLite copy. When updating `libsqlite3-sys`, refresh these files from
the matching SQLite source snapshot and keep the vendored set limited to the
recover/dbdata extension sources plus the trimmed public header.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,249 @@
/*
** 2022-08-27
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
**
** This file contains the public interface to the "recover" extension -
** an SQLite extension designed to recover data from corrupted database
** files.
*/
/*
** OVERVIEW:
**
** To use the API to recover data from a corrupted database, an
** application:
**
** 1) Creates an sqlite3_recover handle by calling either
** sqlite3_recover_init() or sqlite3_recover_init_sql().
**
** 2) Configures the new handle using one or more calls to
** sqlite3_recover_config().
**
** 3) Executes the recovery by repeatedly calling sqlite3_recover_step() on
** the handle until it returns something other than SQLITE_OK. If it
** returns SQLITE_DONE, then the recovery operation completed without
** error. If it returns some other non-SQLITE_OK value, then an error
** has occurred.
**
** 4) Retrieves any error code and English language error message using the
** sqlite3_recover_errcode() and sqlite3_recover_errmsg() APIs,
** respectively.
**
** 5) Destroys the sqlite3_recover handle and frees all resources
** using sqlite3_recover_finish().
**
** The application may abandon the recovery operation at any point
** before it is finished by passing the sqlite3_recover handle to
** sqlite3_recover_finish(). This is not an error, but the final state
** of the output database, or the results of running the partial script
** delivered to the SQL callback, are undefined.
*/
#ifndef _SQLITE_RECOVER_H
#define _SQLITE_RECOVER_H
#include "sqlite3.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
** An instance of the sqlite3_recover object represents a recovery
** operation in progress.
**
** Constructors:
**
** sqlite3_recover_init()
** sqlite3_recover_init_sql()
**
** Destructor:
**
** sqlite3_recover_finish()
**
** Methods:
**
** sqlite3_recover_config()
** sqlite3_recover_errcode()
** sqlite3_recover_errmsg()
** sqlite3_recover_run()
** sqlite3_recover_step()
*/
typedef struct sqlite3_recover sqlite3_recover;
/*
** These two APIs attempt to create and return a new sqlite3_recover object.
** In both cases the first two arguments identify the (possibly
** corrupt) database to recover data from. The first argument is an open
** database handle and the second the name of a database attached to that
** handle (i.e. "main", "temp" or the name of an attached database).
**
** If sqlite3_recover_init() is used to create the new sqlite3_recover
** handle, then data is recovered into a new database, identified by
** string parameter zUri. zUri may be an absolute or relative file path,
** or may be an SQLite URI. If the identified database file already exists,
** it is overwritten.
**
** If sqlite3_recover_init_sql() is invoked, then any recovered data will
** be returned to the user as a series of SQL statements. Executing these
** SQL statements results in the same database as would have been created
** had sqlite3_recover_init() been used. For each SQL statement in the
** output, the callback function passed as the third argument (xSql) is
** invoked once. The first parameter is a passed a copy of the fourth argument
** to this function (pCtx) as its first parameter, and a pointer to a
** nul-terminated buffer containing the SQL statement formated as UTF-8 as
** the second. If the xSql callback returns any value other than SQLITE_OK,
** then processing is immediately abandoned and the value returned used as
** the recover handle error code (see below).
**
** If an out-of-memory error occurs, NULL may be returned instead of
** a valid handle. In all other cases, it is the responsibility of the
** application to avoid resource leaks by ensuring that
** sqlite3_recover_finish() is called on all allocated handles.
*/
sqlite3_recover *sqlite3_recover_init(
sqlite3* db,
const char *zDb,
const char *zUri
);
sqlite3_recover *sqlite3_recover_init_sql(
sqlite3* db,
const char *zDb,
int (*xSql)(void*, const char*),
void *pCtx
);
/*
** Configure an sqlite3_recover object that has just been created using
** sqlite3_recover_init() or sqlite3_recover_init_sql(). This function
** may only be called before the first call to sqlite3_recover_step()
** or sqlite3_recover_run() on the object.
**
** The second argument passed to this function must be one of the
** SQLITE_RECOVER_* symbols defined below. Valid values for the third argument
** depend on the specific SQLITE_RECOVER_* symbol in use.
**
** SQLITE_OK is returned if the configuration operation was successful,
** or an SQLite error code otherwise.
*/
int sqlite3_recover_config(sqlite3_recover*, int op, void *pArg);
/*
** SQLITE_RECOVER_LOST_AND_FOUND:
** The pArg argument points to a string buffer containing the name
** of a "lost-and-found" table in the output database, or NULL. If
** the argument is non-NULL and the database contains seemingly
** valid pages that cannot be associated with any table in the
** recovered part of the schema, data is extracted from these
** pages to add to the lost-and-found table.
**
** SQLITE_RECOVER_FREELIST_CORRUPT:
** The pArg value must actually be a pointer to a value of type
** int containing value 0 or 1 cast as a (void*). If this option is set
** (argument is 1) and a lost-and-found table has been configured using
** SQLITE_RECOVER_LOST_AND_FOUND, then is assumed that the freelist is
** corrupt and an attempt is made to recover records from pages that
** appear to be linked into the freelist. Otherwise, pages on the freelist
** are ignored. Setting this option can recover more data from the
** database, but often ends up "recovering" deleted records. The default
** value is 0 (clear).
**
** SQLITE_RECOVER_ROWIDS:
** The pArg value must actually be a pointer to a value of type
** int containing value 0 or 1 cast as a (void*). If this option is set
** (argument is 1), then an attempt is made to recover rowid values
** that are not also INTEGER PRIMARY KEY values. If this option is
** clear, then new rowids are assigned to all recovered rows. The
** default value is 1 (set).
**
** SQLITE_RECOVER_SLOWINDEXES:
** The pArg value must actually be a pointer to a value of type
** int containing value 0 or 1 cast as a (void*). If this option is clear
** (argument is 0), then when creating an output database, the recover
** module creates and populates non-UNIQUE indexes right at the end of the
** recovery operation - after all recoverable data has been inserted
** into the new database. This is faster overall, but means that the
** final call to sqlite3_recover_step() for a recovery operation may
** be need to create a large number of indexes, which may be very slow.
**
** Or, if this option is set (argument is 1), then non-UNIQUE indexes
** are created in the output database before it is populated with
** recovered data. This is slower overall, but avoids the slow call
** to sqlite3_recover_step() at the end of the recovery operation.
**
** The default option value is 0.
*/
#define SQLITE_RECOVER_LOST_AND_FOUND 1
#define SQLITE_RECOVER_FREELIST_CORRUPT 2
#define SQLITE_RECOVER_ROWIDS 3
#define SQLITE_RECOVER_SLOWINDEXES 4
/*
** Perform a unit of work towards the recovery operation. This function
** must normally be called multiple times to complete database recovery.
**
** If no error occurs but the recovery operation is not completed, this
** function returns SQLITE_OK. If recovery has been completed successfully
** then SQLITE_DONE is returned. If an error has occurred, then an SQLite
** error code (e.g. SQLITE_IOERR or SQLITE_NOMEM) is returned. It is not
** considered an error if some or all of the data cannot be recovered
** due to database corruption.
**
** Once sqlite3_recover_step() has returned a value other than SQLITE_OK,
** all further such calls on the same recover handle are no-ops that return
** the same non-SQLITE_OK value.
*/
int sqlite3_recover_step(sqlite3_recover*);
/*
** Run the recovery operation to completion. Return SQLITE_OK if successful,
** or an SQLite error code otherwise. Calling this function is the same
** as executing:
**
** while( SQLITE_OK==sqlite3_recover_step(p) );
** return sqlite3_recover_errcode(p);
*/
int sqlite3_recover_run(sqlite3_recover*);
/*
** If an error has been encountered during a prior call to
** sqlite3_recover_step(), then this function attempts to return a
** pointer to a buffer containing an English language explanation of
** the error. If no error message is available, or if an out-of memory
** error occurs while attempting to allocate a buffer in which to format
** the error message, NULL is returned.
**
** The returned buffer remains valid until the sqlite3_recover handle is
** destroyed using sqlite3_recover_finish().
*/
const char *sqlite3_recover_errmsg(sqlite3_recover*);
/*
** If this function is called on an sqlite3_recover handle after
** an error occurs, an SQLite error code is returned. Otherwise, SQLITE_OK.
*/
int sqlite3_recover_errcode(sqlite3_recover*);
/*
** Clean up a recovery object created by a call to sqlite3_recover_init().
** The results of using a recovery object with any API after it has been
** passed to this function are undefined.
**
** This function returns the same value as sqlite3_recover_errcode().
*/
int sqlite3_recover_finish(sqlite3_recover*);
#ifdef __cplusplus
} /* end of the 'extern "C"' block */
#endif
#endif /* ifndef _SQLITE_RECOVER_H */