mirror of
https://github.com/openai/codex.git
synced 2026-04-27 08:05:51 +00:00
[codex-backend] Make thread metadata updates tolerate pending backfill (#16877)
### Summary

Fix `thread/metadata/update` so it can still patch stored thread metadata when the list/backfill-gated `get_state_db(...)` path is unavailable.

What was happening:

- The app logs showed `thread/metadata/update` failing with `sqlite state db unavailable for thread ...`.
- This was not isolated to one bad thread. Once the failure started for a user, branch metadata updates failed 100% of the time for that user.
- Reports were staggered across users, which points at local app-server / local SQLite state rather than one global server-side failure.
- Turns could still start immediately after the metadata update failed, which suggests the thread itself was valid and the failure was in the metadata endpoint's DB-handle path.

The fix:

- Keep using the loaded thread state DB and the normal `get_state_db(...)` fallback first.
- If that still returns `None`, open the state runtime directly via `StateRuntime::init(...)` for this targeted metadata update path.
- Log the direct state runtime init error if that final fallback also fails, so future reports have the real DB-open cause instead of only the generic unavailable error.
- Add a regression test where the DB exists but backfill is not complete, and verify `thread/metadata/update` can still repair the stored rollout thread and patch `gitInfo`.

Relevant context / suspect PRs:

- #16434 changed state DB startup to run auto-vacuum / incremental vacuum. This is the most suspicious timing match for per-user, staggered local SQLite availability failures.
- #16433 dropped the old log table from the state DB, also near the timing window.
- #13280 introduced this endpoint and made it rely on SQLite for git metadata without resuming the thread.
- #14859 and #14888 added/consumed persisted model + reasoning effort metadata. I checked these because of the new thread metadata fields, but this failure happens before the endpoint reaches thread-row update/load logic, so they seem less likely as the direct cause.
### Testing

- `cargo fmt -- --config imports_granularity=Item` completed; local stable rustfmt emitted warnings that `imports_granularity` is unstable.
- `cargo test -p codex-app-server thread_metadata_update`
- `git diff --check`
This commit is contained in:
committed by
GitHub
parent
54dbbb839e
commit
4ce97cef02
@@ -223,6 +223,59 @@ async fn thread_metadata_update_repairs_missing_sqlite_row_for_stored_thread() -
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_metadata_update_repairs_stored_thread_before_backfill_completes() -> Result<()> {
|
||||
let server = create_mock_responses_server_repeating_assistant("Done").await;
|
||||
let codex_home = TempDir::new()?;
|
||||
create_config_toml(codex_home.path(), &server.uri())?;
|
||||
let _state_db =
|
||||
StateRuntime::init(codex_home.path().to_path_buf(), "mock_provider".into()).await?;
|
||||
|
||||
let preview = "Stored thread preview before backfill";
|
||||
let thread_id = create_fake_rollout(
|
||||
codex_home.path(),
|
||||
"2025-01-05T12-30-00",
|
||||
"2025-01-05T12:30:00Z",
|
||||
preview,
|
||||
Some("mock_provider"),
|
||||
/*git_info*/ None,
|
||||
)?;
|
||||
|
||||
let mut mcp = McpProcess::new(codex_home.path()).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let update_id = mcp
|
||||
.send_thread_metadata_update_request(ThreadMetadataUpdateParams {
|
||||
thread_id: thread_id.clone(),
|
||||
git_info: Some(ThreadMetadataGitInfoUpdateParams {
|
||||
sha: None,
|
||||
branch: Some(Some("feature/pending-backfill".to_string())),
|
||||
origin_url: None,
|
||||
}),
|
||||
})
|
||||
.await?;
|
||||
let update_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(update_id)),
|
||||
)
|
||||
.await??;
|
||||
let ThreadMetadataUpdateResponse { thread: updated } =
|
||||
to_response::<ThreadMetadataUpdateResponse>(update_resp)?;
|
||||
|
||||
assert_eq!(updated.id, thread_id);
|
||||
assert_eq!(updated.preview, preview);
|
||||
assert_eq!(
|
||||
updated.git_info,
|
||||
Some(GitInfo {
|
||||
sha: None,
|
||||
branch: Some("feature/pending-backfill".to_string()),
|
||||
origin_url: None,
|
||||
})
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_metadata_update_repairs_loaded_thread_without_resetting_summary() -> Result<()> {
|
||||
let server = create_mock_responses_server_repeating_assistant("Done").await;
|
||||
|
||||
Reference in New Issue
Block a user