mirror of
https://github.com/openai/codex.git
synced 2026-05-02 02:17:22 +00:00
fix: db stuff mem (#11575)
* Document DB functions * Fix a nit where stage 2 sorted the stage-1 results in the wrong direction * Add tests
This commit is contained in:
@@ -1432,6 +1432,104 @@ WHERE id = 1
|
||||
let _ = tokio::fs::remove_dir_all(codex_home).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn claim_stage1_jobs_prefilters_threads_with_up_to_date_memory() {
|
||||
let codex_home = unique_temp_dir();
|
||||
let runtime = StateRuntime::init(codex_home.clone(), "test-provider".to_string(), None)
|
||||
.await
|
||||
.expect("initialize runtime");
|
||||
|
||||
let now = Utc::now();
|
||||
let eligible_newer_at = now - Duration::hours(13);
|
||||
let eligible_older_at = now - Duration::hours(14);
|
||||
|
||||
let current_thread_id =
|
||||
ThreadId::from_string(&Uuid::new_v4().to_string()).expect("current thread id");
|
||||
let up_to_date_thread_id =
|
||||
ThreadId::from_string(&Uuid::new_v4().to_string()).expect("up-to-date thread id");
|
||||
let stale_thread_id =
|
||||
ThreadId::from_string(&Uuid::new_v4().to_string()).expect("stale thread id");
|
||||
let worker_id = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("worker id");
|
||||
|
||||
let mut current =
|
||||
test_thread_metadata(&codex_home, current_thread_id, codex_home.join("current"));
|
||||
current.created_at = now;
|
||||
current.updated_at = now;
|
||||
runtime
|
||||
.upsert_thread(¤t)
|
||||
.await
|
||||
.expect("upsert current thread");
|
||||
|
||||
let mut up_to_date = test_thread_metadata(
|
||||
&codex_home,
|
||||
up_to_date_thread_id,
|
||||
codex_home.join("up-to-date"),
|
||||
);
|
||||
up_to_date.created_at = eligible_newer_at;
|
||||
up_to_date.updated_at = eligible_newer_at;
|
||||
runtime
|
||||
.upsert_thread(&up_to_date)
|
||||
.await
|
||||
.expect("upsert up-to-date thread");
|
||||
|
||||
let up_to_date_claim = runtime
|
||||
.try_claim_stage1_job(
|
||||
up_to_date_thread_id,
|
||||
worker_id,
|
||||
up_to_date.updated_at.timestamp(),
|
||||
3600,
|
||||
64,
|
||||
)
|
||||
.await
|
||||
.expect("claim up-to-date thread for seed");
|
||||
let up_to_date_token = match up_to_date_claim {
|
||||
Stage1JobClaimOutcome::Claimed { ownership_token } => ownership_token,
|
||||
other => panic!("unexpected seed claim outcome: {other:?}"),
|
||||
};
|
||||
assert!(
|
||||
runtime
|
||||
.mark_stage1_job_succeeded(
|
||||
up_to_date_thread_id,
|
||||
up_to_date_token.as_str(),
|
||||
up_to_date.updated_at.timestamp(),
|
||||
"raw",
|
||||
"summary",
|
||||
)
|
||||
.await
|
||||
.expect("mark up-to-date thread succeeded"),
|
||||
"seed stage1 success should complete for up-to-date thread"
|
||||
);
|
||||
|
||||
let mut stale =
|
||||
test_thread_metadata(&codex_home, stale_thread_id, codex_home.join("stale"));
|
||||
stale.created_at = eligible_older_at;
|
||||
stale.updated_at = eligible_older_at;
|
||||
runtime
|
||||
.upsert_thread(&stale)
|
||||
.await
|
||||
.expect("upsert stale thread");
|
||||
|
||||
let allowed_sources = vec!["cli".to_string()];
|
||||
let claims = runtime
|
||||
.claim_stage1_jobs_for_startup(
|
||||
current_thread_id,
|
||||
Stage1StartupClaimParams {
|
||||
scan_limit: 1,
|
||||
max_claimed: 1,
|
||||
max_age_days: 30,
|
||||
min_rollout_idle_hours: 12,
|
||||
allowed_sources: allowed_sources.as_slice(),
|
||||
lease_seconds: 3600,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.expect("claim stage1 startup jobs");
|
||||
assert_eq!(claims.len(), 1);
|
||||
assert_eq!(claims[0].thread.id, stale_thread_id);
|
||||
|
||||
let _ = tokio::fs::remove_dir_all(codex_home).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn claim_stage1_jobs_enforces_global_running_cap() {
|
||||
let codex_home = unique_temp_dir();
|
||||
@@ -1559,6 +1657,92 @@ WHERE kind = 'memory_stage1'
|
||||
let _ = tokio::fs::remove_dir_all(codex_home).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn claim_stage1_jobs_processes_two_full_batches_across_startup_passes() {
|
||||
let codex_home = unique_temp_dir();
|
||||
let runtime = StateRuntime::init(codex_home.clone(), "test-provider".to_string(), None)
|
||||
.await
|
||||
.expect("initialize runtime");
|
||||
|
||||
let current_thread_id =
|
||||
ThreadId::from_string(&Uuid::new_v4().to_string()).expect("current thread id");
|
||||
let mut current =
|
||||
test_thread_metadata(&codex_home, current_thread_id, codex_home.join("current"));
|
||||
current.created_at = Utc::now();
|
||||
current.updated_at = Utc::now();
|
||||
runtime
|
||||
.upsert_thread(¤t)
|
||||
.await
|
||||
.expect("upsert current");
|
||||
|
||||
let eligible_at = Utc::now() - Duration::hours(13);
|
||||
for idx in 0..200 {
|
||||
let thread_id = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread id");
|
||||
let mut metadata = test_thread_metadata(
|
||||
&codex_home,
|
||||
thread_id,
|
||||
codex_home.join(format!("thread-{idx}")),
|
||||
);
|
||||
metadata.created_at = eligible_at - Duration::seconds(idx as i64);
|
||||
metadata.updated_at = eligible_at - Duration::seconds(idx as i64);
|
||||
runtime
|
||||
.upsert_thread(&metadata)
|
||||
.await
|
||||
.expect("upsert eligible thread");
|
||||
}
|
||||
|
||||
let allowed_sources = vec!["cli".to_string()];
|
||||
let first_claims = runtime
|
||||
.claim_stage1_jobs_for_startup(
|
||||
current_thread_id,
|
||||
Stage1StartupClaimParams {
|
||||
scan_limit: 5_000,
|
||||
max_claimed: 64,
|
||||
max_age_days: 30,
|
||||
min_rollout_idle_hours: 12,
|
||||
allowed_sources: allowed_sources.as_slice(),
|
||||
lease_seconds: 3_600,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.expect("first stage1 startup claim");
|
||||
assert_eq!(first_claims.len(), 64);
|
||||
|
||||
for claim in first_claims {
|
||||
assert!(
|
||||
runtime
|
||||
.mark_stage1_job_succeeded(
|
||||
claim.thread.id,
|
||||
claim.ownership_token.as_str(),
|
||||
claim.thread.updated_at.timestamp(),
|
||||
"raw",
|
||||
"summary",
|
||||
)
|
||||
.await
|
||||
.expect("mark first-batch stage1 success"),
|
||||
"first batch stage1 completion should succeed"
|
||||
);
|
||||
}
|
||||
|
||||
let second_claims = runtime
|
||||
.claim_stage1_jobs_for_startup(
|
||||
current_thread_id,
|
||||
Stage1StartupClaimParams {
|
||||
scan_limit: 5_000,
|
||||
max_claimed: 64,
|
||||
max_age_days: 30,
|
||||
min_rollout_idle_hours: 12,
|
||||
allowed_sources: allowed_sources.as_slice(),
|
||||
lease_seconds: 3_600,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.expect("second stage1 startup claim");
|
||||
assert_eq!(second_claims.len(), 64);
|
||||
|
||||
let _ = tokio::fs::remove_dir_all(codex_home).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn stage1_output_cascades_on_thread_delete() {
|
||||
let codex_home = unique_temp_dir();
|
||||
@@ -1695,6 +1879,88 @@ WHERE kind = 'memory_stage1'
|
||||
let _ = tokio::fs::remove_dir_all(codex_home).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
async fn stage1_retry_exhaustion_does_not_block_newer_watermark() {
    // Verifies the stage-1 retry budget: after three failed attempts at the
    // same input watermark (100) further claims are refused, but a claim at
    // a NEWER watermark (101) resets the budget and succeeds — so a poisoned
    // old input can never permanently block fresher work.
    let codex_home = unique_temp_dir();
    let runtime = StateRuntime::init(codex_home.clone(), "test-provider".to_string(), None)
        .await
        .expect("initialize runtime");

    let thread_id = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("thread id");
    let owner = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("owner id");
    runtime
        .upsert_thread(&test_thread_metadata(
            &codex_home,
            thread_id,
            codex_home.join("workspace"),
        ))
        .await
        .expect("upsert thread");

    // Burn the full retry budget at watermark 100: claim, then fail, three
    // times. (The budget appears to be 3, per the retry_remaining assertion
    // at the end of this test.)
    for attempt in 0..3 {
        let claim = runtime
            .try_claim_stage1_job(thread_id, owner, 100, 3_600, 64)
            .await
            .expect("claim stage1 for retry exhaustion");
        let ownership_token = match claim {
            Stage1JobClaimOutcome::Claimed { ownership_token } => ownership_token,
            other => panic!(
                "attempt {} should claim stage1 before retries are exhausted: {other:?}",
                attempt + 1
            ),
        };
        // retry_delay_seconds = 0 so the next attempt is immediately eligible.
        assert!(
            runtime
                .mark_stage1_job_failed(thread_id, ownership_token.as_str(), "boom", 0)
                .await
                .expect("mark stage1 failed"),
            "attempt {} should decrement retry budget",
            attempt + 1
        );
    }

    // Same watermark (100) after three failures: the claim must be refused
    // with the retry-exhausted outcome rather than handing out a lease.
    let exhausted_claim = runtime
        .try_claim_stage1_job(thread_id, owner, 100, 3_600, 64)
        .await
        .expect("claim stage1 after retry exhaustion");
    assert_eq!(
        exhausted_claim,
        Stage1JobClaimOutcome::SkippedRetryExhausted
    );

    // A strictly newer source watermark (101) must be claimable: exhaustion
    // applies per input, not per thread.
    let newer_source_claim = runtime
        .try_claim_stage1_job(thread_id, owner, 101, 3_600, 64)
        .await
        .expect("claim stage1 with newer source watermark");
    assert!(
        matches!(newer_source_claim, Stage1JobClaimOutcome::Claimed { .. }),
        "newer source watermark should reset retry budget and be claimable"
    );

    // Inspect the job row directly: the newer-watermark claim must have both
    // refilled retry_remaining (back to 3) and advanced input_watermark.
    let job_row = sqlx::query(
        "SELECT retry_remaining, input_watermark FROM jobs WHERE kind = ? AND job_key = ?",
    )
    .bind("memory_stage1")
    .bind(thread_id.to_string())
    .fetch_one(runtime.pool.as_ref())
    .await
    .expect("load stage1 job row after newer-source claim");
    assert_eq!(
        job_row
            .try_get::<i64, _>("retry_remaining")
            .expect("retry_remaining"),
        3
    );
    assert_eq!(
        job_row
            .try_get::<i64, _>("input_watermark")
            .expect("input_watermark"),
        101
    );

    // Best-effort cleanup of the per-test CODEX_HOME directory.
    let _ = tokio::fs::remove_dir_all(codex_home).await;
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn phase2_global_consolidation_reruns_when_watermark_advances() {
|
||||
let codex_home = unique_temp_dir();
|
||||
@@ -2078,6 +2344,65 @@ VALUES (?, ?, ?, ?, ?)
|
||||
let _ = tokio::fs::remove_dir_all(codex_home).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn phase2_backfilled_inputs_below_last_success_still_become_dirty() {
|
||||
let codex_home = unique_temp_dir();
|
||||
let runtime = StateRuntime::init(codex_home.clone(), "test-provider".to_string(), None)
|
||||
.await
|
||||
.expect("initialize runtime");
|
||||
|
||||
runtime
|
||||
.enqueue_global_consolidation(500)
|
||||
.await
|
||||
.expect("enqueue initial consolidation");
|
||||
let owner_a = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("owner a");
|
||||
let claim_a = runtime
|
||||
.try_claim_global_phase2_job(owner_a, 3_600)
|
||||
.await
|
||||
.expect("claim initial consolidation");
|
||||
let token_a = match claim_a {
|
||||
Phase2JobClaimOutcome::Claimed {
|
||||
ownership_token,
|
||||
input_watermark,
|
||||
} => {
|
||||
assert_eq!(input_watermark, 500);
|
||||
ownership_token
|
||||
}
|
||||
other => panic!("unexpected initial phase2 claim outcome: {other:?}"),
|
||||
};
|
||||
assert!(
|
||||
runtime
|
||||
.mark_global_phase2_job_succeeded(token_a.as_str(), 500)
|
||||
.await
|
||||
.expect("mark initial phase2 success"),
|
||||
"initial phase2 success should finalize"
|
||||
);
|
||||
|
||||
runtime
|
||||
.enqueue_global_consolidation(400)
|
||||
.await
|
||||
.expect("enqueue backfilled consolidation");
|
||||
|
||||
let owner_b = ThreadId::from_string(&Uuid::new_v4().to_string()).expect("owner b");
|
||||
let claim_b = runtime
|
||||
.try_claim_global_phase2_job(owner_b, 3_600)
|
||||
.await
|
||||
.expect("claim backfilled consolidation");
|
||||
match claim_b {
|
||||
Phase2JobClaimOutcome::Claimed {
|
||||
input_watermark, ..
|
||||
} => {
|
||||
assert!(
|
||||
input_watermark > 500,
|
||||
"backfilled enqueue should advance dirty watermark beyond last success"
|
||||
);
|
||||
}
|
||||
other => panic!("unexpected backfilled phase2 claim outcome: {other:?}"),
|
||||
}
|
||||
|
||||
let _ = tokio::fs::remove_dir_all(codex_home).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn phase2_failure_fallback_updates_unowned_running_job() {
|
||||
let codex_home = unique_temp_dir();
|
||||
|
||||
Reference in New Issue
Block a user