mirror of
https://github.com/openai/codex.git
synced 2026-05-05 11:57:33 +00:00
feat: move phase 2 to a diff based system
This commit is contained in:
@@ -57,24 +57,12 @@ async fn memories_startup_phase2_tracks_added_and_removed_inputs_across_runs() -
|
||||
let first_request = wait_for_single_request(&first_phase2).await;
|
||||
let first_prompt = phase2_prompt_text(&first_request);
|
||||
assert!(
|
||||
first_prompt.contains("- selected inputs this run: 1"),
|
||||
"expected selected count in first prompt: {first_prompt}"
|
||||
first_prompt.contains("- A raw_memories.md"),
|
||||
"expected raw memories to be added in first prompt: {first_prompt}"
|
||||
);
|
||||
assert!(
|
||||
first_prompt.contains("- newly added since the last successful Phase 2 run: 1"),
|
||||
"expected added count in first prompt: {first_prompt}"
|
||||
);
|
||||
assert!(
|
||||
first_prompt.contains("- removed from the last successful Phase 2 run: 0"),
|
||||
"expected removed count in first prompt: {first_prompt}"
|
||||
);
|
||||
assert!(
|
||||
first_prompt.contains(&format!("- [added] thread_id={thread_a},")),
|
||||
"expected thread A to be marked added: {first_prompt}"
|
||||
);
|
||||
assert!(
|
||||
first_prompt.contains("Removed from the last successful Phase 2 selection:\n- none"),
|
||||
"expected no removed items in first prompt: {first_prompt}"
|
||||
first_prompt.contains("rollout_a.md"),
|
||||
"expected rollout A summary to be added: {first_prompt}"
|
||||
);
|
||||
|
||||
wait_for_phase2_success(db.as_ref(), thread_a).await?;
|
||||
@@ -113,32 +101,24 @@ async fn memories_startup_phase2_tracks_added_and_removed_inputs_across_runs() -
|
||||
let second_request = wait_for_single_request(&second_phase2).await;
|
||||
let second_prompt = phase2_prompt_text(&second_request);
|
||||
assert!(
|
||||
second_prompt.contains("- selected inputs this run: 1"),
|
||||
"expected selected count in second prompt: {second_prompt}"
|
||||
second_prompt.contains("- M raw_memories.md"),
|
||||
"expected raw memories to be modified in second prompt: {second_prompt}"
|
||||
);
|
||||
assert!(
|
||||
second_prompt.contains("- newly added since the last successful Phase 2 run: 1"),
|
||||
"expected added count in second prompt: {second_prompt}"
|
||||
second_prompt.contains("rollout_b.md"),
|
||||
"expected rollout B summary to be added: {second_prompt}"
|
||||
);
|
||||
assert!(
|
||||
second_prompt.contains("- removed from the last successful Phase 2 run: 1"),
|
||||
"expected removed count in second prompt: {second_prompt}"
|
||||
);
|
||||
assert!(
|
||||
second_prompt.contains(&format!("- [added] thread_id={thread_b},")),
|
||||
"expected thread B to be marked added: {second_prompt}"
|
||||
);
|
||||
assert!(
|
||||
second_prompt.contains(&format!("- thread_id={thread_a},")),
|
||||
"expected thread A to be marked removed: {second_prompt}"
|
||||
second_prompt.contains("- D rollout_summaries/"),
|
||||
"expected rollout A summary to be deleted: {second_prompt}"
|
||||
);
|
||||
|
||||
wait_for_phase2_success(db.as_ref(), thread_b).await?;
|
||||
let raw_memories = tokio::fs::read_to_string(memory_root.join("raw_memories.md")).await?;
|
||||
assert!(raw_memories.contains("raw memory B"));
|
||||
assert!(raw_memories.contains("raw memory A"));
|
||||
assert!(!raw_memories.contains("raw memory A"));
|
||||
let rollout_summaries = read_rollout_summary_bodies(&memory_root).await?;
|
||||
assert_eq!(rollout_summaries.len(), 2);
|
||||
assert_eq!(rollout_summaries.len(), 1);
|
||||
assert!(
|
||||
rollout_summaries
|
||||
.iter()
|
||||
@@ -150,7 +130,7 @@ async fn memories_startup_phase2_tracks_added_and_removed_inputs_across_runs() -
|
||||
.any(|summary| summary.contains("git_branch: branch-rollout-b"))
|
||||
);
|
||||
assert!(
|
||||
rollout_summaries
|
||||
!rollout_summaries
|
||||
.iter()
|
||||
.any(|summary| summary.contains("rollout summary A"))
|
||||
);
|
||||
@@ -187,8 +167,8 @@ async fn memories_startup_phase2_prunes_old_extension_resources_and_reports_them
|
||||
"{}-abcd-10min-old.md",
|
||||
(now - ChronoDuration::days(8)).format("%Y-%m-%dT%H-%M-%S")
|
||||
);
|
||||
let old_file = chronicle_resources.join(&old_file_name);
|
||||
tokio::fs::write(&old_file, "old resource").await?;
|
||||
let legacy_old_file = chronicle_resources.join(&old_file_name);
|
||||
tokio::fs::write(&legacy_old_file, "old resource").await?;
|
||||
let recent_file = chronicle_resources.join(format!(
|
||||
"{}-abcd-10min-recent.md",
|
||||
(now - ChronoDuration::days(6)).format("%Y-%m-%dT%H-%M-%S")
|
||||
@@ -210,30 +190,28 @@ async fn memories_startup_phase2_prunes_old_extension_resources_and_reports_them
|
||||
let prompt = phase2_prompt_text(&request);
|
||||
|
||||
assert!(
|
||||
prompt.contains("Memory extension resources removed by retention pruning:"),
|
||||
"expected extension resource prune report in prompt: {prompt}"
|
||||
);
|
||||
assert!(
|
||||
prompt.contains("- retention window: 7 days"),
|
||||
"expected retention window in prompt: {prompt}"
|
||||
);
|
||||
assert!(
|
||||
prompt.contains("- extension: chronicle"),
|
||||
"expected extension name in prompt: {prompt}"
|
||||
);
|
||||
assert!(
|
||||
prompt.contains(&format!(" - resources/{old_file_name}")),
|
||||
"expected old resource in prompt: {prompt}"
|
||||
prompt.contains(&format!(
|
||||
"- D extensions/chronicle/resources/{old_file_name}"
|
||||
)),
|
||||
"expected old resource deletion in prompt: {prompt}"
|
||||
);
|
||||
|
||||
wait_for_phase2_success(db.as_ref(), thread_id).await?;
|
||||
let old_file = home.path().join(format!(
|
||||
"memories/extensions/chronicle/resources/{old_file_name}"
|
||||
));
|
||||
wait_for_file_removed(&old_file).await?;
|
||||
assert!(
|
||||
!tokio::fs::try_exists(&old_file).await?,
|
||||
"old extension resource should be pruned"
|
||||
);
|
||||
assert!(
|
||||
tokio::fs::try_exists(&recent_file).await?,
|
||||
tokio::fs::try_exists(
|
||||
home.path()
|
||||
.join("memories/extensions/chronicle/resources")
|
||||
.join(recent_file.file_name().expect("recent file name"))
|
||||
)
|
||||
.await?,
|
||||
"recent extension resource should be retained"
|
||||
);
|
||||
|
||||
@@ -263,8 +241,8 @@ async fn memories_startup_phase2_processes_old_extension_resources_without_stage
|
||||
"{}-abcd-10min-old.md",
|
||||
(now - ChronoDuration::days(8)).format("%Y-%m-%dT%H-%M-%S")
|
||||
);
|
||||
let old_file = chronicle_resources.join(&old_file_name);
|
||||
tokio::fs::write(&old_file, "old resource").await?;
|
||||
let legacy_old_file = chronicle_resources.join(&old_file_name);
|
||||
tokio::fs::write(&legacy_old_file, "old resource").await?;
|
||||
|
||||
let phase2 = mount_sse_once(
|
||||
&server,
|
||||
@@ -281,17 +259,14 @@ async fn memories_startup_phase2_processes_old_extension_resources_without_stage
|
||||
let prompt = phase2_prompt_text(&request);
|
||||
|
||||
assert!(
|
||||
prompt.contains("- selected inputs this run: 0"),
|
||||
"expected no selected raw inputs in prompt: {prompt}"
|
||||
);
|
||||
assert!(
|
||||
prompt.contains("Memory extension resources removed by retention pruning:"),
|
||||
"expected extension resource prune report in prompt: {prompt}"
|
||||
);
|
||||
assert!(
|
||||
prompt.contains(&format!(" - resources/{old_file_name}")),
|
||||
"expected old resource in prompt: {prompt}"
|
||||
prompt.contains(&format!(
|
||||
"- D extensions/chronicle/resources/{old_file_name}"
|
||||
)),
|
||||
"expected old resource deletion in prompt: {prompt}"
|
||||
);
|
||||
let old_file = home.path().join(format!(
|
||||
"memories/extensions/chronicle/resources/{old_file_name}"
|
||||
));
|
||||
wait_for_file_removed(&old_file).await?;
|
||||
|
||||
shutdown_test_codex(&codex).await?;
|
||||
@@ -397,16 +372,8 @@ async fn web_search_pollution_moves_selected_thread_into_removed_phase2_inputs()
|
||||
.remove(0);
|
||||
let first_phase2_prompt = phase2_prompt_text(&first_phase2_request);
|
||||
assert!(
|
||||
first_phase2_prompt.contains("- selected inputs this run: 1"),
|
||||
"expected seeded thread to be selected before pollution: {first_phase2_prompt}"
|
||||
);
|
||||
assert!(
|
||||
first_phase2_prompt.contains("- newly added since the last successful Phase 2 run: 1"),
|
||||
"expected seeded thread to be added before pollution: {first_phase2_prompt}"
|
||||
);
|
||||
assert!(
|
||||
first_phase2_prompt.contains(&format!("- [added] thread_id={thread_id},")),
|
||||
"expected selected thread in first phase2 prompt: {first_phase2_prompt}"
|
||||
first_phase2_prompt.contains("- A raw_memories.md"),
|
||||
"expected raw memories to be added before pollution: {first_phase2_prompt}"
|
||||
);
|
||||
|
||||
wait_for_phase2_success(db.as_ref(), thread_id).await?;
|
||||
@@ -461,6 +428,41 @@ async fn web_search_pollution_moves_selected_thread_into_removed_phase2_inputs()
|
||||
assert_eq!(selection.removed[0].thread_id, thread_id);
|
||||
|
||||
shutdown_test_codex(&resumed).await?;
|
||||
|
||||
let removed_phase2 = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-phase2-removed"),
|
||||
ev_assistant_message("msg-phase2-removed", "phase2 removed complete"),
|
||||
ev_completed("resp-phase2-removed"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let cleanup = build_test_codex(&server, home.clone()).await?;
|
||||
let removed_request = wait_for_single_request(&removed_phase2).await;
|
||||
let removed_prompt = phase2_prompt_text(&removed_request);
|
||||
assert!(
|
||||
removed_prompt.contains("- D rollout_summaries/"),
|
||||
"expected polluted thread rollout summary to be deleted: {removed_prompt}"
|
||||
);
|
||||
|
||||
let workspace_diff =
|
||||
tokio::fs::read_to_string(home.path().join("memories/phase2_workspace_diff.md")).await?;
|
||||
assert!(
|
||||
workspace_diff.contains("Status: deleted"),
|
||||
"expected deleted file section in workspace diff: {workspace_diff}"
|
||||
);
|
||||
assert!(
|
||||
workspace_diff.contains(&format!("-thread_id: {thread_id}")),
|
||||
"expected deleted rollout summary metadata in workspace diff: {workspace_diff}"
|
||||
);
|
||||
assert!(
|
||||
workspace_diff.contains("-rollout summary seeded for web search pollution"),
|
||||
"expected deleted rollout summary content in workspace diff: {workspace_diff}"
|
||||
);
|
||||
|
||||
wait_for_phase2_no_pending_inputs(db.as_ref()).await?;
|
||||
shutdown_test_codex(&cleanup).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -560,7 +562,7 @@ fn phase2_prompt_text(request: &ResponsesRequest) -> String {
|
||||
request
|
||||
.message_input_texts("user")
|
||||
.into_iter()
|
||||
.find(|text| text.contains("Current selected Phase 1 inputs:"))
|
||||
.find(|text| text.contains("Memory workspace changes:"))
|
||||
.expect("phase2 prompt text")
|
||||
}
|
||||
|
||||
@@ -589,6 +591,27 @@ async fn wait_for_phase2_success(
|
||||
}
|
||||
}
|
||||
|
||||
async fn wait_for_phase2_no_pending_inputs(db: &codex_state::StateRuntime) -> Result<()> {
|
||||
let deadline = Instant::now() + Duration::from_secs(10);
|
||||
loop {
|
||||
let selection = db
|
||||
.get_phase2_input_selection(/*n*/ 1, /*max_unused_days*/ 30)
|
||||
.await?;
|
||||
if selection.selected.is_empty()
|
||||
&& selection.retained_thread_ids.is_empty()
|
||||
&& selection.removed.is_empty()
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
assert!(
|
||||
Instant::now() < deadline,
|
||||
"timed out waiting for phase2 to clear pending inputs: {selection:?}"
|
||||
);
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn seed_stage1_output_for_existing_thread(
|
||||
db: &codex_state::StateRuntime,
|
||||
thread_id: ThreadId,
|
||||
|
||||
Reference in New Issue
Block a user