mirror of
https://github.com/openai/codex.git
synced 2026-06-01 19:02:59 +00:00
feat: align memory phase 1 and make it stronger (#11300)
## Align with the new phase-1 design Basically, we now run phase 1 in parallel, selecting rollouts as follows: * Max 64 rollouts * Max 1 month old * Most recent first. This PR also adds stronger parallelization capabilities: stale-job detection, retry policies, and ownership of each computation to prevent double computation.
This commit is contained in:
@@ -1,6 +1,27 @@
|
||||
use super::*;
|
||||
use chrono::DateTime;
|
||||
use chrono::Utc;
|
||||
use sha2::Digest;
|
||||
use sha2::Sha256;
|
||||
use std::time::Duration;
|
||||
|
||||
const MEMORY_STARTUP_STAGE: &str = "run_memories_startup_pipeline";
|
||||
const PHASE_ONE_THREAD_SCAN_LIMIT: usize = 5_000;
|
||||
const PHASE_ONE_DB_LOCK_RETRY_LIMIT: usize = 3;
|
||||
const PHASE_ONE_DB_LOCK_RETRY_BACKOFF_MS: u64 = 25;
|
||||
|
||||
/// One memory scope that phase-1 output can be written into.
///
/// Bundles the `(scope_kind, scope_key)` pair passed to the state DB calls in
/// this file with the directory that holds the scope's memory files.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MemoryScopeTarget {
    /// Scope kind label (e.g. `memories::MEMORY_SCOPE_KIND_CWD`).
    scope_kind: &'static str,
    /// Key identifying the scope within its kind.
    scope_key: String,
    /// Root directory of this scope's on-disk memory layout.
    memory_root: PathBuf,
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct ClaimedPhaseOneCandidate {
|
||||
candidate: memories::RolloutCandidate,
|
||||
claimed_scopes: Vec<(MemoryScopeTarget, String)>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct StageOneRequestContext {
|
||||
@@ -55,7 +76,7 @@ pub(super) async fn run_memories_startup_pipeline(
|
||||
let Some(page) = state_db::list_threads_db(
|
||||
session.services.state_db.as_deref(),
|
||||
&config.codex_home,
|
||||
200,
|
||||
PHASE_ONE_THREAD_SCAN_LIMIT,
|
||||
None,
|
||||
ThreadSortKey::UpdatedAt,
|
||||
INTERACTIVE_SESSION_SOURCES,
|
||||
@@ -68,32 +89,26 @@ pub(super) async fn run_memories_startup_pipeline(
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let mut existing_memories = Vec::new();
|
||||
for item in &page.items {
|
||||
if let Some(memory) = state_db::get_thread_memory(
|
||||
session.services.state_db.as_deref(),
|
||||
item.id,
|
||||
MEMORY_STARTUP_STAGE,
|
||||
)
|
||||
.await
|
||||
{
|
||||
existing_memories.push(memory);
|
||||
}
|
||||
}
|
||||
|
||||
let candidates = memories::select_rollout_candidates_from_db(
|
||||
let selection_candidates = memories::select_rollout_candidates_from_db(
|
||||
&page.items,
|
||||
session.conversation_id,
|
||||
&existing_memories,
|
||||
memories::MAX_ROLLOUTS_PER_STARTUP,
|
||||
PHASE_ONE_THREAD_SCAN_LIMIT,
|
||||
memories::PHASE_ONE_MAX_ROLLOUT_AGE_DAYS,
|
||||
);
|
||||
let claimed_candidates = claim_phase_one_candidates(
|
||||
session,
|
||||
config.as_ref(),
|
||||
selection_candidates,
|
||||
memories::MAX_ROLLOUTS_PER_STARTUP,
|
||||
)
|
||||
.await;
|
||||
info!(
|
||||
"memory phase-1 candidate selection complete: {} candidate(s) from {} indexed thread(s)",
|
||||
candidates.len(),
|
||||
"memory phase-1 candidate selection complete: {} claimed candidate(s) from {} indexed thread(s)",
|
||||
claimed_candidates.len(),
|
||||
page.items.len()
|
||||
);
|
||||
|
||||
if candidates.is_empty() {
|
||||
if claimed_candidates.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -102,62 +117,173 @@ pub(super) async fn run_memories_startup_pipeline(
|
||||
turn_context.resolve_turn_metadata_header().await,
|
||||
);
|
||||
|
||||
let touched_cwds =
|
||||
futures::stream::iter(candidates.into_iter())
|
||||
.map(|candidate| {
|
||||
let touched_scope_sets =
|
||||
futures::stream::iter(claimed_candidates.into_iter())
|
||||
.map(|claimed_candidate| {
|
||||
let session = Arc::clone(session);
|
||||
let config = Arc::clone(&config);
|
||||
let stage_one_context = stage_one_context.clone();
|
||||
async move {
|
||||
process_memory_candidate(session, config, candidate, stage_one_context).await
|
||||
process_memory_candidate(session, claimed_candidate, stage_one_context).await
|
||||
}
|
||||
})
|
||||
.buffer_unordered(memories::PHASE_ONE_CONCURRENCY_LIMIT)
|
||||
.filter_map(futures::future::ready)
|
||||
.collect::<HashSet<PathBuf>>()
|
||||
.collect::<Vec<HashSet<MemoryScopeTarget>>>()
|
||||
.await;
|
||||
let touched_scopes = touched_scope_sets
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.collect::<HashSet<MemoryScopeTarget>>();
|
||||
info!(
|
||||
"memory phase-1 extraction complete: {} cwd(s) touched",
|
||||
touched_cwds.len()
|
||||
"memory phase-1 extraction complete: {} scope(s) touched",
|
||||
touched_scopes.len()
|
||||
);
|
||||
|
||||
if touched_cwds.is_empty() {
|
||||
if touched_scopes.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let consolidation_cwd_count = touched_cwds.len();
|
||||
futures::stream::iter(touched_cwds.into_iter())
|
||||
.map(|cwd| {
|
||||
let consolidation_scope_count = touched_scopes.len();
|
||||
futures::stream::iter(touched_scopes.into_iter())
|
||||
.map(|scope| {
|
||||
let session = Arc::clone(session);
|
||||
let config = Arc::clone(&config);
|
||||
async move {
|
||||
run_memory_consolidation_for_cwd(session, config, cwd).await;
|
||||
run_memory_consolidation_for_scope(session, config, scope).await;
|
||||
}
|
||||
})
|
||||
.buffer_unordered(memories::PHASE_ONE_CONCURRENCY_LIMIT)
|
||||
.collect::<Vec<_>>()
|
||||
.await;
|
||||
info!(
|
||||
"memory phase-2 consolidation dispatch complete: {} cwd(s) scheduled",
|
||||
consolidation_cwd_count
|
||||
"memory phase-2 consolidation dispatch complete: {} scope(s) scheduled",
|
||||
consolidation_scope_count
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn claim_phase_one_candidates(
|
||||
session: &Session,
|
||||
config: &Config,
|
||||
candidates: Vec<memories::RolloutCandidate>,
|
||||
max_claimed_candidates: usize,
|
||||
) -> Vec<ClaimedPhaseOneCandidate> {
|
||||
if max_claimed_candidates == 0 {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let Some(state_db) = session.services.state_db.as_deref() else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
let mut claimed_candidates = Vec::new();
|
||||
for candidate in candidates {
|
||||
if claimed_candidates.len() >= max_claimed_candidates {
|
||||
break;
|
||||
}
|
||||
|
||||
let source_updated_at = parse_source_updated_at_epoch(&candidate);
|
||||
let mut claimed_scopes = Vec::<(MemoryScopeTarget, String)>::new();
|
||||
for scope in memory_scope_targets_for_candidate(config, &candidate) {
|
||||
let Some(claim) = try_claim_phase1_job_with_retry(
|
||||
state_db,
|
||||
candidate.thread_id,
|
||||
scope.scope_kind,
|
||||
&scope.scope_key,
|
||||
session.conversation_id,
|
||||
source_updated_at,
|
||||
)
|
||||
.await
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
|
||||
if let codex_state::Phase1JobClaimOutcome::Claimed { ownership_token } = claim {
|
||||
claimed_scopes.push((scope, ownership_token));
|
||||
}
|
||||
}
|
||||
|
||||
if !claimed_scopes.is_empty() {
|
||||
claimed_candidates.push(ClaimedPhaseOneCandidate {
|
||||
candidate,
|
||||
claimed_scopes,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
claimed_candidates
|
||||
}
|
||||
|
||||
async fn try_claim_phase1_job_with_retry(
|
||||
state_db: &codex_state::StateRuntime,
|
||||
thread_id: ThreadId,
|
||||
scope_kind: &str,
|
||||
scope_key: &str,
|
||||
owner_session_id: ThreadId,
|
||||
source_updated_at: i64,
|
||||
) -> Option<codex_state::Phase1JobClaimOutcome> {
|
||||
for attempt in 0..=PHASE_ONE_DB_LOCK_RETRY_LIMIT {
|
||||
match state_db
|
||||
.try_claim_phase1_job(
|
||||
thread_id,
|
||||
scope_kind,
|
||||
scope_key,
|
||||
owner_session_id,
|
||||
source_updated_at,
|
||||
memories::PHASE_ONE_JOB_LEASE_SECONDS,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(claim) => return Some(claim),
|
||||
Err(err) => {
|
||||
let is_locked = err.to_string().contains("database is locked");
|
||||
if is_locked && attempt < PHASE_ONE_DB_LOCK_RETRY_LIMIT {
|
||||
tokio::time::sleep(Duration::from_millis(
|
||||
PHASE_ONE_DB_LOCK_RETRY_BACKOFF_MS * (attempt as u64 + 1),
|
||||
))
|
||||
.await;
|
||||
continue;
|
||||
}
|
||||
warn!("state db try_claim_phase1_job failed during {MEMORY_STARTUP_STAGE}: {err}");
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
async fn process_memory_candidate(
|
||||
session: Arc<Session>,
|
||||
config: Arc<Config>,
|
||||
candidate: memories::RolloutCandidate,
|
||||
claimed_candidate: ClaimedPhaseOneCandidate,
|
||||
stage_one_context: StageOneRequestContext,
|
||||
) -> Option<PathBuf> {
|
||||
let memory_root = memories::memory_root_for_cwd(&config.codex_home, &candidate.cwd);
|
||||
if let Err(err) = memories::ensure_layout(&memory_root).await {
|
||||
warn!(
|
||||
"failed to create memory layout for cwd {}: {err}",
|
||||
candidate.cwd.display()
|
||||
);
|
||||
return None;
|
||||
) -> HashSet<MemoryScopeTarget> {
|
||||
let candidate = claimed_candidate.candidate;
|
||||
let claimed_scopes = claimed_candidate.claimed_scopes;
|
||||
|
||||
let mut ready_scopes = Vec::<(MemoryScopeTarget, String)>::new();
|
||||
for (scope, ownership_token) in claimed_scopes {
|
||||
if let Err(err) = memories::ensure_layout(&scope.memory_root).await {
|
||||
warn!(
|
||||
"failed to create memory layout for scope {}:{} root={}: {err}",
|
||||
scope.scope_kind,
|
||||
scope.scope_key,
|
||||
scope.memory_root.display()
|
||||
);
|
||||
mark_phase1_job_failed_best_effort(
|
||||
session.as_ref(),
|
||||
candidate.thread_id,
|
||||
scope.scope_kind,
|
||||
&scope.scope_key,
|
||||
&ownership_token,
|
||||
"failed to create memory layout",
|
||||
)
|
||||
.await;
|
||||
continue;
|
||||
}
|
||||
ready_scopes.push((scope, ownership_token));
|
||||
}
|
||||
if ready_scopes.is_empty() {
|
||||
return HashSet::new();
|
||||
}
|
||||
|
||||
let (rollout_items, _thread_id, parse_errors) =
|
||||
@@ -168,7 +294,14 @@ async fn process_memory_candidate(
|
||||
"failed to load rollout {} for memories: {err}",
|
||||
candidate.rollout_path.display()
|
||||
);
|
||||
return None;
|
||||
fail_claimed_phase_one_jobs(
|
||||
&session,
|
||||
&candidate,
|
||||
&ready_scopes,
|
||||
"failed to load rollout",
|
||||
)
|
||||
.await;
|
||||
return HashSet::new();
|
||||
}
|
||||
};
|
||||
if parse_errors > 0 {
|
||||
@@ -188,7 +321,14 @@ async fn process_memory_candidate(
|
||||
"failed to prepare filtered rollout payload {} for memories: {err}",
|
||||
candidate.rollout_path.display()
|
||||
);
|
||||
return None;
|
||||
fail_claimed_phase_one_jobs(
|
||||
&session,
|
||||
&candidate,
|
||||
&ready_scopes,
|
||||
"failed to serialize filtered rollout",
|
||||
)
|
||||
.await;
|
||||
return HashSet::new();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -232,7 +372,14 @@ async fn process_memory_candidate(
|
||||
"stage-1 memory request failed for rollout {}: {err}",
|
||||
candidate.rollout_path.display()
|
||||
);
|
||||
return None;
|
||||
fail_claimed_phase_one_jobs(
|
||||
&session,
|
||||
&candidate,
|
||||
&ready_scopes,
|
||||
"stage-1 memory request failed",
|
||||
)
|
||||
.await;
|
||||
return HashSet::new();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -243,7 +390,14 @@ async fn process_memory_candidate(
|
||||
"failed while waiting for stage-1 memory response for rollout {}: {err}",
|
||||
candidate.rollout_path.display()
|
||||
);
|
||||
return None;
|
||||
fail_claimed_phase_one_jobs(
|
||||
&session,
|
||||
&candidate,
|
||||
&ready_scopes,
|
||||
"stage-1 memory response stream failed",
|
||||
)
|
||||
.await;
|
||||
return HashSet::new();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -254,68 +408,288 @@ async fn process_memory_candidate(
|
||||
"invalid stage-1 memory payload for rollout {}: {err}",
|
||||
candidate.rollout_path.display()
|
||||
);
|
||||
return None;
|
||||
fail_claimed_phase_one_jobs(
|
||||
&session,
|
||||
&candidate,
|
||||
&ready_scopes,
|
||||
"invalid stage-1 memory payload",
|
||||
)
|
||||
.await;
|
||||
return HashSet::new();
|
||||
}
|
||||
};
|
||||
|
||||
let raw_memory_path =
|
||||
match memories::write_raw_memory(&memory_root, &candidate, &stage_one_output.raw_memory)
|
||||
.await
|
||||
let mut touched_scopes = HashSet::new();
|
||||
for (scope, ownership_token) in &ready_scopes {
|
||||
if persist_phase_one_memory_for_scope(
|
||||
&session,
|
||||
&candidate,
|
||||
scope,
|
||||
ownership_token,
|
||||
&stage_one_output.raw_memory,
|
||||
&stage_one_output.summary,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(path) => path,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"failed to write raw memory for rollout {}: {err}",
|
||||
candidate.rollout_path.display()
|
||||
);
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
if state_db::upsert_thread_memory(
|
||||
session.services.state_db.as_deref(),
|
||||
candidate.thread_id,
|
||||
&stage_one_output.raw_memory,
|
||||
&stage_one_output.summary,
|
||||
MEMORY_STARTUP_STAGE,
|
||||
)
|
||||
.await
|
||||
.is_none()
|
||||
{
|
||||
warn!(
|
||||
"failed to upsert thread memory for rollout {}; removing {}",
|
||||
candidate.rollout_path.display(),
|
||||
raw_memory_path.display()
|
||||
);
|
||||
if let Err(err) = tokio::fs::remove_file(&raw_memory_path).await
|
||||
&& err.kind() != std::io::ErrorKind::NotFound
|
||||
{
|
||||
warn!(
|
||||
"failed to remove orphaned raw memory {}: {err}",
|
||||
raw_memory_path.display()
|
||||
);
|
||||
touched_scopes.insert(scope.clone());
|
||||
}
|
||||
return None;
|
||||
}
|
||||
info!(
|
||||
"memory phase-1 raw memory persisted: rollout={} cwd={} raw_memory_path={}",
|
||||
candidate.rollout_path.display(),
|
||||
candidate.cwd.display(),
|
||||
raw_memory_path.display()
|
||||
);
|
||||
|
||||
Some(candidate.cwd)
|
||||
touched_scopes
|
||||
}
|
||||
|
||||
async fn run_memory_consolidation_for_cwd(
|
||||
fn parse_source_updated_at_epoch(candidate: &memories::RolloutCandidate) -> i64 {
|
||||
candidate
|
||||
.updated_at
|
||||
.as_deref()
|
||||
.and_then(|value| DateTime::parse_from_rfc3339(value).ok())
|
||||
.map(|value| value.with_timezone(&Utc).timestamp())
|
||||
.unwrap_or_else(|| Utc::now().timestamp())
|
||||
}
|
||||
|
||||
fn memory_scope_targets_for_candidate(
|
||||
config: &Config,
|
||||
candidate: &memories::RolloutCandidate,
|
||||
) -> Vec<MemoryScopeTarget> {
|
||||
vec![
|
||||
MemoryScopeTarget {
|
||||
scope_kind: memories::MEMORY_SCOPE_KIND_CWD,
|
||||
scope_key: memories::memory_scope_key_for_cwd(&candidate.cwd),
|
||||
memory_root: memories::memory_root_for_cwd(&config.codex_home, &candidate.cwd),
|
||||
},
|
||||
MemoryScopeTarget {
|
||||
scope_kind: memories::MEMORY_SCOPE_KIND_USER,
|
||||
scope_key: memories::MEMORY_SCOPE_KEY_USER.to_string(),
|
||||
memory_root: memories::memory_root_for_user(&config.codex_home),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
async fn fail_claimed_phase_one_jobs(
|
||||
session: &Session,
|
||||
candidate: &memories::RolloutCandidate,
|
||||
claimed_scopes: &[(MemoryScopeTarget, String)],
|
||||
reason: &str,
|
||||
) {
|
||||
for (scope, ownership_token) in claimed_scopes {
|
||||
mark_phase1_job_failed_best_effort(
|
||||
session,
|
||||
candidate.thread_id,
|
||||
scope.scope_kind,
|
||||
&scope.scope_key,
|
||||
ownership_token,
|
||||
reason,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn persist_phase_one_memory_for_scope(
|
||||
session: &Session,
|
||||
candidate: &memories::RolloutCandidate,
|
||||
scope: &MemoryScopeTarget,
|
||||
ownership_token: &str,
|
||||
raw_memory: &str,
|
||||
summary: &str,
|
||||
) -> bool {
|
||||
let Some(state_db) = session.services.state_db.as_deref() else {
|
||||
mark_phase1_job_failed_best_effort(
|
||||
session,
|
||||
candidate.thread_id,
|
||||
scope.scope_kind,
|
||||
&scope.scope_key,
|
||||
ownership_token,
|
||||
"state db unavailable for scoped thread memory upsert",
|
||||
)
|
||||
.await;
|
||||
return false;
|
||||
};
|
||||
|
||||
let lease_renewed = match state_db
|
||||
.renew_phase1_job_lease(
|
||||
candidate.thread_id,
|
||||
scope.scope_kind,
|
||||
&scope.scope_key,
|
||||
ownership_token,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(renewed) => renewed,
|
||||
Err(err) => {
|
||||
warn!("state db renew_phase1_job_lease failed during {MEMORY_STARTUP_STAGE}: {err}");
|
||||
return false;
|
||||
}
|
||||
};
|
||||
if !lease_renewed {
|
||||
debug!(
|
||||
"memory phase-1 write skipped after ownership changed: rollout={} scope={} scope_key={}",
|
||||
candidate.rollout_path.display(),
|
||||
scope.scope_kind,
|
||||
scope.scope_key
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
let upserted = match state_db
|
||||
.upsert_thread_memory_for_scope_if_phase1_owner(
|
||||
candidate.thread_id,
|
||||
scope.scope_kind,
|
||||
&scope.scope_key,
|
||||
ownership_token,
|
||||
raw_memory,
|
||||
summary,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(upserted) => upserted,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"state db upsert_thread_memory_for_scope_if_phase1_owner failed during {MEMORY_STARTUP_STAGE}: {err}"
|
||||
);
|
||||
mark_phase1_job_failed_best_effort(
|
||||
session,
|
||||
candidate.thread_id,
|
||||
scope.scope_kind,
|
||||
&scope.scope_key,
|
||||
ownership_token,
|
||||
"failed to upsert scoped thread memory",
|
||||
)
|
||||
.await;
|
||||
return false;
|
||||
}
|
||||
};
|
||||
if upserted.is_none() {
|
||||
debug!(
|
||||
"memory phase-1 db upsert skipped after ownership changed: rollout={} scope={} scope_key={}",
|
||||
candidate.rollout_path.display(),
|
||||
scope.scope_kind,
|
||||
scope.scope_key
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
let latest_memories = match state_db
|
||||
.get_last_n_thread_memories_for_scope(
|
||||
scope.scope_kind,
|
||||
&scope.scope_key,
|
||||
memories::MAX_RAW_MEMORIES_PER_SCOPE,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(memories) => memories,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"state db get_last_n_thread_memories_for_scope failed during {MEMORY_STARTUP_STAGE}: {err}"
|
||||
);
|
||||
mark_phase1_job_failed_best_effort(
|
||||
session,
|
||||
candidate.thread_id,
|
||||
scope.scope_kind,
|
||||
&scope.scope_key,
|
||||
ownership_token,
|
||||
"failed to read scope memories after upsert",
|
||||
)
|
||||
.await;
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(err) =
|
||||
memories::sync_raw_memories_from_memories(&scope.memory_root, &latest_memories).await
|
||||
{
|
||||
warn!(
|
||||
"failed syncing raw memories for scope {}:{} root={}: {err}",
|
||||
scope.scope_kind,
|
||||
scope.scope_key,
|
||||
scope.memory_root.display()
|
||||
);
|
||||
mark_phase1_job_failed_best_effort(
|
||||
session,
|
||||
candidate.thread_id,
|
||||
scope.scope_kind,
|
||||
&scope.scope_key,
|
||||
ownership_token,
|
||||
"failed to sync scope raw memories",
|
||||
)
|
||||
.await;
|
||||
return false;
|
||||
}
|
||||
|
||||
if let Err(err) =
|
||||
memories::rebuild_memory_summary_from_memories(&scope.memory_root, &latest_memories).await
|
||||
{
|
||||
warn!(
|
||||
"failed rebuilding memory_summary for scope {}:{} root={}: {err}",
|
||||
scope.scope_kind,
|
||||
scope.scope_key,
|
||||
scope.memory_root.display()
|
||||
);
|
||||
mark_phase1_job_failed_best_effort(
|
||||
session,
|
||||
candidate.thread_id,
|
||||
scope.scope_kind,
|
||||
&scope.scope_key,
|
||||
ownership_token,
|
||||
"failed to rebuild scope memory summary",
|
||||
)
|
||||
.await;
|
||||
return false;
|
||||
}
|
||||
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(summary.as_bytes());
|
||||
let summary_hash = format!("{:x}", hasher.finalize());
|
||||
let raw_memory_path = scope
|
||||
.memory_root
|
||||
.join("raw_memories")
|
||||
.join(format!("{}.md", candidate.thread_id));
|
||||
let marked_succeeded = match state_db
|
||||
.mark_phase1_job_succeeded(
|
||||
candidate.thread_id,
|
||||
scope.scope_kind,
|
||||
&scope.scope_key,
|
||||
ownership_token,
|
||||
&raw_memory_path.display().to_string(),
|
||||
&summary_hash,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(marked) => marked,
|
||||
Err(err) => {
|
||||
warn!("state db mark_phase1_job_succeeded failed during {MEMORY_STARTUP_STAGE}: {err}");
|
||||
return false;
|
||||
}
|
||||
};
|
||||
if !marked_succeeded {
|
||||
return false;
|
||||
}
|
||||
|
||||
if let Err(err) = state_db
|
||||
.mark_memory_scope_dirty(scope.scope_kind, &scope.scope_key, true)
|
||||
.await
|
||||
{
|
||||
warn!("state db mark_memory_scope_dirty failed during {MEMORY_STARTUP_STAGE}: {err}");
|
||||
}
|
||||
|
||||
info!(
|
||||
"memory phase-1 raw memory persisted: rollout={} scope={} scope_key={} raw_memory_path={}",
|
||||
candidate.rollout_path.display(),
|
||||
scope.scope_kind,
|
||||
scope.scope_key,
|
||||
raw_memory_path.display()
|
||||
);
|
||||
true
|
||||
}
|
||||
|
||||
async fn run_memory_consolidation_for_scope(
|
||||
session: Arc<Session>,
|
||||
config: Arc<Config>,
|
||||
cwd: PathBuf,
|
||||
scope: MemoryScopeTarget,
|
||||
) {
|
||||
let lock_owner = session.conversation_id;
|
||||
let Some(lock_acquired) = state_db::try_acquire_memory_consolidation_lock(
|
||||
session.services.state_db.as_deref(),
|
||||
&cwd,
|
||||
&scope.memory_root,
|
||||
lock_owner,
|
||||
memories::CONSOLIDATION_LOCK_LEASE_SECONDS,
|
||||
MEMORY_STARTUP_STAGE,
|
||||
@@ -323,34 +697,27 @@ async fn run_memory_consolidation_for_cwd(
|
||||
.await
|
||||
else {
|
||||
warn!(
|
||||
"failed to acquire memory consolidation lock for cwd {}; skipping consolidation",
|
||||
cwd.display()
|
||||
"failed to acquire memory consolidation lock for scope {}:{}; skipping consolidation",
|
||||
scope.scope_kind, scope.scope_key
|
||||
);
|
||||
return;
|
||||
};
|
||||
if !lock_acquired {
|
||||
debug!(
|
||||
"memory consolidation lock already held for cwd {}; skipping",
|
||||
cwd.display()
|
||||
"memory consolidation lock already held for scope {}:{}; skipping",
|
||||
scope.scope_kind, scope.scope_key
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let Some(latest_memories) = state_db::get_last_n_thread_memories_for_cwd(
|
||||
session.services.state_db.as_deref(),
|
||||
&cwd,
|
||||
memories::MAX_RAW_MEMORIES_PER_CWD,
|
||||
MEMORY_STARTUP_STAGE,
|
||||
)
|
||||
.await
|
||||
else {
|
||||
let Some(state_db) = session.services.state_db.as_deref() else {
|
||||
warn!(
|
||||
"failed to read recent thread memories for cwd {}; skipping consolidation",
|
||||
cwd.display()
|
||||
"state db unavailable for scope {}:{}; skipping consolidation",
|
||||
scope.scope_kind, scope.scope_key
|
||||
);
|
||||
let _ = state_db::release_memory_consolidation_lock(
|
||||
session.services.state_db.as_deref(),
|
||||
&cwd,
|
||||
&scope.memory_root,
|
||||
lock_owner,
|
||||
MEMORY_STARTUP_STAGE,
|
||||
)
|
||||
@@ -358,17 +725,41 @@ async fn run_memory_consolidation_for_cwd(
|
||||
return;
|
||||
};
|
||||
|
||||
let memory_root = memories::memory_root_for_cwd(&config.codex_home, &cwd);
|
||||
let latest_memories = match state_db
|
||||
.get_last_n_thread_memories_for_scope(
|
||||
scope.scope_kind,
|
||||
&scope.scope_key,
|
||||
memories::MAX_RAW_MEMORIES_PER_SCOPE,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(memories) => memories,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"state db get_last_n_thread_memories_for_scope failed during {MEMORY_STARTUP_STAGE}: {err}"
|
||||
);
|
||||
let _ = state_db::release_memory_consolidation_lock(
|
||||
session.services.state_db.as_deref(),
|
||||
&scope.memory_root,
|
||||
lock_owner,
|
||||
MEMORY_STARTUP_STAGE,
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let memory_root = scope.memory_root.clone();
|
||||
if let Err(err) =
|
||||
memories::prune_to_recent_memories_and_rebuild_summary(&memory_root, &latest_memories).await
|
||||
{
|
||||
warn!(
|
||||
"failed to refresh phase-1 memory outputs for cwd {}: {err}",
|
||||
cwd.display()
|
||||
"failed to refresh phase-1 memory outputs for scope {}:{}: {err}",
|
||||
scope.scope_kind, scope.scope_key
|
||||
);
|
||||
let _ = state_db::release_memory_consolidation_lock(
|
||||
session.services.state_db.as_deref(),
|
||||
&cwd,
|
||||
&scope.memory_root,
|
||||
lock_owner,
|
||||
MEMORY_STARTUP_STAGE,
|
||||
)
|
||||
@@ -378,12 +769,12 @@ async fn run_memory_consolidation_for_cwd(
|
||||
|
||||
if let Err(err) = memories::wipe_consolidation_outputs(&memory_root).await {
|
||||
warn!(
|
||||
"failed to wipe previous consolidation outputs for cwd {}: {err}",
|
||||
cwd.display()
|
||||
"failed to wipe previous consolidation outputs for scope {}:{}: {err}",
|
||||
scope.scope_kind, scope.scope_key
|
||||
);
|
||||
let _ = state_db::release_memory_consolidation_lock(
|
||||
session.services.state_db.as_deref(),
|
||||
&cwd,
|
||||
&scope.memory_root,
|
||||
lock_owner,
|
||||
MEMORY_STARTUP_STAGE,
|
||||
)
|
||||
@@ -409,25 +800,24 @@ async fn run_memory_consolidation_for_cwd(
|
||||
{
|
||||
Ok(consolidation_agent_id) => {
|
||||
info!(
|
||||
"memory phase-2 consolidation agent started: cwd={} agent_id={}",
|
||||
cwd.display(),
|
||||
consolidation_agent_id
|
||||
"memory phase-2 consolidation agent started: scope={} scope_key={} agent_id={}",
|
||||
scope.scope_kind, scope.scope_key, consolidation_agent_id
|
||||
);
|
||||
spawn_memory_lock_release_task(
|
||||
session.as_ref(),
|
||||
cwd,
|
||||
scope.memory_root,
|
||||
lock_owner,
|
||||
consolidation_agent_id,
|
||||
);
|
||||
}
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"failed to spawn memory consolidation agent for cwd {}: {err}",
|
||||
cwd.display()
|
||||
"failed to spawn memory consolidation agent for scope {}:{}: {err}",
|
||||
scope.scope_kind, scope.scope_key
|
||||
);
|
||||
let _ = state_db::release_memory_consolidation_lock(
|
||||
session.services.state_db.as_deref(),
|
||||
&cwd,
|
||||
&scope.memory_root,
|
||||
lock_owner,
|
||||
MEMORY_STARTUP_STAGE,
|
||||
)
|
||||
@@ -495,6 +885,31 @@ fn spawn_memory_lock_release_task(
|
||||
});
|
||||
}
|
||||
|
||||
async fn mark_phase1_job_failed_best_effort(
|
||||
session: &Session,
|
||||
thread_id: ThreadId,
|
||||
scope_kind: &str,
|
||||
scope_key: &str,
|
||||
ownership_token: &str,
|
||||
failure_reason: &str,
|
||||
) {
|
||||
let Some(state_db) = session.services.state_db.as_deref() else {
|
||||
return;
|
||||
};
|
||||
if let Err(err) = state_db
|
||||
.mark_phase1_job_failed(
|
||||
thread_id,
|
||||
scope_kind,
|
||||
scope_key,
|
||||
ownership_token,
|
||||
failure_reason,
|
||||
)
|
||||
.await
|
||||
{
|
||||
warn!("state db mark_phase1_job_failed failed during {MEMORY_STARTUP_STAGE}: {err}");
|
||||
}
|
||||
}
|
||||
|
||||
async fn collect_response_text_until_completed(stream: &mut ResponseStream) -> CodexResult<String> {
|
||||
let mut output_text = String::new();
|
||||
|
||||
|
||||
@@ -17,13 +17,20 @@ use std::path::PathBuf;
|
||||
/// Subagent source label used to identify consolidation tasks.
|
||||
pub(crate) const MEMORY_CONSOLIDATION_SUBAGENT_LABEL: &str = "memory_consolidation";
|
||||
/// Maximum number of rollout candidates processed per startup pass.
|
||||
pub(crate) const MAX_ROLLOUTS_PER_STARTUP: usize = 8;
|
||||
pub(crate) const MAX_ROLLOUTS_PER_STARTUP: usize = 64;
|
||||
/// Concurrency cap for startup memory extraction and consolidation scheduling.
|
||||
pub(crate) const PHASE_ONE_CONCURRENCY_LIMIT: usize = MAX_ROLLOUTS_PER_STARTUP;
|
||||
/// Maximum number of recent raw memories retained per working directory.
|
||||
pub(crate) const MAX_RAW_MEMORIES_PER_CWD: usize = 10;
|
||||
/// Maximum number of recent raw memories retained per scope.
|
||||
pub(crate) const MAX_RAW_MEMORIES_PER_SCOPE: usize = 64;
|
||||
/// Maximum rollout age considered for phase-1 extraction.
|
||||
pub(crate) const PHASE_ONE_MAX_ROLLOUT_AGE_DAYS: i64 = 30;
|
||||
/// Lease duration (seconds) for phase-1 job ownership.
|
||||
pub(crate) const PHASE_ONE_JOB_LEASE_SECONDS: i64 = 3_600;
|
||||
/// Lease duration (seconds) for per-scope consolidation locks.
|
||||
pub(crate) const CONSOLIDATION_LOCK_LEASE_SECONDS: i64 = 600;
|
||||
pub(crate) const MEMORY_SCOPE_KIND_CWD: &str = "cwd";
|
||||
pub(crate) const MEMORY_SCOPE_KIND_USER: &str = "user";
|
||||
pub(crate) const MEMORY_SCOPE_KEY_USER: &str = "user";
|
||||
|
||||
const MEMORY_SUBDIR: &str = "memory";
|
||||
const RAW_MEMORIES_SUBDIR: &str = "raw_memories";
|
||||
@@ -31,6 +38,7 @@ const MEMORY_SUMMARY_FILENAME: &str = "memory_summary.md";
|
||||
const MEMORY_REGISTRY_FILENAME: &str = "MEMORY.md";
|
||||
const LEGACY_CONSOLIDATED_FILENAME: &str = "consolidated.md";
|
||||
const SKILLS_SUBDIR: &str = "skills";
|
||||
const CWD_MEMORY_BUCKET_HEX_LEN: usize = 16;
|
||||
|
||||
pub(crate) use phase_one::RAW_MEMORY_PROMPT;
|
||||
pub(crate) use phase_one::parse_stage_one_output;
|
||||
@@ -43,8 +51,9 @@ pub(crate) use rollout::StageOneRolloutFilter;
|
||||
pub(crate) use rollout::serialize_filtered_rollout_response_items;
|
||||
pub(crate) use selection::select_rollout_candidates_from_db;
|
||||
pub(crate) use storage::prune_to_recent_memories_and_rebuild_summary;
|
||||
pub(crate) use storage::rebuild_memory_summary_from_memories;
|
||||
pub(crate) use storage::sync_raw_memories_from_memories;
|
||||
pub(crate) use storage::wipe_consolidation_outputs;
|
||||
pub(crate) use storage::write_raw_memory;
|
||||
pub(crate) use types::RolloutCandidate;
|
||||
|
||||
/// Returns the on-disk memory root directory for a given working directory.
|
||||
@@ -56,6 +65,21 @@ pub(crate) fn memory_root_for_cwd(codex_home: &Path, cwd: &Path) -> PathBuf {
|
||||
codex_home.join("memories").join(bucket).join(MEMORY_SUBDIR)
|
||||
}
|
||||
|
||||
/// Returns the DB scope key for a cwd-scoped memory entry.
|
||||
///
|
||||
/// This uses the same normalization/fallback behavior as cwd bucket derivation.
|
||||
pub(crate) fn memory_scope_key_for_cwd(cwd: &Path) -> String {
|
||||
normalize_cwd_for_memory(cwd).display().to_string()
|
||||
}
|
||||
|
||||
/// Returns the on-disk user-shared memory root directory.
|
||||
pub(crate) fn memory_root_for_user(codex_home: &Path) -> PathBuf {
|
||||
codex_home
|
||||
.join("memories")
|
||||
.join(MEMORY_SCOPE_KEY_USER)
|
||||
.join(MEMORY_SUBDIR)
|
||||
}
|
||||
|
||||
fn raw_memories_dir(root: &Path) -> PathBuf {
|
||||
root.join(RAW_MEMORIES_SUBDIR)
|
||||
}
|
||||
@@ -70,9 +94,14 @@ pub(crate) async fn ensure_layout(root: &Path) -> std::io::Result<()> {
|
||||
}
|
||||
|
||||
fn memory_bucket_for_cwd(cwd: &Path) -> String {
|
||||
let normalized = normalize_for_path_comparison(cwd).unwrap_or_else(|_| cwd.to_path_buf());
|
||||
let normalized = normalize_cwd_for_memory(cwd);
|
||||
let normalized = normalized.to_string_lossy();
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(normalized.as_bytes());
|
||||
format!("{:x}", hasher.finalize())
|
||||
let full_hash = format!("{:x}", hasher.finalize());
|
||||
full_hash[..CWD_MEMORY_BUCKET_HEX_LEN].to_string()
|
||||
}
|
||||
|
||||
fn normalize_cwd_for_memory(cwd: &Path) -> PathBuf {
|
||||
normalize_for_path_comparison(cwd).unwrap_or_else(|_| cwd.to_path_buf())
|
||||
}
|
||||
|
||||
@@ -1,28 +1,25 @@
|
||||
use chrono::Duration;
|
||||
use chrono::Utc;
|
||||
use codex_protocol::ThreadId;
|
||||
use codex_state::ThreadMemory;
|
||||
use codex_state::ThreadMetadata;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use super::types::RolloutCandidate;
|
||||
|
||||
/// Selects rollout candidates that need stage-1 memory extraction.
|
||||
///
|
||||
/// A rollout is selected when it is not the active thread and has no memory yet
|
||||
/// (or the stored memory is older than the thread metadata timestamp).
|
||||
/// A rollout is selected when it is not the active thread and was updated
|
||||
/// within the configured max age window.
|
||||
pub(crate) fn select_rollout_candidates_from_db(
|
||||
items: &[ThreadMetadata],
|
||||
current_thread_id: ThreadId,
|
||||
existing_memories: &[ThreadMemory],
|
||||
max_items: usize,
|
||||
max_age_days: i64,
|
||||
) -> Vec<RolloutCandidate> {
|
||||
if max_items == 0 {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let memory_updated_by_thread = existing_memories
|
||||
.iter()
|
||||
.map(|memory| (memory.thread_id.to_string(), memory.updated_at))
|
||||
.collect::<BTreeMap<_, _>>();
|
||||
let cutoff = Utc::now() - Duration::days(max_age_days.max(0));
|
||||
|
||||
let mut candidates = Vec::new();
|
||||
|
||||
@@ -30,10 +27,7 @@ pub(crate) fn select_rollout_candidates_from_db(
|
||||
if item.id == current_thread_id {
|
||||
continue;
|
||||
}
|
||||
|
||||
let memory_updated_at = memory_updated_by_thread.get(&item.id.to_string());
|
||||
if memory_updated_at.is_some_and(|memory_updated_at| *memory_updated_at >= item.updated_at)
|
||||
{
|
||||
if item.updated_at < cutoff {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -41,7 +35,6 @@ pub(crate) fn select_rollout_candidates_from_db(
|
||||
thread_id: item.id,
|
||||
rollout_path: item.rollout_path.clone(),
|
||||
cwd: item.cwd.clone(),
|
||||
title: item.title.clone(),
|
||||
updated_at: Some(item.updated_at.to_rfc3339()),
|
||||
});
|
||||
|
||||
|
||||
@@ -6,47 +6,12 @@ use std::path::PathBuf;
|
||||
use tracing::warn;
|
||||
|
||||
use super::LEGACY_CONSOLIDATED_FILENAME;
|
||||
use super::MAX_RAW_MEMORIES_PER_CWD;
|
||||
use super::MAX_RAW_MEMORIES_PER_SCOPE;
|
||||
use super::MEMORY_REGISTRY_FILENAME;
|
||||
use super::SKILLS_SUBDIR;
|
||||
use super::ensure_layout;
|
||||
use super::memory_summary_file;
|
||||
use super::raw_memories_dir;
|
||||
use super::types::RolloutCandidate;
|
||||
|
||||
/// Writes (or replaces) the per-thread markdown raw memory on disk.
|
||||
///
|
||||
/// This also removes older files for the same thread id to keep one canonical
|
||||
/// raw memory file per thread.
///
/// The file is placed in `raw_memories_dir(root)` and named
/// `<thread_id>_<slug>.md`, where the slug comes from `build_memory_slug`
/// applied to the candidate title. The body is a short `key: value` header
/// (`thread_id`, `cwd`, `rollout_path`, and `updated_at` when present), a blank
/// line, then the trimmed `raw_memory` followed by a single newline.
///
/// Returns the path that was written.
///
/// # Errors
///
/// Propagates errors from `remove_outdated_thread_raw_memories` and
/// `tokio::fs::write`; formatting failures are wrapped with
/// `std::io::Error::other`.
|
||||
pub(crate) async fn write_raw_memory(
|
||||
root: &Path,
|
||||
candidate: &RolloutCandidate,
|
||||
raw_memory: &str,
|
||||
) -> std::io::Result<PathBuf> {
|
||||
let slug = build_memory_slug(&candidate.title);
|
||||
let filename = format!("{}_{}.md", candidate.thread_id, slug);
|
||||
let path = raw_memories_dir(root).join(filename);
|
||||
|
||||
// Runs before the write and receives the target path.
// NOTE(review): presumably the helper spares `path` itself and deletes other
// files for this thread id — confirm in `remove_outdated_thread_raw_memories`.
remove_outdated_thread_raw_memories(root, &candidate.thread_id.to_string(), &path).await?;
|
||||
|
||||
let mut body = String::new();
|
||||
writeln!(body, "thread_id: {}", candidate.thread_id)
|
||||
.map_err(|err| std::io::Error::other(format!("format raw memory: {err}")))?;
|
||||
writeln!(body, "cwd: {}", candidate.cwd.display())
|
||||
.map_err(|err| std::io::Error::other(format!("format raw memory: {err}")))?;
|
||||
writeln!(body, "rollout_path: {}", candidate.rollout_path.display())
|
||||
.map_err(|err| std::io::Error::other(format!("format raw memory: {err}")))?;
|
||||
// `updated_at` is optional on the candidate; its header line is emitted only when set.
if let Some(updated_at) = candidate.updated_at.as_deref() {
|
||||
writeln!(body, "updated_at: {updated_at}")
|
||||
.map_err(|err| std::io::Error::other(format!("format raw memory: {err}")))?;
|
||||
}
|
||||
// Blank line separating the header from the memory text.
writeln!(body).map_err(|err| std::io::Error::other(format!("format raw memory: {err}")))?;
|
||||
body.push_str(raw_memory.trim());
|
||||
body.push('\n');
|
||||
|
||||
tokio::fs::write(&path, body).await?;
|
||||
Ok(path)
|
||||
}
|
||||
|
||||
/// Prunes stale raw memory files and rebuilds the routing summary for recent memories.
|
||||
pub(crate) async fn prune_to_recent_memories_and_rebuild_summary(
|
||||
@@ -57,7 +22,7 @@ pub(crate) async fn prune_to_recent_memories_and_rebuild_summary(
|
||||
|
||||
let keep = memories
|
||||
.iter()
|
||||
.take(MAX_RAW_MEMORIES_PER_CWD)
|
||||
.take(MAX_RAW_MEMORIES_PER_SCOPE)
|
||||
.map(|memory| memory.thread_id.to_string())
|
||||
.collect::<BTreeSet<_>>();
|
||||
|
||||
@@ -65,6 +30,38 @@ pub(crate) async fn prune_to_recent_memories_and_rebuild_summary(
|
||||
rebuild_memory_summary(root, memories).await
|
||||
}
|
||||
|
||||
/// Rebuild `memory_summary.md` for a scope without pruning raw memory files.
///
/// Ensures the layout under `root` exists via `ensure_layout`, then delegates
/// to `rebuild_memory_summary` with the given `memories`. No pruning helper is
/// called on this path.
///
/// # Errors
///
/// Propagates any `std::io::Error` returned by `ensure_layout` or
/// `rebuild_memory_summary`.
|
||||
pub(crate) async fn rebuild_memory_summary_from_memories(
|
||||
root: &Path,
|
||||
memories: &[ThreadMemory],
|
||||
) -> std::io::Result<()> {
|
||||
ensure_layout(root).await?;
|
||||
rebuild_memory_summary(root, memories).await
|
||||
}
|
||||
|
||||
/// Syncs canonical raw memory files from DB-backed memory rows.
///
/// Steps, in order: ensure the layout under `root`, keep at most
/// `MAX_RAW_MEMORIES_PER_SCOPE` leading entries of `memories`, call
/// `prune_raw_memories` with the thread ids of those entries, then call
/// `write_raw_memory_for_thread` for each retained entry.
///
/// NOTE(review): taking the leading entries assumes `memories` is already
/// ordered by priority (likely newest first) — confirm against callers.
///
/// # Errors
///
/// Returns the first `std::io::Error` produced by any of the steps above.
|
||||
pub(crate) async fn sync_raw_memories_from_memories(
|
||||
root: &Path,
|
||||
memories: &[ThreadMemory],
|
||||
) -> std::io::Result<()> {
|
||||
ensure_layout(root).await?;
|
||||
|
||||
// Cap the working set at the first MAX_RAW_MEMORIES_PER_SCOPE rows.
let retained = memories
|
||||
.iter()
|
||||
.take(MAX_RAW_MEMORIES_PER_SCOPE)
|
||||
.collect::<Vec<_>>();
|
||||
// Thread ids (as strings) of the retained rows, handed to the pruning step.
let keep = retained
|
||||
.iter()
|
||||
.map(|memory| memory.thread_id.to_string())
|
||||
.collect::<BTreeSet<_>>();
|
||||
prune_raw_memories(root, &keep).await?;
|
||||
|
||||
// Write a raw memory file for every retained row; stop at the first error.
for memory in retained {
|
||||
write_raw_memory_for_thread(root, memory).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Clears consolidation outputs so a fresh consolidation run can regenerate them.
|
||||
///
|
||||
/// Phase-1 artifacts (`raw_memories/` and `memory_summary.md`) are preserved.
|
||||
@@ -103,7 +100,7 @@ async fn rebuild_memory_summary(root: &Path, memories: &[ThreadMemory]) -> std::
|
||||
}
|
||||
|
||||
body.push_str("Map of concise summaries to thread IDs (latest first):\n\n");
|
||||
for memory in memories.iter().take(MAX_RAW_MEMORIES_PER_CWD) {
|
||||
for memory in memories.iter().take(MAX_RAW_MEMORIES_PER_SCOPE) {
|
||||
let summary = compact_summary_for_index(&memory.memory_summary);
|
||||
writeln!(body, "- {summary} (thread: `{}`)", memory.thread_id)
|
||||
.map_err(|err| std::io::Error::other(format!("format memory summary: {err}")))?;
|
||||
@@ -179,27 +176,25 @@ async fn remove_outdated_thread_raw_memories(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn build_memory_slug(value: &str) -> String {
|
||||
let mut slug = String::new();
|
||||
let mut last_was_sep = false;
|
||||
async fn write_raw_memory_for_thread(
|
||||
root: &Path,
|
||||
memory: &ThreadMemory,
|
||||
) -> std::io::Result<PathBuf> {
|
||||
let path = raw_memories_dir(root).join(format!("{}.md", memory.thread_id));
|
||||
|
||||
for ch in value.chars() {
|
||||
let normalized = ch.to_ascii_lowercase();
|
||||
if normalized.is_ascii_alphanumeric() {
|
||||
slug.push(normalized);
|
||||
last_was_sep = false;
|
||||
} else if !last_was_sep {
|
||||
slug.push('_');
|
||||
last_was_sep = true;
|
||||
}
|
||||
}
|
||||
remove_outdated_thread_raw_memories(root, &memory.thread_id.to_string(), &path).await?;
|
||||
|
||||
let slug = slug.trim_matches('_').to_string();
|
||||
if slug.is_empty() {
|
||||
"memory".to_string()
|
||||
} else {
|
||||
slug.chars().take(64).collect()
|
||||
}
|
||||
let mut body = String::new();
|
||||
writeln!(body, "thread_id: {}", memory.thread_id)
|
||||
.map_err(|err| std::io::Error::other(format!("format raw memory: {err}")))?;
|
||||
writeln!(body, "updated_at: {}", memory.updated_at.to_rfc3339())
|
||||
.map_err(|err| std::io::Error::other(format!("format raw memory: {err}")))?;
|
||||
writeln!(body).map_err(|err| std::io::Error::other(format!("format raw memory: {err}")))?;
|
||||
body.push_str(memory.raw_memory.trim());
|
||||
body.push('\n');
|
||||
|
||||
tokio::fs::write(&path, body).await?;
|
||||
Ok(path)
|
||||
}
|
||||
|
||||
fn compact_summary_for_index(summary: &str) -> String {
|
||||
@@ -208,10 +203,15 @@ fn compact_summary_for_index(summary: &str) -> String {
|
||||
|
||||
fn extract_thread_id_from_summary_filename(file_name: &str) -> Option<&str> {
|
||||
let stem = file_name.strip_suffix(".md")?;
|
||||
let (thread_id, _) = stem.split_once('_')?;
|
||||
if thread_id.is_empty() {
|
||||
if stem.is_empty() {
|
||||
None
|
||||
} else if let Some((thread_id, _legacy_slug)) = stem.split_once('_') {
|
||||
if thread_id.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(thread_id)
|
||||
}
|
||||
} else {
|
||||
Some(thread_id)
|
||||
Some(stem)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
use super::MEMORY_SCOPE_KIND_CWD;
|
||||
use super::PHASE_ONE_MAX_ROLLOUT_AGE_DAYS;
|
||||
use super::StageOneResponseItemKinds;
|
||||
use super::StageOneRolloutFilter;
|
||||
use super::ensure_layout;
|
||||
use super::memory_root_for_cwd;
|
||||
use super::memory_scope_key_for_cwd;
|
||||
use super::memory_summary_file;
|
||||
use super::parse_stage_one_output;
|
||||
use super::prune_to_recent_memories_and_rebuild_summary;
|
||||
@@ -77,7 +80,7 @@ fn memory_root_varies_by_cwd() {
|
||||
.and_then(std::path::Path::file_name)
|
||||
.and_then(std::ffi::OsStr::to_str)
|
||||
.expect("cwd bucket");
|
||||
assert_eq!(bucket_a.len(), 64);
|
||||
assert_eq!(bucket_a.len(), 16);
|
||||
assert!(bucket_a.chars().all(|ch| ch.is_ascii_hexdigit()));
|
||||
}
|
||||
|
||||
@@ -97,6 +100,22 @@ fn memory_root_encoding_avoids_component_collisions() {
|
||||
assert!(!root_hash.display().to_string().contains("workspace"));
|
||||
}
|
||||
|
||||
// Checks that `memory_scope_key_for_cwd` yields the same key for a path that
// reaches a directory through a `nested/..` detour and for the canonicalized
// path of that same directory.
#[test]
|
||||
fn memory_scope_key_uses_normalized_cwd() {
|
||||
let dir = tempdir().expect("tempdir");
|
||||
let workspace = dir.path().join("workspace");
|
||||
std::fs::create_dir_all(&workspace).expect("mkdir workspace");
|
||||
// `nested` must exist on disk so that `workspace/nested/..` can be resolved.
std::fs::create_dir_all(workspace.join("nested")).expect("mkdir nested");
|
||||
|
||||
// Non-normalized spelling of the workspace directory.
let alias = workspace.join("nested").join("..");
|
||||
let normalized = workspace
|
||||
.canonicalize()
|
||||
.expect("canonical workspace path should resolve");
|
||||
let alias_key = memory_scope_key_for_cwd(&alias);
|
||||
let normalized_key = memory_scope_key_for_cwd(&normalized);
|
||||
assert_eq!(alias_key, normalized_key);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_stage_one_output_accepts_fenced_json() {
|
||||
let raw = "```json\n{\"rawMemory\":\"abc\",\"summary\":\"short\"}\n```";
|
||||
@@ -206,64 +225,58 @@ fn serialize_filtered_rollout_response_items_filters_by_response_item_kind() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn select_rollout_candidates_uses_db_memory_recency() {
|
||||
fn select_rollout_candidates_filters_by_age_window() {
|
||||
let dir = tempdir().expect("tempdir");
|
||||
let cwd_a = dir.path().join("workspace-a");
|
||||
let cwd_b = dir.path().join("workspace-b");
|
||||
std::fs::create_dir_all(&cwd_a).expect("mkdir cwd a");
|
||||
std::fs::create_dir_all(&cwd_b).expect("mkdir cwd b");
|
||||
|
||||
let now = Utc::now().timestamp();
|
||||
let current_thread_id = ThreadId::default();
|
||||
let stale_thread_id = ThreadId::default();
|
||||
let fresh_thread_id = ThreadId::default();
|
||||
let missing_thread_id = ThreadId::default();
|
||||
let recent_thread_id = ThreadId::default();
|
||||
let old_thread_id = ThreadId::default();
|
||||
let recent_two_thread_id = ThreadId::default();
|
||||
|
||||
let current = thread_metadata(
|
||||
current_thread_id,
|
||||
dir.path().join("current.jsonl"),
|
||||
cwd_a.clone(),
|
||||
"current",
|
||||
500,
|
||||
now,
|
||||
);
|
||||
let fresh = thread_metadata(
|
||||
fresh_thread_id,
|
||||
dir.path().join("fresh.jsonl"),
|
||||
let recent = thread_metadata(
|
||||
recent_thread_id,
|
||||
dir.path().join("recent.jsonl"),
|
||||
cwd_a,
|
||||
"fresh",
|
||||
400,
|
||||
"recent",
|
||||
now - 10,
|
||||
);
|
||||
let stale = thread_metadata(
|
||||
stale_thread_id,
|
||||
dir.path().join("stale.jsonl"),
|
||||
let old = thread_metadata(
|
||||
old_thread_id,
|
||||
dir.path().join("old.jsonl"),
|
||||
cwd_b.clone(),
|
||||
"stale",
|
||||
300,
|
||||
"old",
|
||||
now - (PHASE_ONE_MAX_ROLLOUT_AGE_DAYS + 1) * 24 * 60 * 60,
|
||||
);
|
||||
let missing = thread_metadata(
|
||||
missing_thread_id,
|
||||
dir.path().join("missing.jsonl"),
|
||||
let recent_two = thread_metadata(
|
||||
recent_two_thread_id,
|
||||
dir.path().join("recent-two.jsonl"),
|
||||
cwd_b,
|
||||
"missing",
|
||||
200,
|
||||
"recent-two",
|
||||
now - 20,
|
||||
);
|
||||
|
||||
let memories = vec![ThreadMemory {
|
||||
thread_id: fresh_thread_id,
|
||||
raw_memory: "raw memory".to_string(),
|
||||
memory_summary: "memory".to_string(),
|
||||
updated_at: Utc.timestamp_opt(450, 0).single().expect("timestamp"),
|
||||
}];
|
||||
|
||||
let candidates = select_rollout_candidates_from_db(
|
||||
&[current, fresh, stale, missing],
|
||||
&[current, recent, old, recent_two],
|
||||
current_thread_id,
|
||||
&memories,
|
||||
5,
|
||||
PHASE_ONE_MAX_ROLLOUT_AGE_DAYS,
|
||||
);
|
||||
|
||||
assert_eq!(candidates.len(), 2);
|
||||
assert_eq!(candidates[0].thread_id, stale_thread_id);
|
||||
assert_eq!(candidates[1].thread_id, missing_thread_id);
|
||||
assert_eq!(candidates[0].thread_id, recent_thread_id);
|
||||
assert_eq!(candidates[1].thread_id, recent_two_thread_id);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -274,8 +287,8 @@ async fn prune_and_rebuild_summary_keeps_latest_memories_only() {
|
||||
|
||||
let keep_id = ThreadId::default().to_string();
|
||||
let drop_id = ThreadId::default().to_string();
|
||||
let keep_path = raw_memories_dir(&root).join(format!("{keep_id}_keep.md"));
|
||||
let drop_path = raw_memories_dir(&root).join(format!("{drop_id}_drop.md"));
|
||||
let keep_path = raw_memories_dir(&root).join(format!("{keep_id}.md"));
|
||||
let drop_path = raw_memories_dir(&root).join(format!("{drop_id}.md"));
|
||||
tokio::fs::write(&keep_path, "keep")
|
||||
.await
|
||||
.expect("write keep");
|
||||
@@ -285,9 +298,15 @@ async fn prune_and_rebuild_summary_keeps_latest_memories_only() {
|
||||
|
||||
let memories = vec![ThreadMemory {
|
||||
thread_id: ThreadId::try_from(keep_id.clone()).expect("thread id"),
|
||||
scope_kind: MEMORY_SCOPE_KIND_CWD.to_string(),
|
||||
scope_key: "scope".to_string(),
|
||||
raw_memory: "raw memory".to_string(),
|
||||
memory_summary: "short summary".to_string(),
|
||||
updated_at: Utc.timestamp_opt(100, 0).single().expect("timestamp"),
|
||||
last_used_at: None,
|
||||
used_count: 0,
|
||||
invalidated_at: None,
|
||||
invalid_reason: None,
|
||||
}];
|
||||
|
||||
prune_to_recent_memories_and_rebuild_summary(&root, &memories)
|
||||
|
||||
@@ -11,8 +11,6 @@ pub(crate) struct RolloutCandidate {
|
||||
pub(crate) rollout_path: PathBuf,
|
||||
/// Thread working directory used for per-project memory bucketing.
|
||||
pub(crate) cwd: PathBuf,
|
||||
/// Best-effort thread title used to build readable memory filenames.
|
||||
pub(crate) title: String,
|
||||
/// Last observed thread update timestamp (RFC3339), if available.
|
||||
pub(crate) updated_at: Option<String>,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user