mirror of
https://github.com/openai/codex.git
synced 2026-05-03 10:56:37 +00:00
chore: unify memory job flow (#11334)
This commit is contained in:
@@ -1,109 +1,46 @@
|
||||
mod phase_one;
|
||||
//! Memory subsystem for startup extraction and consolidation.
|
||||
//!
|
||||
//! The startup memory pipeline is split into two phases:
|
||||
//! - Phase 1: select rollouts, extract stage-1 raw memories, persist stage-1 outputs, and enqueue consolidation.
|
||||
//! - Phase 2: claim scopes, materialize consolidation inputs, and dispatch consolidation agents.
|
||||
|
||||
mod layout;
|
||||
mod prompts;
|
||||
mod rollout;
|
||||
mod selection;
|
||||
mod scope;
|
||||
mod stage_one;
|
||||
mod startup;
|
||||
mod storage;
|
||||
mod text;
|
||||
mod types;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use crate::path_utils::normalize_for_path_comparison;
|
||||
use sha2::Digest;
|
||||
use sha2::Sha256;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Subagent source label used to identify consolidation tasks.
|
||||
pub(crate) const MEMORY_CONSOLIDATION_SUBAGENT_LABEL: &str = "memory_consolidation";
|
||||
const MEMORY_CONSOLIDATION_SUBAGENT_LABEL: &str = "memory_consolidation";
|
||||
/// Maximum number of rollout candidates processed per startup pass.
|
||||
pub(crate) const MAX_ROLLOUTS_PER_STARTUP: usize = 64;
|
||||
const MAX_ROLLOUTS_PER_STARTUP: usize = 64;
|
||||
/// Concurrency cap for startup memory extraction and consolidation scheduling.
|
||||
pub(crate) const PHASE_ONE_CONCURRENCY_LIMIT: usize = MAX_ROLLOUTS_PER_STARTUP;
|
||||
const PHASE_ONE_CONCURRENCY_LIMIT: usize = MAX_ROLLOUTS_PER_STARTUP;
|
||||
/// Concurrency cap for phase-2 consolidation dispatch.
|
||||
const PHASE_TWO_CONCURRENCY_LIMIT: usize = MAX_ROLLOUTS_PER_STARTUP;
|
||||
/// Maximum number of recent raw memories retained per scope.
|
||||
pub(crate) const MAX_RAW_MEMORIES_PER_SCOPE: usize = 64;
|
||||
const MAX_RAW_MEMORIES_PER_SCOPE: usize = 64;
|
||||
/// Maximum rollout age considered for phase-1 extraction.
|
||||
pub(crate) const PHASE_ONE_MAX_ROLLOUT_AGE_DAYS: i64 = 30;
|
||||
const PHASE_ONE_MAX_ROLLOUT_AGE_DAYS: i64 = 30;
|
||||
/// Lease duration (seconds) for phase-1 job ownership.
|
||||
pub(crate) const PHASE_ONE_JOB_LEASE_SECONDS: i64 = 3_600;
|
||||
const PHASE_ONE_JOB_LEASE_SECONDS: i64 = 3_600;
|
||||
/// Backoff delay (seconds) before retrying a failed stage-1 extraction job.
|
||||
const PHASE_ONE_JOB_RETRY_DELAY_SECONDS: i64 = 3_600;
|
||||
/// Lease duration (seconds) for phase-2 consolidation job ownership.
|
||||
pub(crate) const PHASE_TWO_JOB_LEASE_SECONDS: i64 = 3_600;
|
||||
const PHASE_TWO_JOB_LEASE_SECONDS: i64 = 3_600;
|
||||
/// Backoff delay (seconds) before retrying a failed phase-2 consolidation job.
|
||||
const PHASE_TWO_JOB_RETRY_DELAY_SECONDS: i64 = 3_600;
|
||||
/// Heartbeat interval (seconds) for phase-2 running jobs.
|
||||
pub(crate) const PHASE_TWO_JOB_HEARTBEAT_SECONDS: u64 = 30;
|
||||
pub(crate) const MEMORY_SCOPE_KIND_CWD: &str = "cwd";
|
||||
pub(crate) const MEMORY_SCOPE_KIND_USER: &str = "user";
|
||||
pub(crate) const MEMORY_SCOPE_KEY_USER: &str = "user";
|
||||
const PHASE_TWO_JOB_HEARTBEAT_SECONDS: u64 = 30;
|
||||
|
||||
const MEMORY_SUBDIR: &str = "memory";
|
||||
const RAW_MEMORIES_SUBDIR: &str = "raw_memories";
|
||||
const MEMORY_SUMMARY_FILENAME: &str = "memory_summary.md";
|
||||
const MEMORY_REGISTRY_FILENAME: &str = "MEMORY.md";
|
||||
const LEGACY_CONSOLIDATED_FILENAME: &str = "consolidated.md";
|
||||
const SKILLS_SUBDIR: &str = "skills";
|
||||
const CWD_MEMORY_BUCKET_HEX_LEN: usize = 16;
|
||||
|
||||
pub(crate) use phase_one::RAW_MEMORY_PROMPT;
|
||||
pub(crate) use phase_one::parse_stage_one_output;
|
||||
pub(crate) use phase_one::stage_one_output_schema;
|
||||
pub(crate) use prompts::build_consolidation_prompt;
|
||||
pub(crate) use prompts::build_stage_one_input_message;
|
||||
#[cfg(test)]
|
||||
pub(crate) use rollout::StageOneResponseItemKinds;
|
||||
pub(crate) use rollout::StageOneRolloutFilter;
|
||||
pub(crate) use rollout::serialize_filtered_rollout_response_items;
|
||||
pub(crate) use selection::select_rollout_candidates_from_db;
|
||||
pub(crate) use storage::prune_to_recent_memories_and_rebuild_summary;
|
||||
pub(crate) use storage::rebuild_memory_summary_from_memories;
|
||||
pub(crate) use storage::sync_raw_memories_from_memories;
|
||||
pub(crate) use storage::wipe_consolidation_outputs;
|
||||
pub(crate) use types::RolloutCandidate;
|
||||
|
||||
/// Returns the on-disk memory root directory for a given working directory.
|
||||
/// Starts the memory startup pipeline for eligible root sessions.
|
||||
///
|
||||
/// The cwd is normalized and hashed into a deterministic bucket under
|
||||
/// `<codex_home>/memories/<hash>/memory`.
|
||||
pub(crate) fn memory_root_for_cwd(codex_home: &Path, cwd: &Path) -> PathBuf {
|
||||
let bucket = memory_bucket_for_cwd(cwd);
|
||||
codex_home.join("memories").join(bucket).join(MEMORY_SUBDIR)
|
||||
}
|
||||
|
||||
/// Returns the DB scope key for a cwd-scoped memory entry.
|
||||
///
|
||||
/// This uses the same normalization/fallback behavior as cwd bucket derivation.
|
||||
pub(crate) fn memory_scope_key_for_cwd(cwd: &Path) -> String {
|
||||
normalize_cwd_for_memory(cwd).display().to_string()
|
||||
}
|
||||
|
||||
/// Returns the on-disk user-shared memory root directory.
|
||||
pub(crate) fn memory_root_for_user(codex_home: &Path) -> PathBuf {
|
||||
codex_home
|
||||
.join("memories")
|
||||
.join(MEMORY_SCOPE_KEY_USER)
|
||||
.join(MEMORY_SUBDIR)
|
||||
}
|
||||
|
||||
fn raw_memories_dir(root: &Path) -> PathBuf {
|
||||
root.join(RAW_MEMORIES_SUBDIR)
|
||||
}
|
||||
|
||||
fn memory_summary_file(root: &Path) -> PathBuf {
|
||||
root.join(MEMORY_SUMMARY_FILENAME)
|
||||
}
|
||||
|
||||
/// Ensures the phase-1 memory directory layout exists for the given root.
|
||||
pub(crate) async fn ensure_layout(root: &Path) -> std::io::Result<()> {
|
||||
tokio::fs::create_dir_all(raw_memories_dir(root)).await
|
||||
}
|
||||
|
||||
fn memory_bucket_for_cwd(cwd: &Path) -> String {
|
||||
let normalized = normalize_cwd_for_memory(cwd);
|
||||
let normalized = normalized.to_string_lossy();
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(normalized.as_bytes());
|
||||
let full_hash = format!("{:x}", hasher.finalize());
|
||||
full_hash[..CWD_MEMORY_BUCKET_HEX_LEN].to_string()
|
||||
}
|
||||
|
||||
fn normalize_cwd_for_memory(cwd: &Path) -> PathBuf {
|
||||
normalize_for_path_comparison(cwd).unwrap_or_else(|_| cwd.to_path_buf())
|
||||
}
|
||||
/// This is the single entrypoint that `codex` uses to trigger memory startup.
|
||||
pub(crate) use startup::start_memories_startup_task;
|
||||
|
||||
Reference in New Issue
Block a user