mirror of
https://github.com/openai/codex.git
synced 2026-06-01 19:02:59 +00:00
feat: mem v2 - PR5 (#11372)
This commit is contained in:
@@ -1,83 +0,0 @@
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Directory name that `rollout_summaries_dir` appends to the memory root.
pub(super) const ROLLOUT_SUMMARIES_SUBDIR: &str = "rollout_summaries";
|
||||
/// File name that `raw_memories_file` appends to the memory root.
pub(super) const RAW_MEMORIES_FILENAME: &str = "raw_memories.md";
|
||||
/// File name probed under the memory root when checking for existing phase artifacts.
pub(super) const MEMORY_REGISTRY_FILENAME: &str = "MEMORY.md";
|
||||
/// Older consolidated-output file name; also probed as a phase artifact.
pub(super) const LEGACY_CONSOLIDATED_FILENAME: &str = "consolidated.md";
|
||||
/// Directory name probed under the memory root when checking for existing phase artifacts.
pub(super) const SKILLS_SUBDIR: &str = "skills";
|
||||
/// First path component below `memories` in the legacy per-user layout.
const LEGACY_USER_SUBDIR: &str = "user";
|
||||
/// Second path component below `memories` in the legacy per-user layout.
const LEGACY_MEMORY_SUBDIR: &str = "memory";
|
||||
|
||||
/// Returns the shared on-disk memory root directory.
|
||||
pub(super) fn memory_root(codex_home: &Path) -> PathBuf {
|
||||
codex_home.join("memories")
|
||||
}
|
||||
|
||||
pub(super) fn rollout_summaries_dir(root: &Path) -> PathBuf {
|
||||
root.join(ROLLOUT_SUMMARIES_SUBDIR)
|
||||
}
|
||||
|
||||
pub(super) fn raw_memories_file(root: &Path) -> PathBuf {
|
||||
root.join(RAW_MEMORIES_FILENAME)
|
||||
}
|
||||
|
||||
/// Migrates legacy user memory contents into the shared root when no shared-root
|
||||
/// phase artifacts exist yet.
|
||||
pub(super) async fn migrate_legacy_user_memory_root_if_needed(
|
||||
codex_home: &Path,
|
||||
) -> std::io::Result<()> {
|
||||
let root = memory_root(codex_home);
|
||||
let legacy = legacy_user_memory_root(codex_home);
|
||||
|
||||
if !tokio::fs::try_exists(&legacy).await? || global_root_has_phase_artifacts(&root).await? {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
copy_dir_contents_if_missing(&legacy, &root).await
|
||||
}
|
||||
|
||||
/// Ensures the phase-1 memory directory layout exists for the given root.
|
||||
pub(super) async fn ensure_layout(root: &Path) -> std::io::Result<()> {
|
||||
tokio::fs::create_dir_all(rollout_summaries_dir(root)).await
|
||||
}
|
||||
|
||||
fn legacy_user_memory_root(codex_home: &Path) -> PathBuf {
|
||||
codex_home
|
||||
.join("memories")
|
||||
.join(LEGACY_USER_SUBDIR)
|
||||
.join(LEGACY_MEMORY_SUBDIR)
|
||||
}
|
||||
|
||||
async fn global_root_has_phase_artifacts(root: &Path) -> std::io::Result<bool> {
|
||||
if tokio::fs::try_exists(&rollout_summaries_dir(root)).await?
|
||||
|| tokio::fs::try_exists(&raw_memories_file(root)).await?
|
||||
|| tokio::fs::try_exists(&root.join(MEMORY_REGISTRY_FILENAME)).await?
|
||||
|| tokio::fs::try_exists(&root.join(LEGACY_CONSOLIDATED_FILENAME)).await?
|
||||
|| tokio::fs::try_exists(&root.join(SKILLS_SUBDIR)).await?
|
||||
{
|
||||
return Ok(true);
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
fn copy_dir_contents_if_missing<'a>(
|
||||
src_dir: &'a Path,
|
||||
dst_dir: &'a Path,
|
||||
) -> futures::future::BoxFuture<'a, std::io::Result<()>> {
|
||||
Box::pin(async move {
|
||||
tokio::fs::create_dir_all(dst_dir).await?;
|
||||
let mut dir = tokio::fs::read_dir(src_dir).await?;
|
||||
while let Some(entry) = dir.next_entry().await? {
|
||||
let src_path = entry.path();
|
||||
let dst_path = dst_dir.join(entry.file_name());
|
||||
let metadata = entry.metadata().await?;
|
||||
if metadata.is_dir() {
|
||||
copy_dir_contents_if_missing(&src_path, &dst_path).await?;
|
||||
} else if metadata.is_file() && !tokio::fs::try_exists(&dst_path).await? {
|
||||
tokio::fs::copy(&src_path, &dst_path).await?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
@@ -4,20 +4,26 @@
|
||||
//! - Phase 1: select rollouts, extract stage-1 raw memories, persist stage-1 outputs, and enqueue consolidation.
|
||||
//! - Phase 2: claim a global consolidation lock, materialize consolidation inputs, and dispatch one consolidation agent.
|
||||
|
||||
mod layout;
|
||||
mod prompts;
|
||||
mod rollout;
|
||||
mod stage_one;
|
||||
mod startup;
|
||||
mod storage;
|
||||
mod text;
|
||||
mod types;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use serde::Deserialize;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Subagent source label used to identify consolidation tasks.
|
||||
const MEMORY_CONSOLIDATION_SUBAGENT_LABEL: &str = "memory_consolidation";
|
||||
/// Directory name that `rollout_summaries_dir` appends to the memory root.
const ROLLOUT_SUMMARIES_SUBDIR: &str = "rollout_summaries";
|
||||
/// File name that `raw_memories_file` appends to the memory root.
const RAW_MEMORIES_FILENAME: &str = "raw_memories.md";
|
||||
/// File name of the memory registry stored under the memory root.
const MEMORY_REGISTRY_FILENAME: &str = "MEMORY.md";
|
||||
/// Directory name of the skills folder stored under the memory root.
const SKILLS_SUBDIR: &str = "skills";
|
||||
/// Maximum number of rollout candidates processed per startup pass.
|
||||
const MAX_ROLLOUTS_PER_STARTUP: usize = 64;
|
||||
/// Concurrency cap for startup memory extraction and consolidation scheduling.
|
||||
@@ -39,6 +45,34 @@ const PHASE_TWO_JOB_RETRY_DELAY_SECONDS: i64 = 3_600;
|
||||
/// Heartbeat interval (seconds) for phase-2 running jobs.
|
||||
const PHASE_TWO_JOB_HEARTBEAT_SECONDS: u64 = 30;
|
||||
|
||||
/// Parsed stage-1 model output payload.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
struct StageOneOutput {
|
||||
/// Detailed markdown raw memory for a single rollout.
|
||||
#[serde(rename = "raw_memory")]
|
||||
raw_memory: String,
|
||||
/// Compact summary line used for routing and indexing.
|
||||
#[serde(rename = "rollout_summary")]
|
||||
rollout_summary: String,
|
||||
}
|
||||
|
||||
/// Resolves the shared memory directory: `codex_home` with a `memories`
/// component appended. Nothing is created on disk.
fn memory_root(codex_home: &Path) -> PathBuf {
    let mut shared_root = PathBuf::from(codex_home);
    shared_root.push("memories");
    shared_root
}
|
||||
|
||||
fn rollout_summaries_dir(root: &Path) -> PathBuf {
|
||||
root.join(ROLLOUT_SUMMARIES_SUBDIR)
|
||||
}
|
||||
|
||||
fn raw_memories_file(root: &Path) -> PathBuf {
|
||||
root.join(RAW_MEMORIES_FILENAME)
|
||||
}
|
||||
|
||||
async fn ensure_layout(root: &Path) -> std::io::Result<()> {
|
||||
tokio::fs::create_dir_all(rollout_summaries_dir(root)).await
|
||||
}
|
||||
|
||||
/// Starts the memory startup pipeline for eligible root sessions.
|
||||
///
|
||||
/// This is the single entrypoint that `codex` uses to trigger memory startup.
|
||||
|
||||
@@ -5,9 +5,9 @@ use regex::Regex;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
|
||||
use super::StageOneOutput;
|
||||
use super::text::compact_whitespace;
|
||||
use super::text::truncate_text_for_storage;
|
||||
use super::types::StageOneOutput;
|
||||
|
||||
/// System prompt for stage-1 raw memory extraction.
|
||||
pub(super) const RAW_MEMORY_PROMPT: &str =
|
||||
@@ -28,7 +28,6 @@ pub(super) fn stage_one_output_schema() -> Value {
|
||||
json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"rollout_slug": { "type": "string" },
|
||||
"rollout_summary": { "type": "string" },
|
||||
"raw_memory": { "type": "string" }
|
||||
},
|
||||
@@ -97,12 +96,6 @@ fn parse_json_object_loose(raw: &str) -> Result<Value> {
|
||||
fn normalize_stage_one_output(mut output: StageOneOutput) -> Result<StageOneOutput> {
|
||||
output.raw_memory = output.raw_memory.trim().to_string();
|
||||
output.rollout_summary = output.rollout_summary.trim().to_string();
|
||||
if let Some(slug) = output.rollout_slug.take() {
|
||||
let slug = slug.trim();
|
||||
if !slug.is_empty() {
|
||||
output.rollout_slug = Some(slug.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
if output.raw_memory.is_empty() {
|
||||
return Err(CodexErr::InvalidRequest(
|
||||
@@ -195,7 +188,6 @@ mod tests {
|
||||
fn normalize_stage_one_output_redacts_and_compacts_summary() {
|
||||
let output = StageOneOutput {
|
||||
raw_memory: "Token: sk-abcdefghijklmnopqrstuvwxyz123456\nBearer abcdefghijklmnopqrstuvwxyz012345".to_string(),
|
||||
rollout_slug: None,
|
||||
rollout_summary: "password = mysecret123456\n\nsmall".to_string(),
|
||||
};
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use crate::codex::Session;
|
||||
use crate::config::Config;
|
||||
use crate::memories::layout::memory_root;
|
||||
use crate::memories::memory_root;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::SubAgentSource;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
@@ -17,7 +17,7 @@ use super::super::prompts::build_consolidation_prompt;
|
||||
use super::super::storage::rebuild_raw_memories_file_from_memories;
|
||||
use super::super::storage::sync_rollout_summaries_from_memories;
|
||||
use super::super::storage::wipe_consolidation_outputs;
|
||||
use super::watch::spawn_phase2_completion_task;
|
||||
use super::phase2::spawn_phase2_completion_task;
|
||||
|
||||
pub(super) async fn run_global_memory_consolidation(
|
||||
session: &Arc<Session>,
|
||||
|
||||
@@ -12,13 +12,13 @@ use futures::StreamExt;
|
||||
use tracing::warn;
|
||||
|
||||
use super::StageOneRequestContext;
|
||||
use crate::memories::StageOneOutput;
|
||||
use crate::memories::prompts::build_stage_one_input_message;
|
||||
use crate::memories::rollout::StageOneRolloutFilter;
|
||||
use crate::memories::rollout::serialize_filtered_rollout_response_items;
|
||||
use crate::memories::stage_one::RAW_MEMORY_PROMPT;
|
||||
use crate::memories::stage_one::parse_stage_one_output;
|
||||
use crate::memories::stage_one::stage_one_output_schema;
|
||||
use crate::memories::types::StageOneOutput;
|
||||
use std::path::Path;
|
||||
|
||||
pub(super) async fn extract_stage_one_output(
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
mod dispatch;
|
||||
mod extract;
|
||||
mod watch;
|
||||
mod phase2;
|
||||
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::config::Config;
|
||||
use crate::error::Result as CodexResult;
|
||||
use crate::features::Feature;
|
||||
use crate::memories::layout::migrate_legacy_user_memory_root_if_needed;
|
||||
use crate::rollout::INTERACTIVE_SESSION_SOURCES;
|
||||
use codex_otel::OtelManager;
|
||||
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
@@ -15,7 +14,6 @@ use codex_protocol::openai_models::ModelInfo;
|
||||
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use futures::StreamExt;
|
||||
use serde_json::Value;
|
||||
use std::sync::Arc;
|
||||
use tracing::info;
|
||||
use tracing::warn;
|
||||
@@ -80,10 +78,6 @@ pub(super) async fn run_memories_startup_pipeline(
|
||||
session: &Arc<Session>,
|
||||
config: Arc<Config>,
|
||||
) -> CodexResult<()> {
|
||||
if let Err(err) = migrate_legacy_user_memory_root_if_needed(&config.codex_home).await {
|
||||
warn!("failed migrating legacy shared memory root: {err}");
|
||||
}
|
||||
|
||||
let Some(state_db) = session.services.state_db.as_deref() else {
|
||||
warn!("state db unavailable for memories startup pipeline; skipping");
|
||||
return Ok(());
|
||||
@@ -91,11 +85,7 @@ pub(super) async fn run_memories_startup_pipeline(
|
||||
|
||||
let allowed_sources = INTERACTIVE_SESSION_SOURCES
|
||||
.iter()
|
||||
.map(|value| match serde_json::to_value(value) {
|
||||
Ok(Value::String(s)) => s,
|
||||
Ok(other) => other.to_string(),
|
||||
Err(_) => String::new(),
|
||||
})
|
||||
.map(ToString::to_string)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let claimed_candidates = match state_db
|
||||
@@ -186,7 +176,8 @@ pub(super) async fn run_memories_startup_pipeline(
|
||||
claimed_count, succeeded_count
|
||||
);
|
||||
|
||||
let consolidation_job_count = run_consolidation_dispatch(session, config).await;
|
||||
let consolidation_job_count =
|
||||
usize::from(dispatch::run_global_memory_consolidation(session, config).await);
|
||||
info!(
|
||||
"memory consolidation dispatch complete: {} job(s) scheduled",
|
||||
consolidation_job_count
|
||||
@@ -194,7 +185,3 @@ pub(super) async fn run_memories_startup_pipeline(
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_consolidation_dispatch(session: &Arc<Session>, config: Arc<Config>) -> usize {
|
||||
usize::from(dispatch::run_global_memory_consolidation(session, config).await)
|
||||
}
|
||||
|
||||
@@ -5,13 +5,12 @@ use std::path::Path;
|
||||
use tracing::warn;
|
||||
|
||||
use super::MAX_RAW_MEMORIES_FOR_GLOBAL;
|
||||
use super::MEMORY_REGISTRY_FILENAME;
|
||||
use super::SKILLS_SUBDIR;
|
||||
use super::ensure_layout;
|
||||
use super::raw_memories_file;
|
||||
use super::rollout_summaries_dir;
|
||||
use super::text::compact_whitespace;
|
||||
use crate::memories::layout::LEGACY_CONSOLIDATED_FILENAME;
|
||||
use crate::memories::layout::MEMORY_REGISTRY_FILENAME;
|
||||
use crate::memories::layout::SKILLS_SUBDIR;
|
||||
use crate::memories::layout::ensure_layout;
|
||||
use crate::memories::layout::raw_memories_file;
|
||||
use crate::memories::layout::rollout_summaries_dir;
|
||||
|
||||
/// Rebuild `raw_memories.md` from DB-backed stage-1 outputs.
|
||||
pub(super) async fn rebuild_raw_memories_file_from_memories(
|
||||
@@ -49,16 +48,14 @@ pub(super) async fn sync_rollout_summaries_from_memories(
|
||||
///
|
||||
/// Phase-1 artifacts (`rollout_summaries/` and `raw_memories.md`) are preserved.
|
||||
pub(super) async fn wipe_consolidation_outputs(root: &Path) -> std::io::Result<()> {
|
||||
for file_name in [MEMORY_REGISTRY_FILENAME, LEGACY_CONSOLIDATED_FILENAME] {
|
||||
let path = root.join(file_name);
|
||||
if let Err(err) = tokio::fs::remove_file(&path).await
|
||||
&& err.kind() != std::io::ErrorKind::NotFound
|
||||
{
|
||||
warn!(
|
||||
"failed removing consolidation file {}: {err}",
|
||||
path.display()
|
||||
);
|
||||
}
|
||||
let path = root.join(MEMORY_REGISTRY_FILENAME);
|
||||
if let Err(err) = tokio::fs::remove_file(&path).await
|
||||
&& err.kind() != std::io::ErrorKind::NotFound
|
||||
{
|
||||
warn!(
|
||||
"failed removing consolidation file {}: {err}",
|
||||
path.display()
|
||||
);
|
||||
}
|
||||
|
||||
let skills_dir = root.join(SKILLS_SUBDIR);
|
||||
@@ -152,7 +149,7 @@ async fn write_rollout_summary_for_thread(
|
||||
.map_err(|err| std::io::Error::other(format!("format rollout summary: {err}")))?;
|
||||
writeln!(body)
|
||||
.map_err(|err| std::io::Error::other(format!("format rollout summary: {err}")))?;
|
||||
body.push_str(&compact_whitespace(&memory.summary));
|
||||
body.push_str(&compact_whitespace(&memory.rollout_summary));
|
||||
body.push('\n');
|
||||
|
||||
tokio::fs::write(path, body).await
|
||||
|
||||
@@ -5,11 +5,10 @@ use super::stage_one::parse_stage_one_output;
|
||||
use super::storage::rebuild_raw_memories_file_from_memories;
|
||||
use super::storage::sync_rollout_summaries_from_memories;
|
||||
use super::storage::wipe_consolidation_outputs;
|
||||
use crate::memories::layout::ensure_layout;
|
||||
use crate::memories::layout::memory_root;
|
||||
use crate::memories::layout::migrate_legacy_user_memory_root_if_needed;
|
||||
use crate::memories::layout::raw_memories_file;
|
||||
use crate::memories::layout::rollout_summaries_dir;
|
||||
use crate::memories::ensure_layout;
|
||||
use crate::memories::memory_root;
|
||||
use crate::memories::raw_memories_file;
|
||||
use crate::memories::rollout_summaries_dir;
|
||||
use chrono::TimeZone;
|
||||
use chrono::Utc;
|
||||
use codex_protocol::ThreadId;
|
||||
@@ -28,49 +27,18 @@ fn memory_root_uses_shared_global_path() {
|
||||
assert_eq!(memory_root(&codex_home), codex_home.join("memories"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn migrate_legacy_user_memory_root_if_needed_copies_contents() {
|
||||
let dir = tempdir().expect("tempdir");
|
||||
let codex_home = dir.path().join("codex");
|
||||
let legacy_root = codex_home.join("memories").join("user").join("memory");
|
||||
tokio::fs::create_dir_all(legacy_root.join("rollout_summaries"))
|
||||
.await
|
||||
.expect("create legacy rollout summaries dir");
|
||||
tokio::fs::write(
|
||||
legacy_root.join("rollout_summaries").join("thread.md"),
|
||||
"summary",
|
||||
)
|
||||
.await
|
||||
.expect("write legacy rollout summary");
|
||||
tokio::fs::write(legacy_root.join("raw_memories.md"), "raw")
|
||||
.await
|
||||
.expect("write legacy raw memories");
|
||||
|
||||
migrate_legacy_user_memory_root_if_needed(&codex_home)
|
||||
.await
|
||||
.expect("migrate legacy memory root");
|
||||
|
||||
let root = memory_root(&codex_home);
|
||||
assert!(root.join("rollout_summaries").join("thread.md").is_file());
|
||||
assert!(root.join("raw_memories.md").is_file());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_stage_one_output_accepts_fenced_json() {
|
||||
let raw = "```json\n{\"raw_memory\":\"abc\",\"rollout_summary\":\"short\",\"rollout_slug\":\"slug\"}\n```";
|
||||
let raw = "```json\n{\"raw_memory\":\"abc\",\"rollout_summary\":\"short\"}\n```";
|
||||
let parsed = parse_stage_one_output(raw).expect("parsed");
|
||||
assert!(parsed.raw_memory.contains("abc"));
|
||||
assert_eq!(parsed.rollout_summary, "short");
|
||||
assert_eq!(parsed.rollout_slug, Some("slug".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_stage_one_output_accepts_legacy_keys() {
|
||||
fn parse_stage_one_output_rejects_legacy_keys() {
|
||||
let raw = r#"{"rawMemory":"abc","summary":"short"}"#;
|
||||
let parsed = parse_stage_one_output(raw).expect("parsed");
|
||||
assert!(parsed.raw_memory.contains("abc"));
|
||||
assert_eq!(parsed.rollout_summary, "short");
|
||||
assert_eq!(parsed.rollout_slug, None);
|
||||
assert!(parse_stage_one_output(raw).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -194,7 +162,7 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only
|
||||
thread_id: ThreadId::try_from(keep_id.clone()).expect("thread id"),
|
||||
source_updated_at: Utc.timestamp_opt(100, 0).single().expect("timestamp"),
|
||||
raw_memory: "raw memory".to_string(),
|
||||
summary: "short summary".to_string(),
|
||||
rollout_summary: "short summary".to_string(),
|
||||
generated_at: Utc.timestamp_opt(101, 0).single().expect("timestamp"),
|
||||
}];
|
||||
|
||||
@@ -216,13 +184,12 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn wipe_consolidation_outputs_removes_registry_skills_and_legacy_file() {
|
||||
async fn wipe_consolidation_outputs_removes_registry_and_skills() {
|
||||
let dir = tempdir().expect("tempdir");
|
||||
let root = dir.path().join("memory");
|
||||
ensure_layout(&root).await.expect("ensure layout");
|
||||
|
||||
let memory_registry = root.join("MEMORY.md");
|
||||
let legacy_consolidated = root.join("consolidated.md");
|
||||
let skills_dir = root.join("skills").join("example");
|
||||
|
||||
tokio::fs::create_dir_all(&skills_dir)
|
||||
@@ -231,15 +198,11 @@ async fn wipe_consolidation_outputs_removes_registry_skills_and_legacy_file() {
|
||||
tokio::fs::write(&memory_registry, "memory")
|
||||
.await
|
||||
.expect("write memory registry");
|
||||
tokio::fs::write(&legacy_consolidated, "legacy")
|
||||
.await
|
||||
.expect("write legacy consolidated");
|
||||
|
||||
wipe_consolidation_outputs(&root)
|
||||
.await
|
||||
.expect("wipe consolidation outputs");
|
||||
|
||||
assert!(!memory_registry.exists());
|
||||
assert!(!legacy_consolidated.exists());
|
||||
assert!(!root.join("skills").exists());
|
||||
}
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
use serde::Deserialize;
|
||||
|
||||
/// Parsed stage-1 model output payload.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub(super) struct StageOneOutput {
|
||||
/// Detailed markdown raw memory for a single rollout.
|
||||
#[serde(rename = "raw_memory", alias = "rawMemory", alias = "traceMemory")]
|
||||
pub(super) raw_memory: String,
|
||||
/// Optional rollout slug from the model output. Accepted but ignored.
|
||||
#[serde(default)]
|
||||
pub(super) rollout_slug: Option<String>,
|
||||
/// Compact summary line used for routing and indexing.
|
||||
#[serde(rename = "rollout_summary", alias = "summary")]
|
||||
pub(super) rollout_summary: String,
|
||||
}
|
||||
Reference in New Issue
Block a user