feat: mem v2 - PR6 (consolidation) (#11374)

This commit is contained in:
jif-oai
2026-02-11 00:02:57 +00:00
committed by GitHub
parent 2c9be54c9a
commit 674799d356
7 changed files with 910 additions and 156 deletions

View File

@@ -1116,3 +1116,103 @@ impl WebsocketTelemetry for ApiTelemetry {
self.otel_manager.record_websocket_event(result, duration);
}
}
#[cfg(test)]
mod tests {
    use super::ModelClient;
    use codex_otel::OtelManager;
    use codex_protocol::ThreadId;
    use codex_protocol::openai_models::ModelInfo;
    use codex_protocol::protocol::SessionSource;
    use codex_protocol::protocol::SubAgentSource;
    use pretty_assertions::assert_eq;
    use serde_json::json;

    /// Builds a `ModelClient` for `session_source`, backed by an OSS provider
    /// at `https://example.com/v1` that uses the Responses wire API.
    ///
    /// Every other constructor argument is a fixed `None`/`false` placeholder.
    fn test_model_client(session_source: SessionSource) -> ModelClient {
        let base_url = "https://example.com/v1";
        let wire_api = crate::model_provider_info::WireApi::Responses;
        let oss_provider =
            crate::model_provider_info::create_oss_provider_with_base_url(base_url, wire_api);
        let thread_id = ThreadId::new();
        ModelClient::new(
            None,
            thread_id,
            oss_provider,
            session_source,
            None,
            false,
            false,
            false,
            false,
            None,
        )
    }

    /// Deserializes a fixed `gpt-test` model description into a `ModelInfo`.
    ///
    /// Panics if the JSON fixture no longer matches the `ModelInfo` schema.
    fn test_model_info() -> ModelInfo {
        let fixture = json!({
            "slug": "gpt-test",
            "display_name": "gpt-test",
            "description": "desc",
            "default_reasoning_level": "medium",
            "supported_reasoning_levels": [
                {"effort": "medium", "description": "medium"}
            ],
            "shell_type": "shell_command",
            "visibility": "list",
            "supported_in_api": true,
            "priority": 1,
            "upgrade": null,
            "base_instructions": "base instructions",
            "model_messages": null,
            "supports_reasoning_summaries": false,
            "support_verbosity": false,
            "default_verbosity": null,
            "apply_patch_tool_type": null,
            "truncation_policy": {"mode": "bytes", "limit": 10000},
            "supports_parallel_tool_calls": false,
            "context_window": 272000,
            "auto_compact_token_limit": null,
            "experimental_supported_tools": []
        });
        serde_json::from_value(fixture).expect("deserialize test model info")
    }

    /// Creates an `OtelManager` populated with fixed test values and a CLI
    /// session source.
    fn test_otel_manager() -> OtelManager {
        let model_name = "gpt-test";
        let originator = "test-originator".to_string();
        let terminal = "test-terminal".to_string();
        OtelManager::new(
            ThreadId::new(),
            model_name,
            model_name,
            None,
            None,
            None,
            originator,
            false,
            terminal,
            SessionSource::Cli,
        )
    }

    /// A sub-agent session with an `Other` label must surface that label in
    /// the `x-openai-subagent` header.
    #[test]
    fn build_subagent_headers_sets_other_subagent_label() {
        let source =
            SessionSource::SubAgent(SubAgentSource::Other("memory_consolidation".to_string()));
        let client = test_model_client(source);
        let headers = client.build_subagent_headers();
        let header = headers.get("x-openai-subagent");
        let label = header.and_then(|raw| raw.to_str().ok());
        assert_eq!(label, Some("memory_consolidation"));
    }

    /// Summarizing an empty list of memories succeeds and yields no output.
    #[tokio::test]
    async fn summarize_memories_returns_empty_for_empty_input() {
        let client = test_model_client(SessionSource::Cli);
        let info = test_model_info();
        let otel = test_otel_manager();
        let summaries = client
            .summarize_memories(Vec::new(), &info, None, &otel)
            .await
            .expect("empty summarize request should succeed");
        assert_eq!(summaries.len(), 0);
    }
}

View File

@@ -169,6 +169,7 @@ mod tests {
use super::run_global_memory_consolidation;
use crate::CodexAuth;
use crate::ThreadManager;
use crate::agent::control::AgentControl;
use crate::codex::Session;
use crate::codex::make_session_and_context;
use crate::config::Config;
@@ -280,6 +281,14 @@ mod tests {
.await
.expect("shutdown spawned threads");
}
/// Counts how many of the ops captured by the manager are `Op::UserInput`.
fn user_input_ops_count(&self) -> usize {
    let mut total = 0;
    for (_, op) in self.manager.captured_ops() {
        if matches!(op, Op::UserInput { .. }) {
            total += 1;
        }
    }
    total
}
}
#[tokio::test]
@@ -311,12 +320,7 @@ mod tests {
.expect("claim while running");
assert_eq!(running_claim, Phase2JobClaimOutcome::SkippedRunning);
let user_input_ops = harness
.manager
.captured_ops()
.into_iter()
.filter(|(_, op)| matches!(op, Op::UserInput { .. }))
.count();
let user_input_ops = harness.user_input_ops_count();
assert_eq!(user_input_ops, 1);
harness.shutdown_threads().await;
@@ -338,14 +342,115 @@ mod tests {
"second dispatch should skip while the global lock is running"
);
let user_input_ops = harness
.manager
.captured_ops()
.into_iter()
.filter(|(_, op)| matches!(op, Op::UserInput { .. }))
.count();
let user_input_ops = harness.user_input_ops_count();
assert_eq!(user_input_ops, 1);
harness.shutdown_threads().await;
}
/// Enqueues a global consolidation without any stage-1 output, then checks
/// that dispatch reports nothing scheduled, submits no `UserInput` op, and
/// leaves the global job non-dirty for the next claimer.
#[tokio::test]
async fn dispatch_with_dirty_job_and_no_stage1_outputs_skips_spawn_and_clears_dirty_flag() {
    let harness = DispatchHarness::new().await;
    harness
        .state_db
        .enqueue_global_consolidation(999)
        .await
        .expect("enqueue global consolidation");

    let dispatched =
        run_global_memory_consolidation(&harness.session, Arc::clone(&harness.config)).await;
    assert!(
        !dispatched,
        "dispatch should not spawn when no stage-1 outputs are available"
    );

    let submitted_inputs = harness.user_input_ops_count();
    assert_eq!(submitted_inputs, 0);

    // A fresh claimer must see the job as already up to date.
    let followup_claim = harness
        .state_db
        .try_claim_global_phase2_job(ThreadId::new(), 3_600)
        .await
        .expect("claim global job after empty dispatch");
    assert_eq!(
        followup_claim,
        Phase2JobClaimOutcome::SkippedNotDirty,
        "empty dispatch should finalize global job as up-to-date"
    );

    harness.shutdown_threads().await;
}
/// Completes one stage-1 job so a global consolidation is pending, then runs
/// dispatch on a session whose agent control is `AgentControl::default()`.
///
/// Dispatch must report `false`, and a subsequent claim must be skipped as
/// not dirty rather than observing a running job.
#[tokio::test]
async fn dispatch_marks_job_for_retry_when_spawn_agent_fails() {
    let codex_home = tempfile::tempdir().expect("create temp codex home");
    let config = {
        let mut cfg = test_config();
        cfg.codex_home = codex_home.path().to_path_buf();
        cfg.cwd = cfg.codex_home.clone();
        Arc::new(cfg)
    };

    let state_db = codex_state::StateRuntime::init(
        config.codex_home.clone(),
        config.model_provider_id.clone(),
        None,
    )
    .await
    .expect("initialize state db");

    let (mut session, _turn_context) = make_session_and_context().await;
    session.services.state_db = Some(Arc::clone(&state_db));
    // NOTE(review): presumably a default `AgentControl` cannot spawn agents,
    // which is what makes the dispatch below fail — confirm.
    session.services.agent_control = AgentControl::default();
    let session = Arc::new(session);

    // Register a thread so a stage-1 job can be claimed for it.
    let thread_id = ThreadId::new();
    let rollout_path = config.codex_home.join(format!("rollout-{thread_id}.jsonl"));
    let mut metadata_builder =
        ThreadMetadataBuilder::new(thread_id, rollout_path, Utc::now(), SessionSource::Cli);
    metadata_builder.cwd = config.cwd.clone();
    metadata_builder.model_provider = Some(config.model_provider_id.clone());
    let metadata = metadata_builder.build(&config.model_provider_id);
    state_db
        .upsert_thread(&metadata)
        .await
        .expect("upsert thread metadata");

    let ownership_token = match state_db
        .try_claim_stage1_job(thread_id, session.conversation_id, 100, 3_600, 64)
        .await
        .expect("claim stage-1 job")
    {
        codex_state::Stage1JobClaimOutcome::Claimed { ownership_token } => ownership_token,
        other => panic!("unexpected stage-1 claim outcome: {other:?}"),
    };

    let enqueued_global = state_db
        .mark_stage1_job_succeeded(
            thread_id,
            &ownership_token,
            100,
            "raw memory",
            "rollout summary",
        )
        .await
        .expect("mark stage-1 success");
    assert!(
        enqueued_global,
        "stage-1 success should enqueue global consolidation"
    );

    let dispatched = run_global_memory_consolidation(&session, Arc::clone(&config)).await;
    assert!(
        !dispatched,
        "dispatch should return false when consolidation subagent cannot be spawned"
    );

    let claim_after_failure = state_db
        .try_claim_global_phase2_job(ThreadId::new(), 3_600)
        .await
        .expect("claim global job after spawn failure");
    assert_eq!(
        claim_after_failure,
        Phase2JobClaimOutcome::SkippedNotDirty,
        "spawn failures should leave the job in retry backoff instead of running"
    );
}
}

View File

@@ -185,3 +185,21 @@ pub(super) async fn run_memories_startup_pipeline(
Ok(())
}
#[cfg(test)]
mod tests {
    use super::run_memories_startup_pipeline;
    use crate::codex::make_session_and_context;
    use crate::config::test_config;
    use std::sync::Arc;

    /// Running the startup pipeline on a freshly built test session must
    /// return `Ok`.
    ///
    /// NOTE(review): presumably `make_session_and_context` yields a session
    /// with no state DB attached, which is the case this test is named for —
    /// confirm.
    #[tokio::test]
    async fn startup_pipeline_is_noop_when_state_db_is_unavailable() {
        let (bare_session, _turn_context) = make_session_and_context().await;
        let session = Arc::new(bare_session);
        let config = Arc::new(test_config());

        let outcome = run_memories_startup_pipeline(&session, config).await;
        outcome.expect("startup pipeline should skip cleanly without state db");
    }
}

View File

@@ -2,7 +2,9 @@ use crate::agent::AgentStatus;
use crate::agent::status::is_final as is_final_agent_status;
use crate::codex::Session;
use codex_protocol::ThreadId;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::watch;
use tracing::debug;
use tracing::info;
use tracing::warn;
@@ -21,32 +23,50 @@ pub(super) fn spawn_phase2_completion_task(
let agent_control = session.services.agent_control.clone();
tokio::spawn(async move {
let Some(state_db) = state_db.as_deref() else {
let Some(state_db) = state_db else {
return;
};
let mut status_rx = match agent_control.subscribe_status(consolidation_agent_id).await {
let status_rx = match agent_control.subscribe_status(consolidation_agent_id).await {
Ok(status_rx) => status_rx,
Err(err) => {
warn!(
"failed to subscribe to global memory consolidation agent {consolidation_agent_id}: {err}"
);
let _ = state_db
.mark_global_phase2_job_failed(
&ownership_token,
"failed to subscribe to consolidation agent status",
PHASE_TWO_JOB_RETRY_DELAY_SECONDS,
)
.await;
mark_phase2_failed_with_recovery(
state_db.as_ref(),
&ownership_token,
"failed to subscribe to consolidation agent status",
)
.await;
return;
}
};
run_phase2_completion_task(
Arc::clone(&state_db),
ownership_token,
completion_watermark,
consolidation_agent_id,
status_rx,
)
.await;
});
}
async fn run_phase2_completion_task(
state_db: Arc<codex_state::StateRuntime>,
ownership_token: String,
completion_watermark: i64,
consolidation_agent_id: ThreadId,
mut status_rx: watch::Receiver<AgentStatus>,
) {
let final_status = {
let mut heartbeat_interval =
tokio::time::interval(Duration::from_secs(PHASE_TWO_JOB_HEARTBEAT_SECONDS));
heartbeat_interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
let final_status = loop {
loop {
let status = status_rx.borrow().clone();
if is_final_agent_status(&status) {
break status;
@@ -68,55 +88,84 @@ pub(super) fn spawn_phase2_completion_task(
{
Ok(true) => {}
Ok(false) => {
debug!(
"memory phase-2 heartbeat lost global ownership; skipping finalization"
warn!(
"memory phase-2 heartbeat lost global ownership; finalizing as failure"
);
break AgentStatus::Errored(
"lost global phase-2 ownership during heartbeat".to_string(),
);
return;
}
Err(err) => {
warn!(
"state db heartbeat_global_phase2_job failed during memories startup: {err}"
);
return;
break AgentStatus::Errored(format!(
"phase-2 heartbeat update failed: {err}"
));
}
}
}
}
};
if is_phase2_success(&final_status) {
match state_db
.mark_global_phase2_job_succeeded(&ownership_token, completion_watermark)
.await
{
Ok(true) => {}
Ok(false) => {
debug!(
"memory phase-2 success finalization skipped after global ownership changed"
);
}
Err(err) => {
warn!(
"state db mark_global_phase2_job_succeeded failed during memories startup: {err}"
);
}
}
info!(
"memory phase-2 global consolidation agent finished: agent_id={consolidation_agent_id} final_status={final_status:?}"
);
return;
}
};
let failure_reason = phase2_failure_reason(&final_status);
if is_phase2_success(&final_status) {
match state_db
.mark_global_phase2_job_failed(
&ownership_token,
&failure_reason,
.mark_global_phase2_job_succeeded(&ownership_token, completion_watermark)
.await
{
Ok(true) => {}
Ok(false) => {
debug!(
"memory phase-2 success finalization skipped after global ownership changed"
);
}
Err(err) => {
warn!(
"state db mark_global_phase2_job_succeeded failed during memories startup: {err}"
);
}
}
info!(
"memory phase-2 global consolidation agent finished: agent_id={consolidation_agent_id} final_status={final_status:?}"
);
return;
}
let failure_reason = phase2_failure_reason(&final_status);
mark_phase2_failed_with_recovery(state_db.as_ref(), &ownership_token, &failure_reason).await;
warn!(
"memory phase-2 global consolidation agent finished with non-success status: agent_id={consolidation_agent_id} final_status={final_status:?}"
);
}
async fn mark_phase2_failed_with_recovery(
state_db: &codex_state::StateRuntime,
ownership_token: &str,
failure_reason: &str,
) {
match state_db
.mark_global_phase2_job_failed(
ownership_token,
failure_reason,
PHASE_TWO_JOB_RETRY_DELAY_SECONDS,
)
.await
{
Ok(true) => {}
Ok(false) => match state_db
.mark_global_phase2_job_failed_if_unowned(
ownership_token,
failure_reason,
PHASE_TWO_JOB_RETRY_DELAY_SECONDS,
)
.await
{
Ok(true) => {}
Ok(true) => {
debug!(
"memory phase-2 failure finalization applied fallback update for unowned running job"
);
}
Ok(false) => {
debug!(
"memory phase-2 failure finalization skipped after global ownership changed"
@@ -124,14 +173,14 @@ pub(super) fn spawn_phase2_completion_task(
}
Err(err) => {
warn!(
"state db mark_global_phase2_job_failed failed during memories startup: {err}"
"state db mark_global_phase2_job_failed_if_unowned failed during memories startup: {err}"
);
}
},
Err(err) => {
warn!("state db mark_global_phase2_job_failed failed during memories startup: {err}");
}
warn!(
"memory phase-2 global consolidation agent finished with non-success status: agent_id={consolidation_agent_id} final_status={final_status:?}"
);
});
}
}
fn is_phase2_success(final_status: &AgentStatus) -> bool {
@@ -146,7 +195,12 @@ fn phase2_failure_reason(final_status: &AgentStatus) -> String {
mod tests {
use super::is_phase2_success;
use super::phase2_failure_reason;
use super::run_phase2_completion_task;
use crate::agent::AgentStatus;
use codex_protocol::ThreadId;
use codex_state::Phase2JobClaimOutcome;
use pretty_assertions::assert_eq;
use std::sync::Arc;
#[test]
fn phase2_success_only_for_completed_status() {
@@ -164,4 +218,167 @@ mod tests {
assert!(reason.contains("consolidation agent finished with status"));
assert!(reason.contains("boom"));
}
/// Runs the completion task against an agent status that is already
/// `Completed`, using the token of a freshly claimed global job.
///
/// Afterwards the job must read as not dirty, and enqueuing a higher
/// watermark (124 > 123) must make it claimable again.
#[tokio::test]
async fn phase2_completion_marks_succeeded_for_completed_status() {
    let codex_home = tempfile::tempdir().expect("create temp codex home");
    // NOTE(review): the `init` result is wrapped in a fresh `Arc` here;
    // confirm `StateRuntime::init` does not already return an `Arc`.
    let runtime = codex_state::StateRuntime::init(
        codex_home.path().to_path_buf(),
        "test-provider".to_string(),
        None,
    )
    .await
    .expect("initialize state runtime");
    let state_db = Arc::new(runtime);

    let owner = ThreadId::new();
    state_db
        .enqueue_global_consolidation(123)
        .await
        .expect("enqueue global consolidation");
    let ownership_token = match state_db
        .try_claim_global_phase2_job(owner, 3_600)
        .await
        .expect("claim global phase-2 job")
    {
        Phase2JobClaimOutcome::Claimed {
            ownership_token, ..
        } => ownership_token,
        other => panic!("unexpected phase-2 claim outcome: {other:?}"),
    };

    // The sender is kept alive for the whole test; the initial value is
    // already `Completed`.
    let (_status_tx, status_rx) = tokio::sync::watch::channel(AgentStatus::Completed(None));
    let agent_id = ThreadId::new();
    run_phase2_completion_task(
        Arc::clone(&state_db),
        ownership_token.clone(),
        123,
        agent_id,
        status_rx,
    )
    .await;

    let claim_when_current = state_db
        .try_claim_global_phase2_job(ThreadId::new(), 3_600)
        .await
        .expect("claim up-to-date global job");
    assert_eq!(claim_when_current, Phase2JobClaimOutcome::SkippedNotDirty);

    state_db
        .enqueue_global_consolidation(124)
        .await
        .expect("enqueue advanced consolidation watermark");
    let claim_after_advance = state_db
        .try_claim_global_phase2_job(ThreadId::new(), 3_600)
        .await
        .expect("claim rerun global job");
    let reclaimed = matches!(claim_after_advance, Phase2JobClaimOutcome::Claimed { .. });
    assert!(
        reclaimed,
        "advanced watermark should be claimable after success finalization"
    );
}
/// Runs the completion task with a status channel whose sender has already
/// been dropped while the last observed status is still `Running`.
///
/// The follow-up claim must be skipped as not dirty, i.e. the job is no
/// longer held as running by the original owner.
#[tokio::test]
async fn phase2_completion_marks_failed_when_status_updates_are_lost() {
    let codex_home = tempfile::tempdir().expect("create temp codex home");
    let runtime = codex_state::StateRuntime::init(
        codex_home.path().to_path_buf(),
        "test-provider".to_string(),
        None,
    )
    .await
    .expect("initialize state runtime");
    let state_db = Arc::new(runtime);

    state_db
        .enqueue_global_consolidation(456)
        .await
        .expect("enqueue global consolidation");
    let ownership_token = match state_db
        .try_claim_global_phase2_job(ThreadId::new(), 3_600)
        .await
        .expect("claim global phase-2 job")
    {
        Phase2JobClaimOutcome::Claimed {
            ownership_token, ..
        } => ownership_token,
        other => panic!("unexpected phase-2 claim outcome: {other:?}"),
    };

    // Drop the sender before the task starts so no further status can arrive.
    let status_rx = {
        let (status_tx, receiver) = tokio::sync::watch::channel(AgentStatus::Running);
        drop(status_tx);
        receiver
    };
    let agent_id = ThreadId::new();
    run_phase2_completion_task(
        Arc::clone(&state_db),
        ownership_token,
        456,
        agent_id,
        status_rx,
    )
    .await;

    let claim_after_failure = state_db
        .try_claim_global_phase2_job(ThreadId::new(), 3_600)
        .await
        .expect("claim after failure finalization");
    assert_eq!(
        claim_after_failure,
        Phase2JobClaimOutcome::SkippedNotDirty,
        "failure finalization should leave global job in retry-backoff, not running ownership"
    );
}
/// Runs the completion task with a token (`"non-owner-token"`) that does not
/// match the token of the live claim, while the agent status stays `Running`.
///
/// The task must leave the real owner's lease untouched: a new claim still
/// reports `SkippedRunning`, and the original token can still mark the job as
/// succeeded.
///
/// NOTE(review): this relies on the task's first heartbeat firing promptly
/// and detecting the ownership mismatch; otherwise the call would not return
/// while the status remains `Running` — confirm against
/// `run_phase2_completion_task`.
#[tokio::test]
async fn phase2_completion_heartbeat_loss_does_not_steal_active_other_owner() {
    let codex_home = tempfile::tempdir().expect("create temp codex home");
    let state_db = Arc::new(
        codex_state::StateRuntime::init(
            codex_home.path().to_path_buf(),
            "test-provider".to_string(),
            None,
        )
        .await
        .expect("initialize state runtime"),
    );

    state_db
        .enqueue_global_consolidation(789)
        .await
        .expect("enqueue global consolidation");
    let claim = state_db
        .try_claim_global_phase2_job(ThreadId::new(), 3_600)
        .await
        .expect("claim global phase-2 job");
    let claimed_token = match claim {
        Phase2JobClaimOutcome::Claimed {
            ownership_token, ..
        } => ownership_token,
        other => panic!("unexpected phase-2 claim outcome: {other:?}"),
    };

    // Keep the sender alive so the status never leaves `Running`.
    let (_status_tx, status_rx) = tokio::sync::watch::channel(AgentStatus::Running);
    run_phase2_completion_task(
        Arc::clone(&state_db),
        "non-owner-token".to_string(),
        789,
        ThreadId::new(),
        status_rx,
    )
    .await;

    let claim = state_db
        .try_claim_global_phase2_job(ThreadId::new(), 3_600)
        .await
        .expect("claim after heartbeat ownership loss");
    assert_eq!(
        claim,
        Phase2JobClaimOutcome::SkippedRunning,
        "heartbeat ownership-loss handling should not steal a live owner lease"
    );

    // Assert the boolean directly instead of comparing it against `true`.
    let original_owner_finalized = state_db
        .mark_global_phase2_job_succeeded(claimed_token.as_str(), 789)
        .await
        .expect("mark original owner success");
    assert!(
        original_owner_finalized,
        "the original owner should still be able to finalize"
    );
}
}