Defer persistence of rollout file (#11028)

- Defer rollout persistence for fresh threads (`InitialHistory::New`):
keep rollout events in memory and only materialize rollout file + state
DB row on first `EventMsg::UserMessage`.
- Keep precomputed rollout path available before materialization.
- Change `thread/start` to build thread response from live config
snapshot and optional precomputed path.
- Improve pre-materialization behavior in app-server/TUI: clearer
invalid-request errors for file-backed ops and a friendlier `/fork` “not
ready yet” UX.
- Update tests to match deferred semantics across
start/read/archive/unarchive/fork/resume/review flows.
- Improve resilience of the user_shell test, which should be unrelated to
this change but appears to be affected by the timing changes it introduces.

For Reviewers:
* The primary change is in recorder.rs
* Most of the other changes were to fix up broken assumptions in
existing tests

Testing:
* Manually tested CLI
* Exercised app server paths by manually running IDE Extension with
rebuilt CLI binary
* The only user-visible change is that `/fork` in the TUI generates a visible
error if used prior to the first turn
This commit is contained in:
Eric Traut
2026-02-07 23:05:03 -08:00
committed by GitHub
parent 6d08298f4e
commit b3de6c7f2b
19 changed files with 983 additions and 195 deletions

View File

@@ -2,7 +2,12 @@
use anyhow::Result;
use codex_core::features::Feature;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_function_call_agent_response;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::test_codex;
@@ -26,6 +31,18 @@ async fn get_memory_tool_returns_persisted_thread_memory() -> Result<()> {
let thread_id = test.session_configured.session_id;
let thread_id_string = thread_id.to_string();
mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-init"),
ev_assistant_message("msg-init", "Materialized"),
ev_completed("resp-init"),
]),
)
.await;
test.submit_turn("materialize thread before memory write")
.await?;
let mut thread_exists = false;
// Wait for DB creation.
for _ in 0..100 {

View File

@@ -176,6 +176,7 @@ async fn find_locates_rollout_file_written_by_recorder() -> std::io::Result<()>
None,
)
.await?;
recorder.persist().await?;
recorder.flush().await?;
let index_path = home.path().join("session_index.jsonl");

View File

@@ -43,6 +43,18 @@ async fn new_thread_is_recorded_in_state_db() -> Result<()> {
}
let db = test.codex.state_db().expect("state db enabled");
assert!(
!rollout_path.exists(),
"fresh thread rollout should not be materialized before first user message"
);
let initial_metadata = db.get_thread(thread_id).await?;
assert!(
initial_metadata.is_none(),
"fresh thread should not be recorded in state db before first user message"
);
test.submit_turn("materialize rollout").await?;
let mut metadata = None;
for _ in 0..100 {
@@ -56,6 +68,10 @@ async fn new_thread_is_recorded_in_state_db() -> Result<()> {
let metadata = metadata.expect("thread should exist in state db");
assert_eq!(metadata.id, thread_id);
assert_eq!(metadata.rollout_path, rollout_path);
assert!(
rollout_path.exists(),
"rollout should be materialized after first user message"
);
Ok(())
}

View File

@@ -23,6 +23,7 @@ use core_test_support::skip_if_no_network;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use core_test_support::wait_for_event_with_timeout;
use regex_lite::escape;
use std::path::PathBuf;
use tempfile::TempDir;
@@ -99,11 +100,11 @@ async fn user_shell_cmd_can_be_interrupted() {
// Set up isolated config and conversation.
let server = start_mock_server().await;
let mut builder = test_codex();
let codex = builder
let fixture = builder
.build(&server)
.await
.expect("create new conversation")
.codex;
.expect("create new conversation");
let codex = &fixture.codex;
// Start a long-running command and then interrupt it.
let sleep_cmd = "sleep 5".to_string();
@@ -113,11 +114,22 @@ async fn user_shell_cmd_can_be_interrupted() {
.unwrap();
// Wait until it has started (ExecCommandBegin), then interrupt.
let _ = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExecCommandBegin(_))).await;
let _begin = wait_for_event_match(codex, |ev| match ev {
EventMsg::ExecCommandBegin(event) if event.source == ExecCommandSource::UserShell => {
Some(event.clone())
}
_ => None,
})
.await;
codex.submit(Op::Interrupt).await.unwrap();
// Expect a TurnAborted(Interrupted) notification.
let msg = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnAborted(_))).await;
let msg = wait_for_event_with_timeout(
codex,
|ev| matches!(ev, EventMsg::TurnAborted(_)),
Duration::from_secs(60),
)
.await;
let EventMsg::TurnAborted(ev) = msg else {
unreachable!()
};