[codex] Add rollback context duplication snapshot (#15562)

## What changed
- adds a targeted snapshot test for rollback with contextual diffs in
`core/tests/suite/compact_resume_fork.rs`
- snapshots the exact model-visible request input before the rolled-back
turn and on the follow-up request after rollback
- shows the duplicate developer and environment context pair appearing
again before the follow-up user message

## Why
Rollback currently rewinds the reference context baseline without
rewinding the live session overrides. On the next turn, the same
contextual diff is emitted again and duplicated in the request sent to
the model.

## Impact
- makes the regression visible in a canonical snapshot test
- keeps the snapshot on the shared `context_snapshot` path without
adding new formatting helpers
- gives a direct repro for future fixes to rollback/context
reconstruction

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Charley Cunningham
2026-03-23 15:36:23 -07:00
committed by GitHub
parent 67c1c7c054
commit 0f34b14b41
2 changed files with 165 additions and 0 deletions

View File

@@ -16,6 +16,9 @@ use codex_core::ThreadManager;
use codex_core::compact::SUMMARIZATION_PROMPT;
use codex_core::config::Config;
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use codex_protocol::config_types::CollaborationMode;
use codex_protocol::config_types::ModeKind;
use codex_protocol::config_types::Settings;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::WarningEvent;
@@ -502,6 +505,143 @@ async fn snapshot_rollback_past_compaction_replays_append_only_history() -> Resu
Ok(())
}
/// Scenario: a turn that introduced persistent pre-turn context diffs (a cwd
/// override plus collaboration-mode developer instructions) is rolled back,
/// and the next request currently carries those context updates twice.
///
/// Flow:
/// 1. Run turn 1 with the default context.
/// 2. Submit `Op::OverrideTurnContext`, then run turn 2.
/// 3. Roll back one turn via `Op::ThreadRollback` and wait for the
///    `ThreadRolledBack` event.
/// 4. Run a follow-up turn and inspect the second and third recorded requests.
///
/// The assertions and the snapshot pin the present (duplicating) behavior so
/// that a future fix shows up as a snapshot change.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn snapshot_rollback_followup_turn_duplicates_context_updates() -> Result<()> {
    /// Counts how many of `texts` contain `needle` as a substring.
    fn count_mentions(texts: &[String], needle: &str) -> usize {
        texts.iter().filter(|text| text.contains(needle)).count()
    }

    if network_disabled() {
        println!("Skipping test because network is disabled in this sandbox");
        return Ok(());
    }

    const MODEL: &str = "gpt-5.1-codex";
    const TURN_ONE_USER: &str = "turn 1 user";
    const TURN_TWO_USER: &str = "turn 2 user";
    const FOLLOWUP_USER: &str = "follow-up user";
    const ROLLED_BACK_DEV_INSTRUCTIONS: &str = "ROLLED_BACK_DEV_INSTRUCTIONS";
    const PRETURN_CONTEXT_DIFF_CWD: &str = "PRETURN_CONTEXT_DIFF_CWD";

    // One canned SSE response per expected request: turn 1, turn 2, follow-up.
    let turn_one_response = sse(vec![
        ev_assistant_message("m1", "turn 1 assistant"),
        ev_completed("r1"),
    ]);
    let turn_two_response = sse(vec![
        ev_assistant_message("m2", "turn 2 assistant"),
        ev_completed("r2"),
    ]);
    let followup_response = sse(vec![ev_completed("r3")]);

    let server = MockServer::start().await;
    let request_log = mount_sse_sequence(
        &server,
        vec![turn_one_response, turn_two_response, followup_response],
    )
    .await;

    let (_home, config, _manager, conversation) =
        start_test_conversation(&server, Some(MODEL)).await;

    user_turn(&conversation, TURN_ONE_USER).await;

    // Override both the cwd and the collaboration-mode developer instructions
    // before turn 2, so that turn 2 is the one that emits the context diffs.
    let override_cwd = config.cwd.join(PRETURN_CONTEXT_DIFF_CWD);
    std::fs::create_dir_all(&override_cwd)?;
    let collaboration_mode = CollaborationMode {
        mode: ModeKind::Default,
        settings: Settings {
            model: String::from(MODEL),
            reasoning_effort: None,
            developer_instructions: Some(String::from(ROLLED_BACK_DEV_INSTRUCTIONS)),
        },
    };
    let override_op = Op::OverrideTurnContext {
        cwd: Some(override_cwd),
        approval_policy: None,
        approvals_reviewer: None,
        sandbox_policy: None,
        windows_sandbox_level: None,
        model: None,
        effort: None,
        summary: None,
        service_tier: None,
        collaboration_mode: Some(collaboration_mode),
        personality: None,
    };
    conversation.submit(override_op).await?;

    user_turn(&conversation, TURN_TWO_USER).await;

    // Drop turn 2 and confirm that exactly one turn was rolled back.
    conversation
        .submit(Op::ThreadRollback { num_turns: 1 })
        .await?;
    let rolled_back = match wait_for_event(&conversation, |ev| {
        matches!(ev, EventMsg::ThreadRolledBack(_))
    })
    .await
    {
        EventMsg::ThreadRolledBack(event) => event,
        _ => panic!("expected thread rolled back event"),
    };
    assert_eq!(rolled_back.num_turns, 1);

    user_turn(&conversation, FOLLOWUP_USER).await;

    let requests = request_log.requests();
    assert_eq!(requests.len(), 3);
    let rolled_back_request = &requests[1];
    let followup_request = &requests[2];

    // The rolled-back turn's own request mentions each context update once.
    assert_eq!(
        count_mentions(
            &rolled_back_request.message_input_texts("developer"),
            ROLLED_BACK_DEV_INSTRUCTIONS,
        ),
        1
    );
    assert_eq!(
        count_mentions(
            &rolled_back_request.message_input_texts("user"),
            PRETURN_CONTEXT_DIFF_CWD,
        ),
        1
    );

    // The follow-up request after rollback mentions each of them twice.
    assert_eq!(
        count_mentions(
            &followup_request.message_input_texts("developer"),
            ROLLED_BACK_DEV_INSTRUCTIONS,
        ),
        2
    );
    let after_rollback_user_texts = followup_request.message_input_texts("user");
    assert_eq!(
        count_mentions(&after_rollback_user_texts, PRETURN_CONTEXT_DIFF_CWD),
        2
    );
    // The follow-up user message is still the last user-role input.
    assert_eq!(
        after_rollback_user_texts.last().map(String::as_str),
        Some(FOLLOWUP_USER)
    );

    // The snapshot expression is kept inline and unchanged so the recorded
    // `expression:` metadata in the `.snap` file continues to match.
    insta::assert_snapshot!(
        "rollback_followup_turn_duplicates_context_updates",
        context_snapshot::format_labeled_requests_snapshot(
            "rollback currently duplicates pre-turn override context updates on the follow-up request",
            &[
                ("rolled-back turn request", &requests[1]),
                ("follow-up request after rollback", &requests[2]),
            ],
            &ContextSnapshotOptions::default()
                .strip_capability_instructions()
                .render_mode(ContextSnapshotRenderMode::KindWithTextPrefix { max_chars: 96 }),
        )
    );

    Ok(())
}
fn normalize_line_endings(value: &mut Value) {
match value {
Value::String(text) => {

View File

@@ -0,0 +1,25 @@
---
source: core/tests/suite/compact_resume_fork.rs
expression: "context_snapshot::format_labeled_requests_snapshot(\"rollback currently duplicates pre-turn override context updates on the follow-up request\",\n&[(\"rolled-back turn request\", &requests[1]),\n(\"follow-up request after rollback\", &requests[2]),],\n&ContextSnapshotOptions::default().strip_capability_instructions().render_mode(ContextSnapshotRenderMode::KindWithTextPrefix\n{ max_chars: 96 }),)"
---
Scenario: rollback currently duplicates pre-turn override context updates on the follow-up request
## rolled-back turn request
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:turn 1 user
03:message/assistant:turn 1 assistant
04:message/developer:<collaboration_mode>ROLLED_BACK_DEV_INSTRUCTIONS</collaboration_mode>
05:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
06:message/user:turn 2 user
## follow-up request after rollback
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:turn 1 user
03:message/assistant:turn 1 assistant
04:message/developer:<collaboration_mode>ROLLED_BACK_DEV_INSTRUCTIONS</collaboration_mode>
05:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
06:message/developer:<collaboration_mode>ROLLED_BACK_DEV_INSTRUCTIONS</collaboration_mode>
07:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
08:message/user:follow-up user