mirror of
https://github.com/openai/codex.git
synced 2026-04-29 08:56:38 +00:00
[codex] Add rollback context duplication snapshot (#15562)
## What changed - adds a targeted snapshot test for rollback with contextual diffs in `codex_tests.rs` - snapshots the exact model-visible request input before the rolled-back turn and on the follow-up request after rollback - shows the duplicate developer and environment context pair appearing again before the follow-up user message ## Why Rollback currently rewinds the reference context baseline without rewinding the live session overrides. On the next turn, the same contextual diff is emitted again and duplicated in the request sent to the model. ## Impact - makes the regression visible in a canonical snapshot test - keeps the snapshot on the shared `context_snapshot` path without adding new formatting helpers - gives a direct repro for future fixes to rollback/context reconstruction --------- Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
committed by
GitHub
parent
67c1c7c054
commit
0f34b14b41
@@ -16,6 +16,9 @@ use codex_core::ThreadManager;
|
||||
use codex_core::compact::SUMMARIZATION_PROMPT;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_protocol::config_types::CollaborationMode;
|
||||
use codex_protocol::config_types::ModeKind;
|
||||
use codex_protocol::config_types::Settings;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::Op;
|
||||
use codex_protocol::protocol::WarningEvent;
|
||||
@@ -502,6 +505,143 @@ async fn snapshot_rollback_past_compaction_replays_append_only_history() -> Resu
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
/// Scenario: rolling back a turn that introduced persistent pre-turn context
|
||||
/// diffs currently duplicates those context updates on the next request.
|
||||
async fn snapshot_rollback_followup_turn_duplicates_context_updates() -> Result<()> {
|
||||
if network_disabled() {
|
||||
println!("Skipping test because network is disabled in this sandbox");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
const MODEL: &str = "gpt-5.1-codex";
|
||||
const TURN_ONE_USER: &str = "turn 1 user";
|
||||
const TURN_TWO_USER: &str = "turn 2 user";
|
||||
const FOLLOWUP_USER: &str = "follow-up user";
|
||||
const ROLLED_BACK_DEV_INSTRUCTIONS: &str = "ROLLED_BACK_DEV_INSTRUCTIONS";
|
||||
const PRETURN_CONTEXT_DIFF_CWD: &str = "PRETURN_CONTEXT_DIFF_CWD";
|
||||
|
||||
let server = MockServer::start().await;
|
||||
let request_log = mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
sse(vec![
|
||||
ev_assistant_message("m1", "turn 1 assistant"),
|
||||
ev_completed("r1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("m2", "turn 2 assistant"),
|
||||
ev_completed("r2"),
|
||||
]),
|
||||
sse(vec![ev_completed("r3")]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let (_home, config, _manager, conversation) =
|
||||
start_test_conversation(&server, Some(MODEL)).await;
|
||||
|
||||
user_turn(&conversation, TURN_ONE_USER).await;
|
||||
|
||||
let override_cwd = config.cwd.join(PRETURN_CONTEXT_DIFF_CWD);
|
||||
std::fs::create_dir_all(&override_cwd)?;
|
||||
conversation
|
||||
.submit(Op::OverrideTurnContext {
|
||||
cwd: Some(override_cwd),
|
||||
approval_policy: None,
|
||||
approvals_reviewer: None,
|
||||
sandbox_policy: None,
|
||||
windows_sandbox_level: None,
|
||||
model: None,
|
||||
effort: None,
|
||||
summary: None,
|
||||
service_tier: None,
|
||||
collaboration_mode: Some(CollaborationMode {
|
||||
mode: ModeKind::Default,
|
||||
settings: Settings {
|
||||
model: MODEL.to_string(),
|
||||
reasoning_effort: None,
|
||||
developer_instructions: Some(ROLLED_BACK_DEV_INSTRUCTIONS.to_string()),
|
||||
},
|
||||
}),
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
user_turn(&conversation, TURN_TWO_USER).await;
|
||||
|
||||
conversation
|
||||
.submit(Op::ThreadRollback { num_turns: 1 })
|
||||
.await?;
|
||||
let rollback_event = wait_for_event(&conversation, |ev| {
|
||||
matches!(ev, EventMsg::ThreadRolledBack(_))
|
||||
})
|
||||
.await;
|
||||
let EventMsg::ThreadRolledBack(rollback_event) = rollback_event else {
|
||||
panic!("expected thread rolled back event");
|
||||
};
|
||||
assert_eq!(rollback_event.num_turns, 1);
|
||||
|
||||
user_turn(&conversation, FOLLOWUP_USER).await;
|
||||
|
||||
let requests = request_log.requests();
|
||||
assert_eq!(requests.len(), 3);
|
||||
|
||||
assert_eq!(
|
||||
requests[1]
|
||||
.message_input_texts("developer")
|
||||
.iter()
|
||||
.filter(|text| text.contains(ROLLED_BACK_DEV_INSTRUCTIONS))
|
||||
.count(),
|
||||
1
|
||||
);
|
||||
assert_eq!(
|
||||
requests[1]
|
||||
.message_input_texts("user")
|
||||
.iter()
|
||||
.filter(|text| text.contains(PRETURN_CONTEXT_DIFF_CWD))
|
||||
.count(),
|
||||
1
|
||||
);
|
||||
assert_eq!(
|
||||
requests[2]
|
||||
.message_input_texts("developer")
|
||||
.iter()
|
||||
.filter(|text| text.contains(ROLLED_BACK_DEV_INSTRUCTIONS))
|
||||
.count(),
|
||||
2
|
||||
);
|
||||
|
||||
let after_rollback_user_texts = requests[2].message_input_texts("user");
|
||||
assert_eq!(
|
||||
after_rollback_user_texts
|
||||
.iter()
|
||||
.filter(|text| text.contains(PRETURN_CONTEXT_DIFF_CWD))
|
||||
.count(),
|
||||
2
|
||||
);
|
||||
assert_eq!(
|
||||
after_rollback_user_texts.last().map(String::as_str),
|
||||
Some(FOLLOWUP_USER)
|
||||
);
|
||||
|
||||
insta::assert_snapshot!(
|
||||
"rollback_followup_turn_duplicates_context_updates",
|
||||
context_snapshot::format_labeled_requests_snapshot(
|
||||
"rollback currently duplicates pre-turn override context updates on the follow-up request",
|
||||
&[
|
||||
("rolled-back turn request", &requests[1]),
|
||||
("follow-up request after rollback", &requests[2]),
|
||||
],
|
||||
&ContextSnapshotOptions::default()
|
||||
.strip_capability_instructions()
|
||||
.render_mode(ContextSnapshotRenderMode::KindWithTextPrefix { max_chars: 96 }),
|
||||
)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn normalize_line_endings(value: &mut Value) {
|
||||
match value {
|
||||
Value::String(text) => {
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
---
|
||||
source: core/tests/suite/compact_resume_fork.rs
|
||||
expression: "context_snapshot::format_labeled_requests_snapshot(\"rollback currently duplicates pre-turn override context updates on the follow-up request\",\n&[(\"rolled-back turn request\", &requests[1]),\n(\"follow-up request after rollback\", &requests[2]),],\n&ContextSnapshotOptions::default().strip_capability_instructions().render_mode(ContextSnapshotRenderMode::KindWithTextPrefix\n{ max_chars: 96 }),)"
|
||||
---
|
||||
Scenario: rollback currently duplicates pre-turn override context updates on the follow-up request
|
||||
|
||||
## rolled-back turn request
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
02:message/user:turn 1 user
|
||||
03:message/assistant:turn 1 assistant
|
||||
04:message/developer:<collaboration_mode>ROLLED_BACK_DEV_INSTRUCTIONS</collaboration_mode>
|
||||
05:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
|
||||
06:message/user:turn 2 user
|
||||
|
||||
## follow-up request after rollback
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
02:message/user:turn 1 user
|
||||
03:message/assistant:turn 1 assistant
|
||||
04:message/developer:<collaboration_mode>ROLLED_BACK_DEV_INSTRUCTIONS</collaboration_mode>
|
||||
05:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
|
||||
06:message/developer:<collaboration_mode>ROLLED_BACK_DEV_INSTRUCTIONS</collaboration_mode>
|
||||
07:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
|
||||
08:message/user:follow-up user
|
||||
Reference in New Issue
Block a user