mirror of
https://github.com/openai/codex.git
synced 2026-05-28 15:00:16 +00:00
Replace impossible same-turn realtime snapshots
Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
@@ -7,19 +7,25 @@ use std::path::PathBuf;
|
||||
use anyhow::Result;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::compact::SUMMARY_PREFIX;
|
||||
use codex_core::config::Constrained;
|
||||
use codex_core::features::Feature;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ConversationStartParams;
|
||||
use codex_protocol::protocol::ErrorEvent;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::ExecApprovalRequestEvent;
|
||||
use codex_protocol::protocol::ItemCompletedEvent;
|
||||
use codex_protocol::protocol::ItemStartedEvent;
|
||||
use codex_protocol::protocol::Op;
|
||||
use codex_protocol::protocol::RealtimeConversationRealtimeEvent;
|
||||
use codex_protocol::protocol::RealtimeEvent;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::RolloutLine;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::TurnContextItem;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::context_snapshot;
|
||||
@@ -77,6 +83,22 @@ fn format_labeled_requests_snapshot(
|
||||
)
|
||||
}
|
||||
|
||||
fn format_labeled_requests_snapshot_with_normalized_exec_output(
|
||||
scenario: &str,
|
||||
sections: &[(&str, &responses::ResponsesRequest)],
|
||||
) -> String {
|
||||
format_labeled_requests_snapshot(scenario, sections)
|
||||
.lines()
|
||||
.map(|line| {
|
||||
line.split_once("function_call_output:").map_or_else(
|
||||
|| line.to_string(),
|
||||
|(prefix, _)| format!("{prefix}function_call_output:<UNIFIED_EXEC_OUTPUT>"),
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
|
||||
fn compacted_summary_only_output(summary: &str) -> Vec<ResponseItem> {
|
||||
vec![ResponseItem::Compaction {
|
||||
encrypted_content: summary_with_prefix(summary),
|
||||
@@ -163,10 +185,7 @@ fn read_rollout_lines(path: &Path) -> Result<Vec<RolloutLine>> {
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn rewrite_rollout_with_additional_turn_context(
|
||||
path: &Path,
|
||||
inserted_turn_context_items: &[TurnContextItem],
|
||||
) -> Result<()> {
|
||||
fn first_turn_context_items(path: &Path) -> Result<Vec<TurnContextItem>> {
|
||||
let rollout_lines = read_rollout_lines(path)?;
|
||||
let turn_id = rollout_lines
|
||||
.iter()
|
||||
@@ -175,33 +194,43 @@ fn rewrite_rollout_with_additional_turn_context(
|
||||
_ => None,
|
||||
})
|
||||
.expect("rollout should include a turn context item");
|
||||
let mut inserted = false;
|
||||
let mut rewritten = Vec::with_capacity(rollout_lines.len() + inserted_turn_context_items.len());
|
||||
|
||||
for line in rollout_lines {
|
||||
if !inserted
|
||||
&& let RolloutItem::EventMsg(EventMsg::TurnComplete(event)) = &line.item
|
||||
&& event.turn_id == turn_id
|
||||
{
|
||||
for (idx, turn_context_item) in inserted_turn_context_items.iter().enumerate() {
|
||||
rewritten.push(RolloutLine {
|
||||
timestamp: format!("2000-01-01T00:00:{idx:02}Z"),
|
||||
item: RolloutItem::TurnContext(turn_context_item.clone()),
|
||||
});
|
||||
}
|
||||
inserted = true;
|
||||
}
|
||||
rewritten.push(line);
|
||||
}
|
||||
|
||||
assert!(inserted, "expected to insert additional turn context items");
|
||||
let serialized = rewritten
|
||||
Ok(rollout_lines
|
||||
.into_iter()
|
||||
.map(|line| serde_json::to_string(&line))
|
||||
.collect::<std::result::Result<Vec<_>, _>>()?
|
||||
.join("\n");
|
||||
fs::write(path, format!("{serialized}\n"))?;
|
||||
Ok(())
|
||||
.filter_map(|line| match line.item {
|
||||
RolloutItem::TurnContext(ctx) if ctx.turn_id.as_deref() == Some(turn_id.as_str()) => {
|
||||
Some(ctx)
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn expect_exec_approval(
|
||||
codex: &codex_core::CodexThread,
|
||||
expected_command: &str,
|
||||
) -> ExecApprovalRequestEvent {
|
||||
let event = wait_for_event(codex, |event| {
|
||||
matches!(
|
||||
event,
|
||||
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
match event {
|
||||
EventMsg::ExecApprovalRequest(approval) => {
|
||||
let last_arg = approval
|
||||
.command
|
||||
.last()
|
||||
.map(String::as_str)
|
||||
.unwrap_or_default();
|
||||
assert_eq!(last_arg, expected_command);
|
||||
approval
|
||||
}
|
||||
EventMsg::TurnComplete(_) => panic!("expected approval request before completion"),
|
||||
other => panic!("unexpected event: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_request_contains_realtime_start(request: &responses::ResponsesRequest) {
|
||||
@@ -236,6 +265,28 @@ fn assert_request_omits_realtime_update(request: &responses::ResponsesRequest) {
|
||||
);
|
||||
}
|
||||
|
||||
fn realtime_end_message_count(request: &responses::ResponsesRequest) -> usize {
|
||||
request
|
||||
.message_input_texts("developer")
|
||||
.iter()
|
||||
.filter(|text| text.contains("Reason: inactive"))
|
||||
.count()
|
||||
}
|
||||
|
||||
fn ev_exec_command_call_requiring_approval(call_id: &str, command: &str) -> serde_json::Value {
|
||||
responses::ev_function_call(
|
||||
call_id,
|
||||
"exec_command",
|
||||
&serde_json::to_string(&json!({
|
||||
"cmd": command,
|
||||
"yield_time_ms": 1_000_u64,
|
||||
"sandbox_permissions": codex_protocol::models::SandboxPermissions::RequireEscalated,
|
||||
"justification": "Need to continue the paused turn"
|
||||
}))
|
||||
.expect("serialize exec_command arguments"),
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn remote_compact_replaces_history_for_followups() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
@@ -1938,13 +1989,119 @@ async fn snapshot_request_shape_remote_compact_resume_restates_realtime_end() ->
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn snapshot_request_shape_remote_resume_uses_latest_same_turn_turn_context_item() -> Result<()>
|
||||
{
|
||||
async fn snapshot_request_shape_remote_mid_turn_realtime_end() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = wiremock::MockServer::start().await;
|
||||
let realtime_server = start_remote_realtime_server().await;
|
||||
let mut builder = remote_realtime_test_codex_builder(&realtime_server);
|
||||
let mut builder = remote_realtime_test_codex_builder(&realtime_server).with_config(|config| {
|
||||
config.features.enable(Feature::RequestPermissions);
|
||||
config.permissions.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
|
||||
config.permissions.sandbox_policy =
|
||||
Constrained::allow_any(SandboxPolicy::new_read_only_policy());
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let rollout_path = test
|
||||
.session_configured
|
||||
.rollout_path
|
||||
.clone()
|
||||
.expect("rollout path");
|
||||
|
||||
let shell_command = "printf mid-turn-realtime-end";
|
||||
let responses_mock = responses::mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
responses::sse(vec![
|
||||
ev_exec_command_call_requiring_approval(
|
||||
"call-remote-mid-turn-realtime-end",
|
||||
shell_command,
|
||||
),
|
||||
responses::ev_completed("r1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m2", "REMOTE_MID_TURN_REALTIME_ENDED_REPLY"),
|
||||
responses::ev_completed("r2"),
|
||||
]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
start_realtime_conversation(test.codex.as_ref()).await?;
|
||||
|
||||
test.codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_ONE".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let approval = expect_exec_approval(test.codex.as_ref(), shell_command).await;
|
||||
close_realtime_conversation(test.codex.as_ref()).await?;
|
||||
test.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: approval.effective_approval_id(),
|
||||
turn_id: None,
|
||||
decision: ReviewDecision::Approved,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
test.codex.submit(Op::Shutdown).await?;
|
||||
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::ShutdownComplete)).await;
|
||||
|
||||
let turn_context_items = first_turn_context_items(&rollout_path)?;
|
||||
assert_eq!(
|
||||
turn_context_items.len(),
|
||||
2,
|
||||
"expected two same-turn context snapshots"
|
||||
);
|
||||
assert_eq!(
|
||||
turn_context_items
|
||||
.last()
|
||||
.and_then(|item| item.realtime_active),
|
||||
Some(false)
|
||||
);
|
||||
let requests = responses_mock.requests();
|
||||
assert_eq!(requests.len(), 2, "expected two model requests");
|
||||
|
||||
let first_request = &requests[0];
|
||||
let second_request = &requests[1];
|
||||
assert_request_contains_realtime_end(second_request);
|
||||
|
||||
insta::assert_snapshot!(
|
||||
"remote_mid_turn_realtime_end_shapes",
|
||||
format_labeled_requests_snapshot_with_normalized_exec_output(
|
||||
"Realtime closes while the turn is paused on exec approval. The continuation request in the same turn emits the realtime-ended developer update, and the rollout persists a later same-turn TurnContext snapshot with realtime inactive.",
|
||||
&[
|
||||
("First Request Before Realtime Close", first_request),
|
||||
(
|
||||
"Same-Turn Continuation After Realtime Close",
|
||||
second_request
|
||||
),
|
||||
]
|
||||
)
|
||||
);
|
||||
|
||||
realtime_server.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn snapshot_request_shape_remote_resume_after_same_turn_realtime_end_uses_latest_turn_context_item()
|
||||
-> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = wiremock::MockServer::start().await;
|
||||
let realtime_server = start_remote_realtime_server().await;
|
||||
let mut builder = remote_realtime_test_codex_builder(&realtime_server).with_config(|config| {
|
||||
config.features.enable(Feature::RequestPermissions);
|
||||
config.permissions.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
|
||||
config.permissions.sandbox_policy =
|
||||
Constrained::allow_any(SandboxPolicy::new_read_only_policy());
|
||||
});
|
||||
let initial = builder.build(&server).await?;
|
||||
let home = initial.home.clone();
|
||||
let rollout_path = initial
|
||||
@@ -1953,16 +2110,24 @@ async fn snapshot_request_shape_remote_resume_uses_latest_same_turn_turn_context
|
||||
.clone()
|
||||
.expect("rollout path");
|
||||
|
||||
let shell_command = "printf mid-turn-realtime-end";
|
||||
let responses_mock = responses::mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m1", "REMOTE_FIRST_REPLY"),
|
||||
responses::ev_completed_with_tokens("r1", 60),
|
||||
ev_exec_command_call_requiring_approval(
|
||||
"call-remote-resume-mid-turn-realtime-end",
|
||||
shell_command,
|
||||
),
|
||||
responses::ev_completed("r1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m2", "REMOTE_AFTER_RESUME_REPLY"),
|
||||
responses::ev_completed_with_tokens("r2", 80),
|
||||
responses::ev_assistant_message("m2", "REMOTE_MID_TURN_REALTIME_ENDED_REPLY"),
|
||||
responses::ev_completed("r2"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m3", "REMOTE_AFTER_RESUME_REPLY"),
|
||||
responses::ev_completed("r3"),
|
||||
]),
|
||||
],
|
||||
)
|
||||
@@ -1980,6 +2145,17 @@ async fn snapshot_request_shape_remote_resume_uses_latest_same_turn_turn_context
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let approval = expect_exec_approval(initial.codex.as_ref(), shell_command).await;
|
||||
close_realtime_conversation(initial.codex.as_ref()).await?;
|
||||
initial
|
||||
.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: approval.effective_approval_id(),
|
||||
turn_id: None,
|
||||
decision: ReviewDecision::Approved,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
initial.codex.submit(Op::Shutdown).await?;
|
||||
@@ -1988,20 +2164,27 @@ async fn snapshot_request_shape_remote_resume_uses_latest_same_turn_turn_context
|
||||
})
|
||||
.await;
|
||||
|
||||
let first_turn_context_item = read_rollout_lines(&rollout_path)?
|
||||
.into_iter()
|
||||
.find_map(|line| match line.item {
|
||||
RolloutItem::TurnContext(ctx) => Some(ctx),
|
||||
_ => None,
|
||||
})
|
||||
.expect("rollout should include first-turn context");
|
||||
let mut latest_turn_context_item = first_turn_context_item;
|
||||
latest_turn_context_item.realtime_active = Some(false);
|
||||
rewrite_rollout_with_additional_turn_context(&rollout_path, &[latest_turn_context_item])?;
|
||||
let turn_context_items = first_turn_context_items(&rollout_path)?;
|
||||
assert_eq!(
|
||||
turn_context_items.len(),
|
||||
2,
|
||||
"expected two same-turn context snapshots"
|
||||
);
|
||||
assert_eq!(
|
||||
turn_context_items
|
||||
.last()
|
||||
.and_then(|item| item.realtime_active),
|
||||
Some(false)
|
||||
);
|
||||
|
||||
let mut resume_builder = remote_realtime_test_codex_builder(&realtime_server);
|
||||
let mut resume_builder =
|
||||
remote_realtime_test_codex_builder(&realtime_server).with_config(|config| {
|
||||
config.features.enable(Feature::RequestPermissions);
|
||||
config.permissions.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
|
||||
config.permissions.sandbox_policy =
|
||||
Constrained::allow_any(SandboxPolicy::new_read_only_policy());
|
||||
});
|
||||
let resumed = resume_builder.resume(&server, home, rollout_path).await?;
|
||||
start_realtime_conversation(resumed.codex.as_ref()).await?;
|
||||
|
||||
resumed
|
||||
.codex
|
||||
@@ -2016,36 +2199,48 @@ async fn snapshot_request_shape_remote_resume_uses_latest_same_turn_turn_context
|
||||
wait_for_event(&resumed.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let requests = responses_mock.requests();
|
||||
assert_eq!(requests.len(), 2, "expected two model requests");
|
||||
assert_eq!(requests.len(), 3, "expected three model requests");
|
||||
|
||||
let initial_request = &requests[0];
|
||||
let after_resume_request = &requests[1];
|
||||
assert_request_contains_realtime_start(after_resume_request);
|
||||
let second_request = &requests[1];
|
||||
let after_resume_request = &requests[2];
|
||||
assert_request_contains_realtime_end(second_request);
|
||||
assert_eq!(
|
||||
realtime_end_message_count(after_resume_request),
|
||||
1,
|
||||
"expected resume to preserve exactly one historical realtime-ended message without emitting a duplicate update"
|
||||
);
|
||||
|
||||
insta::assert_snapshot!(
|
||||
"remote_resume_uses_latest_same_turn_turn_context_item_shapes",
|
||||
format_labeled_requests_snapshot(
|
||||
"Resume after a turn with two persisted TurnContext snapshots for the same turn_id: replay should treat the later inactive snapshot as authoritative, so starting realtime again before the resumed turn emits a fresh realtime-start instruction.",
|
||||
"remote_resume_after_same_turn_realtime_end_uses_latest_turn_context_item_shapes",
|
||||
format_labeled_requests_snapshot_with_normalized_exec_output(
|
||||
"A same-turn realtime-ended update is recorded before the turn completes. On resume, replay should use the later inactive TurnContext snapshot from that same turn and preserve that historical ended message exactly once instead of emitting a duplicate update.",
|
||||
&[
|
||||
("Last Request Before Resume", initial_request),
|
||||
(
|
||||
"Same-Turn Continuation After Realtime Close",
|
||||
second_request
|
||||
),
|
||||
("Remote Post-Resume History Layout", after_resume_request),
|
||||
]
|
||||
)
|
||||
);
|
||||
|
||||
close_realtime_conversation(resumed.codex.as_ref()).await?;
|
||||
realtime_server.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn snapshot_request_shape_remote_compact_resume_uses_latest_same_turn_turn_context_item()
|
||||
async fn snapshot_request_shape_remote_compact_resume_after_same_turn_realtime_end_uses_latest_turn_context_item()
|
||||
-> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = wiremock::MockServer::start().await;
|
||||
let realtime_server = start_remote_realtime_server().await;
|
||||
let mut builder = remote_realtime_test_codex_builder(&realtime_server);
|
||||
let mut builder = remote_realtime_test_codex_builder(&realtime_server).with_config(|config| {
|
||||
config.features.enable(Feature::RequestPermissions);
|
||||
config.permissions.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
|
||||
config.permissions.sandbox_policy =
|
||||
Constrained::allow_any(SandboxPolicy::new_read_only_policy());
|
||||
});
|
||||
let initial = builder.build(&server).await?;
|
||||
let home = initial.home.clone();
|
||||
let rollout_path = initial
|
||||
@@ -2054,16 +2249,24 @@ async fn snapshot_request_shape_remote_compact_resume_uses_latest_same_turn_turn
|
||||
.clone()
|
||||
.expect("rollout path");
|
||||
|
||||
let shell_command = "printf mid-turn-realtime-end";
|
||||
let responses_mock = responses::mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m1", "REMOTE_FIRST_REPLY"),
|
||||
responses::ev_completed_with_tokens("r1", 60),
|
||||
ev_exec_command_call_requiring_approval(
|
||||
"call-remote-compact-resume-mid-turn-realtime-end",
|
||||
shell_command,
|
||||
),
|
||||
responses::ev_completed("r1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m2", "REMOTE_AFTER_RESUME_REPLY"),
|
||||
responses::ev_completed_with_tokens("r2", 80),
|
||||
responses::ev_assistant_message("m2", "REMOTE_MID_TURN_REALTIME_ENDED_REPLY"),
|
||||
responses::ev_completed("r2"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m3", "REMOTE_AFTER_RESUME_REPLY"),
|
||||
responses::ev_completed("r3"),
|
||||
]),
|
||||
],
|
||||
)
|
||||
@@ -2090,29 +2293,47 @@ async fn snapshot_request_shape_remote_compact_resume_uses_latest_same_turn_turn
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let approval = expect_exec_approval(initial.codex.as_ref(), shell_command).await;
|
||||
close_realtime_conversation(initial.codex.as_ref()).await?;
|
||||
initial
|
||||
.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: approval.effective_approval_id(),
|
||||
turn_id: None,
|
||||
decision: ReviewDecision::Approved,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
initial.codex.submit(Op::Compact).await?;
|
||||
wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
initial.codex.submit(Op::Shutdown).await?;
|
||||
wait_for_event(&initial.codex, |ev| {
|
||||
matches!(ev, EventMsg::ShutdownComplete)
|
||||
})
|
||||
.await;
|
||||
|
||||
let first_turn_context_item = read_rollout_lines(&rollout_path)?
|
||||
.into_iter()
|
||||
.find_map(|line| match line.item {
|
||||
RolloutItem::TurnContext(ctx) => Some(ctx),
|
||||
_ => None,
|
||||
})
|
||||
.expect("rollout should include first-turn context");
|
||||
let mut latest_turn_context_item = first_turn_context_item;
|
||||
latest_turn_context_item.realtime_active = Some(false);
|
||||
rewrite_rollout_with_additional_turn_context(&rollout_path, &[latest_turn_context_item])?;
|
||||
let turn_context_items = first_turn_context_items(&rollout_path)?;
|
||||
assert_eq!(
|
||||
turn_context_items.len(),
|
||||
2,
|
||||
"expected two same-turn context snapshots"
|
||||
);
|
||||
assert_eq!(
|
||||
turn_context_items
|
||||
.last()
|
||||
.and_then(|item| item.realtime_active),
|
||||
Some(false)
|
||||
);
|
||||
|
||||
let mut resume_builder = remote_realtime_test_codex_builder(&realtime_server);
|
||||
let mut resume_builder =
|
||||
remote_realtime_test_codex_builder(&realtime_server).with_config(|config| {
|
||||
config.features.enable(Feature::RequestPermissions);
|
||||
config.permissions.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
|
||||
config.permissions.sandbox_policy =
|
||||
Constrained::allow_any(SandboxPolicy::new_read_only_policy());
|
||||
});
|
||||
let resumed = resume_builder.resume(&server, home, rollout_path).await?;
|
||||
|
||||
resumed
|
||||
@@ -2129,18 +2350,22 @@ async fn snapshot_request_shape_remote_compact_resume_uses_latest_same_turn_turn
|
||||
|
||||
assert_eq!(compact_mock.requests().len(), 1);
|
||||
let requests = responses_mock.requests();
|
||||
assert_eq!(requests.len(), 2, "expected two model requests");
|
||||
assert_eq!(requests.len(), 3, "expected three model requests");
|
||||
|
||||
let compact_request = compact_mock.single_request();
|
||||
let after_resume_request = &requests[1];
|
||||
let second_request = &requests[1];
|
||||
let after_resume_request = &requests[2];
|
||||
assert_request_contains_realtime_end(second_request);
|
||||
assert_request_omits_realtime_update(after_resume_request);
|
||||
|
||||
insta::assert_snapshot!(
|
||||
"remote_compact_resume_uses_latest_same_turn_turn_context_item_shapes",
|
||||
format_labeled_requests_snapshot(
|
||||
"After /compact and resume, replay should rebuild previous-turn settings from the last TurnContext snapshot within the compacted turn. Here the earlier snapshot was realtime-active but the later same-turn snapshot was inactive, so the resumed request should not emit a stale realtime-end instruction.",
|
||||
"remote_compact_resume_after_same_turn_realtime_end_uses_latest_turn_context_item_shapes",
|
||||
format_labeled_requests_snapshot_with_normalized_exec_output(
|
||||
"A same-turn realtime-ended update is recorded before manual /compact. After compact and resume, replay should still use the later inactive TurnContext snapshot from that same turn and avoid re-emitting a stale realtime-ended developer update.",
|
||||
&[
|
||||
("Remote Compaction Request", &compact_request),
|
||||
(
|
||||
"Same-Turn Continuation After Realtime Close",
|
||||
second_request
|
||||
),
|
||||
("Remote Post-Resume History Layout", after_resume_request),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
---
|
||||
source: core/tests/suite/compact_remote.rs
|
||||
assertion_line: 2334
|
||||
expression: "format_labeled_requests_snapshot_with_normalized_exec_output(\"A same-turn realtime-ended update is recorded before manual /compact. After compact and resume, replay should still use the later inactive TurnContext snapshot from that same turn and avoid re-emitting a stale realtime-ended developer update.\",\n&[(\"Same-Turn Continuation After Realtime Close\", second_request),\n(\"Remote Post-Resume History Layout\", after_resume_request),])"
|
||||
---
|
||||
Scenario: A same-turn realtime-ended update is recorded before manual /compact. After compact and resume, replay should still use the later inactive TurnContext snapshot from that same turn and avoid re-emitting a stale realtime-ended developer update.
|
||||
|
||||
## Same-Turn Continuation After Realtime Close
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user[2]:
|
||||
[01] <AGENTS_MD>
|
||||
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
02:message/developer:<realtime_conversation>\nRealtime conversation started.\n\nYou a...
|
||||
03:message/user:USER_ONE
|
||||
04:function_call/exec_command
|
||||
05:function_call_output:<UNIFIED_EXEC_OUTPUT>
|
||||
06:message/developer:<realtime_conversation>\nRealtime conversation ended.\n\nYou are...
|
||||
|
||||
## Remote Post-Resume History Layout
|
||||
00:compaction:encrypted=true
|
||||
01:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
02:message/user[2]:
|
||||
[01] <AGENTS_MD>
|
||||
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_TWO
|
||||
@@ -1,22 +0,0 @@
|
||||
---
|
||||
source: core/tests/suite/compact_remote.rs
|
||||
expression: "format_labeled_requests_snapshot(\"After /compact and resume, replay should rebuild previous-turn settings from the last TurnContext snapshot within the compacted turn. Here the earlier snapshot was realtime-active but the later same-turn snapshot was inactive, so the resumed request should not emit a stale realtime-end instruction.\",\n&[(\"Remote Compaction Request\", &compact_request),\n(\"Remote Post-Resume History Layout\", after_resume_request),])"
|
||||
---
|
||||
Scenario: After /compact and resume, replay should rebuild previous-turn settings from the last TurnContext snapshot within the compacted turn. Here the earlier snapshot was realtime-active but the later same-turn snapshot was inactive, so the resumed request should not emit a stale realtime-end instruction.
|
||||
|
||||
## Remote Compaction Request
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user[2]:
|
||||
[01] <AGENTS_MD>
|
||||
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
02:message/developer:<realtime_conversation>\nRealtime conversation started.\n\nYou a...
|
||||
03:message/user:USER_ONE
|
||||
04:message/assistant:REMOTE_FIRST_REPLY
|
||||
|
||||
## Remote Post-Resume History Layout
|
||||
00:compaction:encrypted=true
|
||||
01:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
02:message/user[2]:
|
||||
[01] <AGENTS_MD>
|
||||
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_TWO
|
||||
@@ -0,0 +1,25 @@
|
||||
---
|
||||
source: core/tests/suite/compact_remote.rs
|
||||
assertion_line: 2070
|
||||
expression: "format_labeled_requests_snapshot_with_normalized_exec_output(\"Realtime closes while the turn is paused on exec approval. The continuation request in the same turn emits the realtime-ended developer update, and the rollout persists a later same-turn TurnContext snapshot with realtime inactive.\",\n&[(\"First Request Before Realtime Close\", first_request),\n(\"Same-Turn Continuation After Realtime Close\", second_request),])"
|
||||
---
|
||||
Scenario: Realtime closes while the turn is paused on exec approval. The continuation request in the same turn emits the realtime-ended developer update, and the rollout persists a later same-turn TurnContext snapshot with realtime inactive.
|
||||
|
||||
## First Request Before Realtime Close
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user[2]:
|
||||
[01] <AGENTS_MD>
|
||||
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
02:message/developer:<realtime_conversation>\nRealtime conversation started.\n\nYou a...
|
||||
03:message/user:USER_ONE
|
||||
|
||||
## Same-Turn Continuation After Realtime Close
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user[2]:
|
||||
[01] <AGENTS_MD>
|
||||
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
02:message/developer:<realtime_conversation>\nRealtime conversation started.\n\nYou a...
|
||||
03:message/user:USER_ONE
|
||||
04:function_call/exec_command
|
||||
05:function_call_output:<UNIFIED_EXEC_OUTPUT>
|
||||
06:message/developer:<realtime_conversation>\nRealtime conversation ended.\n\nYou are...
|
||||
@@ -0,0 +1,31 @@
|
||||
---
|
||||
source: core/tests/suite/compact_remote.rs
|
||||
assertion_line: 2198
|
||||
expression: "format_labeled_requests_snapshot_with_normalized_exec_output(\"A same-turn realtime-ended update is recorded before the turn completes. On resume, replay should use the later inactive TurnContext snapshot from that same turn and preserve that historical ended message exactly once instead of emitting a duplicate update.\",\n&[(\"Same-Turn Continuation After Realtime Close\", second_request),\n(\"Remote Post-Resume History Layout\", after_resume_request),])"
|
||||
---
|
||||
Scenario: A same-turn realtime-ended update is recorded before the turn completes. On resume, replay should use the later inactive TurnContext snapshot from that same turn and preserve that historical ended message exactly once instead of emitting a duplicate update.
|
||||
|
||||
## Same-Turn Continuation After Realtime Close
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user[2]:
|
||||
[01] <AGENTS_MD>
|
||||
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
02:message/developer:<realtime_conversation>\nRealtime conversation started.\n\nYou a...
|
||||
03:message/user:USER_ONE
|
||||
04:function_call/exec_command
|
||||
05:function_call_output:<UNIFIED_EXEC_OUTPUT>
|
||||
06:message/developer:<realtime_conversation>\nRealtime conversation ended.\n\nYou are...
|
||||
|
||||
## Remote Post-Resume History Layout
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user[2]:
|
||||
[01] <AGENTS_MD>
|
||||
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
02:message/developer:<realtime_conversation>\nRealtime conversation started.\n\nYou a...
|
||||
03:message/user:USER_ONE
|
||||
04:function_call/exec_command
|
||||
05:function_call_output:<UNIFIED_EXEC_OUTPUT>
|
||||
06:message/developer:<realtime_conversation>\nRealtime conversation ended.\n\nYou are...
|
||||
07:message/assistant:REMOTE_MID_TURN_REALTIME_ENDED_REPLY
|
||||
08:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
09:message/user:USER_TWO
|
||||
@@ -1,25 +0,0 @@
|
||||
---
|
||||
source: core/tests/suite/compact_remote.rs
|
||||
expression: "format_labeled_requests_snapshot(\"Resume after a turn with two persisted TurnContext snapshots for the same turn_id: replay should treat the later inactive snapshot as authoritative, so starting realtime again before the resumed turn emits a fresh realtime-start instruction.\",\n&[(\"Last Request Before Resume\", initial_request),\n(\"Remote Post-Resume History Layout\", after_resume_request),])"
|
||||
---
|
||||
Scenario: Resume after a turn with two persisted TurnContext snapshots for the same turn_id: replay should treat the later inactive snapshot as authoritative, so starting realtime again before the resumed turn emits a fresh realtime-start instruction.
|
||||
|
||||
## Last Request Before Resume
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user[2]:
|
||||
[01] <AGENTS_MD>
|
||||
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
02:message/developer:<realtime_conversation>\nRealtime conversation started.\n\nYou a...
|
||||
03:message/user:USER_ONE
|
||||
|
||||
## Remote Post-Resume History Layout
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user[2]:
|
||||
[01] <AGENTS_MD>
|
||||
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
02:message/developer:<realtime_conversation>\nRealtime conversation started.\n\nYou a...
|
||||
03:message/user:USER_ONE
|
||||
04:message/assistant:REMOTE_FIRST_REPLY
|
||||
05:message/developer:<realtime_conversation>\nRealtime conversation started.\n\nYou a...
|
||||
06:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
07:message/user:USER_TWO
|
||||
Reference in New Issue
Block a user