mirror of
https://github.com/openai/codex.git
synced 2026-03-03 21:23:18 +00:00
Compare commits
14 Commits
fix/notify
...
cc/compact
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f7c1979abe | ||
|
|
dc7ec6e201 | ||
|
|
0bd8ea1997 | ||
|
|
6264c63223 | ||
|
|
1e99d5f50c | ||
|
|
802378d37d | ||
|
|
a13f6a8683 | ||
|
|
c6722490e0 | ||
|
|
0fa584baac | ||
|
|
277dd15df3 | ||
|
|
04f6e5dfc4 | ||
|
|
dd24aa0731 | ||
|
|
37791ab7f0 | ||
|
|
44b775cfc3 |
1
codex-rs/Cargo.lock
generated
1
codex-rs/Cargo.lock
generated
@@ -1708,6 +1708,7 @@ dependencies = [
|
||||
"include_dir",
|
||||
"indexmap 2.13.0",
|
||||
"indoc",
|
||||
"insta",
|
||||
"keyring",
|
||||
"landlock",
|
||||
"libc",
|
||||
|
||||
@@ -152,6 +152,7 @@ codex-utils-cargo-bin = { workspace = true }
|
||||
core_test_support = { workspace = true }
|
||||
ctor = { workspace = true }
|
||||
image = { workspace = true, features = ["jpeg", "png"] }
|
||||
insta = { workspace = true }
|
||||
maplit = { workspace = true }
|
||||
predicates = { workspace = true }
|
||||
pretty_assertions = { workspace = true }
|
||||
|
||||
166
codex-rs/core/tests/common/context_snapshot.rs
Normal file
166
codex-rs/core/tests/common/context_snapshot.rs
Normal file
@@ -0,0 +1,166 @@
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::responses::ResponsesRequest;
|
||||
|
||||
/// Selects how much detail each rendered snapshot line carries.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ContextSnapshotRenderMode {
    /// Default mode. Text payloads are rendered with newlines escaped as `\n`.
    #[default]
    RedactedText,
    /// Text payloads are rendered; in this file they are normalized the same way as
    /// [`ContextSnapshotRenderMode::RedactedText`].
    FullText,
    /// Only the item type is rendered (plus the role for `message` items); no text payload.
    KindOnly,
}

/// Options consumed by the snapshot rendering helpers in this module.
///
/// `Default` is derived: the default render mode is the enum's `#[default]` variant,
/// [`ContextSnapshotRenderMode::RedactedText`].
#[derive(Debug, Clone, Default)]
pub struct ContextSnapshotOptions {
    render_mode: ContextSnapshotRenderMode,
}

impl ContextSnapshotOptions {
    /// Builder-style setter: returns `self` with `render_mode` replaced.
    pub fn render_mode(mut self, render_mode: ContextSnapshotRenderMode) -> Self {
        self.render_mode = render_mode;
        self
    }
}
|
||||
|
||||
pub fn request_input_shape(request: &ResponsesRequest, options: &ContextSnapshotOptions) -> String {
|
||||
let items = request.input();
|
||||
response_items_shape(items.as_slice(), options)
|
||||
}
|
||||
|
||||
pub fn response_items_shape(items: &[Value], options: &ContextSnapshotOptions) -> String {
|
||||
items
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, item)| {
|
||||
let Some(item_type) = item.get("type").and_then(Value::as_str) else {
|
||||
return format!("{idx:02}:<MISSING_TYPE>");
|
||||
};
|
||||
|
||||
if options.render_mode == ContextSnapshotRenderMode::KindOnly {
|
||||
return if item_type == "message" {
|
||||
let role = item.get("role").and_then(Value::as_str).unwrap_or("unknown");
|
||||
format!("{idx:02}:message/{role}")
|
||||
} else {
|
||||
format!("{idx:02}:{item_type}")
|
||||
};
|
||||
}
|
||||
|
||||
match item_type {
|
||||
"message" => {
|
||||
let role = item.get("role").and_then(Value::as_str).unwrap_or("unknown");
|
||||
let text = item
|
||||
.get("content")
|
||||
.and_then(Value::as_array)
|
||||
.map(|content| {
|
||||
content
|
||||
.iter()
|
||||
.filter_map(|entry| entry.get("text").and_then(Value::as_str))
|
||||
.map(|text| normalize_shape_text(text, options))
|
||||
.collect::<Vec<String>>()
|
||||
.join(" | ")
|
||||
})
|
||||
.filter(|text| !text.is_empty())
|
||||
.unwrap_or_else(|| "<NO_TEXT>".to_string());
|
||||
format!("{idx:02}:message/{role}:{text}")
|
||||
}
|
||||
"function_call" => {
|
||||
let name = item.get("name").and_then(Value::as_str).unwrap_or("unknown");
|
||||
format!("{idx:02}:function_call/{name}")
|
||||
}
|
||||
"function_call_output" => {
|
||||
let output = item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.map(|output| output.replace('\n', "\\n"))
|
||||
.unwrap_or_else(|| "<NON_STRING_OUTPUT>".to_string());
|
||||
format!("{idx:02}:function_call_output:{output}")
|
||||
}
|
||||
"local_shell_call" => {
|
||||
let command = item
|
||||
.get("action")
|
||||
.and_then(|action| action.get("command"))
|
||||
.and_then(Value::as_array)
|
||||
.map(|parts| {
|
||||
parts
|
||||
.iter()
|
||||
.filter_map(Value::as_str)
|
||||
.collect::<Vec<&str>>()
|
||||
.join(" ")
|
||||
})
|
||||
.filter(|cmd| !cmd.is_empty())
|
||||
.unwrap_or_else(|| "<NO_COMMAND>".to_string());
|
||||
format!("{idx:02}:local_shell_call:{command}")
|
||||
}
|
||||
"reasoning" => {
|
||||
let summary_text = item
|
||||
.get("summary")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|summary| summary.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(Value::as_str)
|
||||
.map(|text| normalize_shape_text(text, options))
|
||||
.unwrap_or_else(|| "<NO_SUMMARY>".to_string());
|
||||
let has_encrypted_content = item
|
||||
.get("encrypted_content")
|
||||
.and_then(Value::as_str)
|
||||
.is_some_and(|value| !value.is_empty());
|
||||
format!(
|
||||
"{idx:02}:reasoning:summary={summary_text}:encrypted={has_encrypted_content}"
|
||||
)
|
||||
}
|
||||
"compaction" => {
|
||||
let has_encrypted_content = item
|
||||
.get("encrypted_content")
|
||||
.and_then(Value::as_str)
|
||||
.is_some_and(|value| !value.is_empty());
|
||||
format!("{idx:02}:compaction:encrypted={has_encrypted_content}")
|
||||
}
|
||||
other => format!("{idx:02}:{other}"),
|
||||
}
|
||||
})
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n")
|
||||
}
|
||||
|
||||
pub fn sectioned_request_shapes(
|
||||
scenario: &str,
|
||||
sections: &[(&str, &ResponsesRequest)],
|
||||
options: &ContextSnapshotOptions,
|
||||
) -> String {
|
||||
let sections = sections
|
||||
.iter()
|
||||
.map(|(title, request)| format!("## {title}\n{}", request_input_shape(request, options)))
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n\n");
|
||||
format!("Scenario: {scenario}\n\n{sections}")
|
||||
}
|
||||
|
||||
pub fn sectioned_item_shapes(
|
||||
scenario: &str,
|
||||
sections: &[(&str, &[Value])],
|
||||
options: &ContextSnapshotOptions,
|
||||
) -> String {
|
||||
let sections = sections
|
||||
.iter()
|
||||
.map(|(title, items)| format!("## {title}\n{}", response_items_shape(items, options)))
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n\n");
|
||||
format!("Scenario: {scenario}\n\n{sections}")
|
||||
}
|
||||
|
||||
fn normalize_shape_text(text: &str, options: &ContextSnapshotOptions) -> String {
|
||||
match options.render_mode {
|
||||
ContextSnapshotRenderMode::RedactedText | ContextSnapshotRenderMode::FullText => {
|
||||
text.replace('\n', "\\n")
|
||||
}
|
||||
ContextSnapshotRenderMode::KindOnly => unreachable!(),
|
||||
}
|
||||
}
|
||||
@@ -12,6 +12,7 @@ use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use regex_lite::Regex;
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub mod context_snapshot;
|
||||
pub mod process;
|
||||
pub mod responses;
|
||||
pub mod streaming_sse;
|
||||
|
||||
@@ -818,15 +818,24 @@ where
|
||||
}
|
||||
|
||||
pub async fn mount_compact_json_once(server: &MockServer, body: serde_json::Value) -> ResponseMock {
|
||||
let (mock, response_mock) = compact_mock();
|
||||
mock.respond_with(
|
||||
mount_compact_response_once(
|
||||
server,
|
||||
ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "application/json")
|
||||
.set_body_json(body.clone()),
|
||||
.set_body_json(body),
|
||||
)
|
||||
.up_to_n_times(1)
|
||||
.mount(server)
|
||||
.await;
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn mount_compact_response_once(
|
||||
server: &MockServer,
|
||||
response: ResponseTemplate,
|
||||
) -> ResponseMock {
|
||||
let (mock, response_mock) = compact_mock();
|
||||
mock.respond_with(response)
|
||||
.up_to_n_times(1)
|
||||
.mount(server)
|
||||
.await;
|
||||
response_mock
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,9 @@ use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::openai_models::ModelInfo;
|
||||
use codex_protocol::openai_models::ModelsResponse;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::context_snapshot;
|
||||
use core_test_support::context_snapshot::ContextSnapshotOptions;
|
||||
use core_test_support::context_snapshot::ContextSnapshotRenderMode;
|
||||
use core_test_support::responses::ev_local_shell_call;
|
||||
use core_test_support::responses::ev_reasoning_item;
|
||||
use core_test_support::responses::mount_models_once;
|
||||
@@ -28,6 +31,7 @@ use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use std::collections::VecDeque;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
@@ -43,6 +47,7 @@ use core_test_support::responses::sse_failed;
|
||||
use core_test_support::responses::sse_response;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use wiremock::MockServer;
|
||||
// --- Test helpers -----------------------------------------------------------
|
||||
@@ -60,10 +65,12 @@ const SECOND_AUTO_SUMMARY: &str = "SECOND_AUTO_SUMMARY";
|
||||
const FINAL_REPLY: &str = "FINAL_REPLY";
|
||||
const CONTEXT_LIMIT_MESSAGE: &str =
|
||||
"Your input exceeds the context window of this model. Please adjust your input and try again.";
|
||||
// Function name used for the scripted model function call in these tests.
const DUMMY_FUNCTION_NAME: &str = "test_tool";
|
||||
const DUMMY_CALL_ID: &str = "call-multi-auto";
|
||||
const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push";
|
||||
const POST_AUTO_USER_MSG: &str = "post auto follow-up";
|
||||
const PRETURN_CONTEXT_DIFF_CWD_MARKER: &str = "PRETURN_CONTEXT_DIFF_CWD";
|
||||
const PRETURN_CONTEXT_DIFF_CWD: &str = "/tmp/PRETURN_CONTEXT_DIFF_CWD";
|
||||
|
||||
pub(super) const COMPACT_WARNING_MESSAGE: &str = "Heads up: Long threads and multiple compactions can cause the model to be less accurate. Start a new thread when possible to keep threads small and targeted.";
|
||||
|
||||
@@ -185,6 +192,23 @@ async fn assert_compaction_uses_turn_lifecycle_id(codex: &std::sync::Arc<codex_c
|
||||
"compaction item completion should use the turn event id"
|
||||
);
|
||||
}
|
||||
fn context_snapshot_options() -> ContextSnapshotOptions {
|
||||
ContextSnapshotOptions::default().render_mode(ContextSnapshotRenderMode::KindOnly)
|
||||
}
|
||||
|
||||
fn sectioned_request_shapes(
|
||||
scenario: &str,
|
||||
sections: &[(&str, &core_test_support::responses::ResponsesRequest)],
|
||||
) -> String {
|
||||
context_snapshot::sectioned_request_shapes(scenario, sections, &context_snapshot_options())
|
||||
}
|
||||
|
||||
fn request_contains_text(
|
||||
request: &core_test_support::responses::ResponsesRequest,
|
||||
text: &str,
|
||||
) -> bool {
|
||||
body_contains_text(&request.body_json().to_string(), text)
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn summarize_context_three_requests_and_instructions() {
|
||||
@@ -399,8 +423,15 @@ async fn manual_compact_uses_custom_prompt() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let sse_stream = sse(vec![ev_completed("r1")]);
|
||||
let response_mock = mount_sse_once(&server, sse_stream).await;
|
||||
let first_turn = sse(vec![
|
||||
ev_assistant_message("m0", FIRST_REPLY),
|
||||
ev_completed_with_tokens("r0", 80),
|
||||
]);
|
||||
let compact_turn = sse(vec![
|
||||
ev_assistant_message("m1", SUMMARY_TEXT),
|
||||
ev_completed_with_tokens("r1", 100),
|
||||
]);
|
||||
let request_log = mount_sse_sequence(&server, vec![first_turn, compact_turn]).await;
|
||||
|
||||
let custom_prompt = "Use this compact prompt instead";
|
||||
|
||||
@@ -415,6 +446,18 @@ async fn manual_compact_uses_custom_prompt() {
|
||||
.expect("create conversation")
|
||||
.codex;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_ONE".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.expect("submit first user turn");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex.submit(Op::Compact).await.expect("trigger compact");
|
||||
let warning_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Warning(_))).await;
|
||||
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
|
||||
@@ -423,7 +466,13 @@ async fn manual_compact_uses_custom_prompt() {
|
||||
assert_eq!(message, COMPACT_WARNING_MESSAGE);
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let body = response_mock.single_request().body_json();
|
||||
let requests = request_log.requests();
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
2,
|
||||
"expected first turn and compact requests"
|
||||
);
|
||||
let body = requests[1].body_json();
|
||||
|
||||
let input = body
|
||||
.get("input")
|
||||
@@ -466,6 +515,10 @@ async fn manual_compact_emits_api_and_local_token_usage_events() {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let sse_first_turn = sse(vec![
|
||||
ev_assistant_message("m0", FIRST_REPLY),
|
||||
ev_completed_with_tokens("r0", 80),
|
||||
]);
|
||||
// Compact run where the API reports zero tokens in usage. Our local
|
||||
// estimator should still compute a non-zero context size for the compacted
|
||||
// history.
|
||||
@@ -473,7 +526,7 @@ async fn manual_compact_emits_api_and_local_token_usage_events() {
|
||||
ev_assistant_message("m1", SUMMARY_TEXT),
|
||||
ev_completed_with_tokens("r1", 0),
|
||||
]);
|
||||
mount_sse_once(&server, sse_compact).await;
|
||||
mount_sse_sequence(&server, vec![sse_first_turn, sse_compact]).await;
|
||||
|
||||
let model_provider = non_openai_model_provider(&server);
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
@@ -482,39 +535,41 @@ async fn manual_compact_emits_api_and_local_token_usage_events() {
|
||||
});
|
||||
let codex = builder.build(&server).await.unwrap().codex;
|
||||
|
||||
// Trigger manual compact and collect TokenCount events for the compact turn.
|
||||
codex.submit(Op::Compact).await.unwrap();
|
||||
|
||||
// First TokenCount: from the compact API call (usage.total_tokens = 0).
|
||||
let first = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::TokenCount(tc) => tc
|
||||
.info
|
||||
.as_ref()
|
||||
.map(|info| info.last_token_usage.total_tokens),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
// Second TokenCount: from the local post-compaction estimate.
|
||||
let last = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::TokenCount(tc) => tc
|
||||
.info
|
||||
.as_ref()
|
||||
.map(|info| info.last_token_usage.total_tokens),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
// Ensure the compact task itself completes.
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_ONE".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
assert_eq!(
|
||||
first, 0,
|
||||
"expected first TokenCount from compact API usage to be zero"
|
||||
// Trigger manual compact and collect TokenCount events for the compact turn.
|
||||
codex.submit(Op::Compact).await.unwrap();
|
||||
let mut token_totals = Vec::new();
|
||||
loop {
|
||||
let event = wait_for_event(&codex, |_| true).await;
|
||||
match event {
|
||||
EventMsg::TokenCount(tc) => {
|
||||
if let Some(info) = tc.info {
|
||||
token_totals.push(info.last_token_usage.total_tokens);
|
||||
}
|
||||
}
|
||||
EventMsg::TurnComplete(_) => break,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
assert!(
|
||||
token_totals.contains(&0),
|
||||
"expected compact turn to emit TokenCount usage.total_tokens = 0"
|
||||
);
|
||||
assert!(
|
||||
last > 0,
|
||||
"second TokenCount should reflect a non-zero estimated context size after compaction"
|
||||
token_totals.iter().any(|total| *total > 0),
|
||||
"expected compact turn to emit non-zero estimated local context size after compaction"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -2124,6 +2179,82 @@ async fn manual_compact_retries_after_context_window_error() {
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
// TODO(ccunningham): Re-enable after the follow-up compaction behavior PR lands.
|
||||
// Current main behavior around non-context manual /compact failures is known-incorrect.
|
||||
#[ignore = "behavior change covered in follow-up compaction PR"]
|
||||
async fn manual_compact_non_context_failure_retries_then_emits_task_error() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let user_turn = sse(vec![
|
||||
ev_assistant_message("m1", FIRST_REPLY),
|
||||
ev_completed("r1"),
|
||||
]);
|
||||
let compact_failed_1 = sse_failed(
|
||||
"resp-fail-1",
|
||||
"server_error",
|
||||
"temporary compact failure one",
|
||||
);
|
||||
let compact_failed_2 = sse_failed(
|
||||
"resp-fail-2",
|
||||
"server_error",
|
||||
"temporary compact failure two",
|
||||
);
|
||||
|
||||
mount_sse_sequence(&server, vec![user_turn, compact_failed_1, compact_failed_2]).await;
|
||||
|
||||
let mut model_provider = non_openai_model_provider(&server);
|
||||
model_provider.stream_max_retries = Some(1);
|
||||
|
||||
let codex = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(config);
|
||||
config.model_auto_compact_token_limit = Some(200_000);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
.expect("build codex")
|
||||
.codex;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "first turn".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.expect("submit user input");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex.submit(Op::Compact).await.expect("trigger compact");
|
||||
|
||||
let reconnect_message = wait_for_event_match(&codex, |event| match event {
|
||||
EventMsg::StreamError(stream_error) => Some(stream_error.message.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert!(
|
||||
reconnect_message.contains("Reconnecting... 1/1"),
|
||||
"expected reconnect stream error message, got {reconnect_message}"
|
||||
);
|
||||
|
||||
let task_error_message = wait_for_event_match(&codex, |event| match event {
|
||||
EventMsg::Error(err) => Some(err.message.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert!(
|
||||
task_error_message.contains("Error running local compact task"),
|
||||
"expected local compact task error prefix, got {task_error_message}"
|
||||
);
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn manual_compact_twice_preserves_latest_user_messages() {
|
||||
skip_if_no_network!();
|
||||
@@ -2287,15 +2418,27 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
|
||||
.unwrap_or_else(|| panic!("final turn request missing for {final_user_message}"))
|
||||
.input()
|
||||
.into_iter()
|
||||
.filter(|item| {
|
||||
let role = item
|
||||
.get("role")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default();
|
||||
let text = item
|
||||
.get("content")
|
||||
.and_then(|v| v.as_array())
|
||||
.and_then(|v| v.first())
|
||||
.and_then(|v| v.get("text"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default();
|
||||
if role == "developer" {
|
||||
return false;
|
||||
}
|
||||
!(text.starts_with("# AGENTS.md instructions for ")
|
||||
|| text.starts_with("<environment_context>")
|
||||
|| text.starts_with("<turn_aborted>"))
|
||||
})
|
||||
.collect::<VecDeque<_>>();
|
||||
|
||||
// Permissions developer message
|
||||
final_output.pop_front();
|
||||
// User instructions (project docs/skills)
|
||||
final_output.pop_front();
|
||||
// Environment context
|
||||
final_output.pop_front();
|
||||
|
||||
let _ = final_output
|
||||
.iter_mut()
|
||||
.map(drop_call_id)
|
||||
@@ -2819,3 +2962,467 @@ async fn auto_compact_runs_when_reasoning_header_clears_between_turns() {
|
||||
"remote compaction should run once after the reasoning header clears"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
// TODO(ccunningham): Update once pre-turn compaction includes incoming user input.
|
||||
async fn snapshot_request_shape_pre_turn_compaction_including_incoming_user_message() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let sse1 = sse(vec![
|
||||
ev_assistant_message("m1", FIRST_REPLY),
|
||||
ev_completed_with_tokens("r1", 60),
|
||||
]);
|
||||
let sse2 = sse(vec![
|
||||
ev_assistant_message("m2", "SECOND_REPLY"),
|
||||
ev_completed_with_tokens("r2", 500),
|
||||
]);
|
||||
let sse3 = sse(vec![
|
||||
ev_assistant_message("m3", "PRE_TURN_SUMMARY"),
|
||||
ev_completed_with_tokens("r3", 100),
|
||||
]);
|
||||
let sse4 = sse(vec![
|
||||
ev_assistant_message("m4", FINAL_REPLY),
|
||||
ev_completed_with_tokens("r4", 80),
|
||||
]);
|
||||
let request_log = mount_sse_sequence(&server, vec![sse1, sse2, sse3, sse4]).await;
|
||||
|
||||
let model_provider = non_openai_model_provider(&server);
|
||||
let codex = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(config);
|
||||
config.model_auto_compact_token_limit = Some(200);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
.expect("build codex")
|
||||
.codex;
|
||||
|
||||
for user in ["USER_ONE", "USER_TWO"] {
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: user.to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.expect("submit user input");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
}
|
||||
codex
|
||||
.submit(Op::OverrideTurnContext {
|
||||
cwd: Some(PathBuf::from(PRETURN_CONTEXT_DIFF_CWD)),
|
||||
approval_policy: None,
|
||||
sandbox_policy: None,
|
||||
windows_sandbox_level: None,
|
||||
model: None,
|
||||
effort: None,
|
||||
summary: None,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await
|
||||
.expect("override turn context");
|
||||
let image_url = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII="
|
||||
.to_string();
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![
|
||||
UserInput::Image {
|
||||
image_url: image_url.clone(),
|
||||
},
|
||||
UserInput::Text {
|
||||
text: "USER_THREE".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
},
|
||||
],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.expect("submit user input");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let requests = request_log.requests();
|
||||
assert_eq!(requests.len(), 4, "expected user, user, compact, follow-up");
|
||||
|
||||
insta::assert_snapshot!(
|
||||
"pre_turn_compaction_including_incoming_shapes",
|
||||
sectioned_request_shapes(
|
||||
"Pre-turn auto-compaction with a context override emits the context diff in the compact request while the incoming user message is still excluded.",
|
||||
&[
|
||||
("Local Compaction Request", &requests[2]),
|
||||
("Local Post-Compaction History Layout", &requests[3]),
|
||||
]
|
||||
)
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&requests[2], SUMMARIZATION_PROMPT),
|
||||
"expected compact request to include summarization prompt"
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&requests[2], PRETURN_CONTEXT_DIFF_CWD_MARKER),
|
||||
"expected compact request to include pre-turn context diff"
|
||||
);
|
||||
assert!(
|
||||
!request_contains_text(&requests[2], "USER_THREE"),
|
||||
"current behavior excludes incoming user message from pre-turn compaction input"
|
||||
);
|
||||
let follow_up_has_incoming_image = requests[3].inputs_of_type("message").iter().any(|item| {
|
||||
if item.get("role").and_then(Value::as_str) != Some("user") {
|
||||
return false;
|
||||
}
|
||||
let Some(content) = item.get("content").and_then(Value::as_array) else {
|
||||
return false;
|
||||
};
|
||||
let has_user_text = content.iter().any(|span| {
|
||||
span.get("type").and_then(Value::as_str) == Some("input_text")
|
||||
&& span.get("text").and_then(Value::as_str) == Some("USER_THREE")
|
||||
});
|
||||
let has_image = content.iter().any(|span| {
|
||||
span.get("type").and_then(Value::as_str) == Some("input_image")
|
||||
&& span.get("image_url").and_then(Value::as_str) == Some(image_url.as_str())
|
||||
});
|
||||
has_user_text && has_image
|
||||
});
|
||||
assert!(
|
||||
follow_up_has_incoming_image,
|
||||
"expected post-compaction follow-up request to keep incoming user image content"
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&requests[3], &summary_with_prefix("PRE_TURN_SUMMARY")),
|
||||
"expected post-compaction request to include summary text"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
// TODO(ccunningham): Update once pre-turn compaction context-overflow handling includes incoming
|
||||
// user input and emits richer oversized-input messaging.
|
||||
async fn snapshot_request_shape_pre_turn_compaction_context_window_exceeded() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let first_turn = sse(vec![
|
||||
ev_assistant_message("m1", FIRST_REPLY),
|
||||
ev_completed_with_tokens("r1", 500),
|
||||
]);
|
||||
let mut responses = vec![first_turn];
|
||||
responses.extend(
|
||||
(0..6).map(|_| {
|
||||
sse_failed(
|
||||
"compact-failed",
|
||||
"context_length_exceeded",
|
||||
"Your input exceeds the context window of this model. Please adjust your input and try again.",
|
||||
)
|
||||
}),
|
||||
);
|
||||
let request_log = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let mut model_provider = non_openai_model_provider(&server);
|
||||
model_provider.stream_max_retries = Some(0);
|
||||
let codex = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(config);
|
||||
config.model_auto_compact_token_limit = Some(200);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
.expect("build codex")
|
||||
.codex;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_ONE".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.expect("submit first user");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_TWO".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.expect("submit second user");
|
||||
let error_message = wait_for_event_match(&codex, |event| match event {
|
||||
EventMsg::Error(err) => Some(err.message.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let requests = request_log.requests();
|
||||
assert!(
|
||||
requests.len() >= 2,
|
||||
"expected first turn and at least one compaction request"
|
||||
);
|
||||
|
||||
insta::assert_snapshot!(
|
||||
"pre_turn_compaction_context_window_exceeded_shapes",
|
||||
sectioned_request_shapes(
|
||||
"Pre-turn auto-compaction context-window failure: compaction request excludes the incoming user message and the turn errors.",
|
||||
&[(
|
||||
"Local Compaction Request (Incoming User Excluded)",
|
||||
&requests[1]
|
||||
),]
|
||||
)
|
||||
);
|
||||
|
||||
assert!(
|
||||
!request_contains_text(&requests[1], "USER_TWO"),
|
||||
"current behavior excludes incoming user message from pre-turn compaction input"
|
||||
);
|
||||
assert!(
|
||||
error_message.contains("ran out of room in the model's context window"),
|
||||
"expected context window exceeded message, got {error_message}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn snapshot_request_shape_mid_turn_continuation_compaction() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let first_turn = sse(vec![
|
||||
ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"),
|
||||
ev_completed_with_tokens("r1", 500),
|
||||
]);
|
||||
let auto_compact_turn = sse(vec![
|
||||
ev_assistant_message("m2", "MID_TURN_SUMMARY"),
|
||||
ev_completed_with_tokens("r2", 100),
|
||||
]);
|
||||
let post_compact_turn = sse(vec![
|
||||
ev_assistant_message("m3", FINAL_REPLY),
|
||||
ev_completed_with_tokens("r3", 80),
|
||||
]);
|
||||
let request_log = mount_sse_sequence(
|
||||
&server,
|
||||
vec![first_turn, auto_compact_turn, post_compact_turn],
|
||||
)
|
||||
.await;
|
||||
|
||||
let model_provider = non_openai_model_provider(&server);
|
||||
let codex = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(config);
|
||||
config.model_auto_compact_token_limit = Some(200);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
.expect("build codex")
|
||||
.codex;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_ONE".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.expect("submit user input");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let requests = request_log.requests();
|
||||
assert_eq!(requests.len(), 3, "expected user, compact, follow-up");
|
||||
|
||||
insta::assert_snapshot!(
|
||||
"mid_turn_compaction_shapes",
|
||||
sectioned_request_shapes(
|
||||
"Mid-turn continuation compaction after tool output: compact request includes tool artifacts and follow-up request includes the summary.",
|
||||
&[
|
||||
("Local Compaction Request", &requests[1]),
|
||||
("Local Post-Compaction History Layout", &requests[2]),
|
||||
]
|
||||
)
|
||||
);
|
||||
assert!(
|
||||
!requests[1]
|
||||
.inputs_of_type("function_call_output")
|
||||
.is_empty(),
|
||||
"mid-turn compaction request should include function call output"
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&requests[1], SUMMARIZATION_PROMPT),
|
||||
"mid-turn compaction request should include summarization prompt"
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&requests[2], &summary_with_prefix("MID_TURN_SUMMARY")),
|
||||
"post-mid-turn compaction request should include summary text"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn snapshot_request_shape_manual_compact_without_previous_user_messages() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let compact_turn = sse(vec![
|
||||
ev_assistant_message("m1", "MANUAL_EMPTY_SUMMARY"),
|
||||
ev_completed_with_tokens("r1", 90),
|
||||
]);
|
||||
let follow_up_turn = sse(vec![
|
||||
ev_assistant_message("m2", FINAL_REPLY),
|
||||
ev_completed_with_tokens("r2", 80),
|
||||
]);
|
||||
let request_log = mount_sse_sequence(&server, vec![compact_turn, follow_up_turn]).await;
|
||||
|
||||
let model_provider = non_openai_model_provider(&server);
|
||||
let codex = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(config);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
.expect("build codex")
|
||||
.codex;
|
||||
|
||||
codex.submit(Op::Compact).await.expect("run /compact");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "AFTER_MANUAL_EMPTY_COMPACT".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.expect("submit follow-up user input");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let requests = request_log.requests();
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
2,
|
||||
"expected manual /compact request and follow-up turn request"
|
||||
);
|
||||
|
||||
insta::assert_snapshot!(
|
||||
"manual_compact_without_prev_user_shapes",
|
||||
sectioned_request_shapes(
|
||||
"Manual /compact with no prior user turn currently still issues a compaction request; follow-up turn carries canonical context and the new user message.",
|
||||
&[
|
||||
("Local Compaction Request", &requests[0]),
|
||||
("Local Post-Compaction History Layout", &requests[1]),
|
||||
]
|
||||
)
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&requests[0], SUMMARIZATION_PROMPT),
|
||||
"manual /compact request should include summarization prompt"
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&requests[1], "AFTER_MANUAL_EMPTY_COMPACT"),
|
||||
"follow-up request should include the submitted user message"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn snapshot_request_shape_manual_compact_with_previous_user_messages() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let first_turn = sse(vec![
|
||||
ev_assistant_message("m1", FIRST_REPLY),
|
||||
ev_completed_with_tokens("r1", 80),
|
||||
]);
|
||||
let compact_turn = sse(vec![
|
||||
ev_assistant_message("m2", "MANUAL_SUMMARY"),
|
||||
ev_completed_with_tokens("r2", 90),
|
||||
]);
|
||||
let follow_up_turn = sse(vec![
|
||||
ev_assistant_message("m3", FINAL_REPLY),
|
||||
ev_completed_with_tokens("r3", 80),
|
||||
]);
|
||||
let request_log =
|
||||
mount_sse_sequence(&server, vec![first_turn, compact_turn, follow_up_turn]).await;
|
||||
|
||||
let model_provider = non_openai_model_provider(&server);
|
||||
let codex = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(config);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
.expect("build codex")
|
||||
.codex;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_ONE".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.expect("submit first user input");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex.submit(Op::Compact).await.expect("run /compact");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_TWO".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.expect("submit second user input");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let requests = request_log.requests();
|
||||
assert_eq!(requests.len(), 3, "expected user, compact, follow-up");
|
||||
|
||||
insta::assert_snapshot!(
|
||||
"manual_compact_with_history_shapes",
|
||||
sectioned_request_shapes(
|
||||
"Manual /compact with prior user history compacts existing history and the follow-up turn includes the compact summary plus new user message.",
|
||||
&[
|
||||
("Local Compaction Request", &requests[1]),
|
||||
("Local Post-Compaction History Layout", &requests[2]),
|
||||
]
|
||||
)
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&requests[1], "USER_ONE"),
|
||||
"manual compact request should include existing user history"
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&requests[1], SUMMARIZATION_PROMPT),
|
||||
"manual compact request should include summarization prompt"
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&requests[2], &summary_with_prefix("MANUAL_SUMMARY")),
|
||||
"post-compact request should include compact summary text"
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&requests[2], "USER_TWO"),
|
||||
"post-compact request should include the latest user message"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
#![allow(clippy::expect_used)]
|
||||
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::compact::SUMMARY_PREFIX;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ItemCompletedEvent;
|
||||
use codex_core::protocol::ItemStartedEvent;
|
||||
@@ -14,6 +16,9 @@ use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::context_snapshot;
|
||||
use core_test_support::context_snapshot::ContextSnapshotOptions;
|
||||
use core_test_support::context_snapshot::ContextSnapshotRenderMode;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::sse;
|
||||
@@ -23,6 +28,7 @@ use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use pretty_assertions::assert_eq;
|
||||
use wiremock::ResponseTemplate;
|
||||
|
||||
fn approx_token_count(text: &str) -> i64 {
|
||||
i64::try_from(text.len().saturating_add(3) / 4).unwrap_or(i64::MAX)
|
||||
@@ -39,6 +45,51 @@ fn estimate_compact_payload_tokens(request: &responses::ResponsesRequest) -> i64
|
||||
.saturating_add(approx_token_count(&request.instructions_text()))
|
||||
}
|
||||
|
||||
const DUMMY_FUNCTION_NAME: &str = "test_tool";
|
||||
const PRETURN_CONTEXT_DIFF_CWD_MARKER: &str = "PRETURN_CONTEXT_DIFF_CWD";
|
||||
const PRETURN_CONTEXT_DIFF_CWD: &str = "/tmp/PRETURN_CONTEXT_DIFF_CWD";
|
||||
|
||||
fn summary_with_prefix(summary: &str) -> String {
|
||||
format!("{SUMMARY_PREFIX}\n{summary}")
|
||||
}
|
||||
|
||||
fn user_message_item(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn context_snapshot_options() -> ContextSnapshotOptions {
|
||||
ContextSnapshotOptions::default().render_mode(ContextSnapshotRenderMode::KindOnly)
|
||||
}
|
||||
|
||||
fn sectioned_request_shapes(
|
||||
scenario: &str,
|
||||
sections: &[(&str, &responses::ResponsesRequest)],
|
||||
) -> String {
|
||||
context_snapshot::sectioned_request_shapes(scenario, sections, &context_snapshot_options())
|
||||
}
|
||||
|
||||
fn json_fragment(text: &str) -> String {
|
||||
serde_json::to_string(text)
|
||||
.expect("serialize text to JSON")
|
||||
.trim_matches('"')
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn request_contains_text(request: &responses::ResponsesRequest, text: &str) -> bool {
|
||||
request
|
||||
.body_json()
|
||||
.to_string()
|
||||
.contains(&json_fragment(text))
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn remote_compact_replaces_history_for_followups() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
@@ -497,7 +548,7 @@ async fn auto_remote_compact_failure_stops_agent_loop() -> Result<()> {
|
||||
)
|
||||
.await;
|
||||
|
||||
let compact_mock = responses::mount_compact_json_once(
|
||||
let first_compact_mock = responses::mount_compact_json_once(
|
||||
harness.server(),
|
||||
serde_json::json!({ "output": "invalid compact payload shape" }),
|
||||
)
|
||||
@@ -540,13 +591,20 @@ async fn auto_remote_compact_failure_stops_agent_loop() -> Result<()> {
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
assert!(
|
||||
error_message.contains("Error running remote compact task"),
|
||||
"expected compact failure error, got {error_message}"
|
||||
!error_message.contains(
|
||||
"Incoming user message and/or turn context is too large to fit in context window"
|
||||
),
|
||||
"non-context compaction failures should surface real error messages, got {error_message}"
|
||||
);
|
||||
assert!(
|
||||
error_message.contains("invalid compact payload shape")
|
||||
|| error_message.contains("invalid type: string"),
|
||||
"expected remote compact parse failure to surface, got {error_message}"
|
||||
);
|
||||
assert_eq!(
|
||||
compact_mock.requests().len(),
|
||||
first_compact_mock.requests().len(),
|
||||
1,
|
||||
"expected exactly one remote compact attempt"
|
||||
"expected first remote compact attempt with incoming items"
|
||||
);
|
||||
assert!(
|
||||
post_compact_turn_mock.requests().is_empty(),
|
||||
@@ -851,6 +909,68 @@ async fn remote_manual_compact_emits_context_compaction_items() -> Result<()> {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn remote_manual_compact_failure_emits_task_error_event() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = TestCodexHarness::with_builder(
|
||||
test_codex().with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()),
|
||||
)
|
||||
.await?;
|
||||
let codex = harness.test().codex.clone();
|
||||
|
||||
mount_sse_once(
|
||||
harness.server(),
|
||||
sse(vec![
|
||||
responses::ev_assistant_message("m1", "REMOTE_REPLY"),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let compact_mock = responses::mount_compact_json_once(
|
||||
harness.server(),
|
||||
serde_json::json!({ "output": "invalid compact payload shape" }),
|
||||
)
|
||||
.await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "manual remote compact".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex.submit(Op::Compact).await?;
|
||||
|
||||
let error_message = wait_for_event_match(&codex, |event| match event {
|
||||
EventMsg::Error(err) => Some(err.message.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert!(
|
||||
error_message.contains("Error running remote compact task"),
|
||||
"expected remote compact task error prefix, got {error_message}"
|
||||
);
|
||||
assert!(
|
||||
error_message.contains("invalid compact payload shape")
|
||||
|| error_message.contains("invalid type: string"),
|
||||
"expected invalid compact payload details, got {error_message}"
|
||||
);
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
assert_eq!(compact_mock.requests().len(), 1);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
// TODO(ccunningham): Re-enable after the follow-up compaction behavior PR lands.
|
||||
// Current main behavior for rollout replacement-history persistence is known-incorrect.
|
||||
#[ignore = "behavior change covered in follow-up compaction PR"]
|
||||
async fn remote_compact_persists_replacement_history_in_rollout() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
@@ -980,11 +1100,11 @@ async fn remote_compact_persists_replacement_history_in_rollout() -> Result<()>
|
||||
)
|
||||
});
|
||||
|
||||
if has_compacted_user_summary
|
||||
&& has_compaction_item
|
||||
&& has_compacted_assistant_note
|
||||
&& has_permissions_developer_message
|
||||
{
|
||||
if has_compacted_user_summary && has_compaction_item && has_compacted_assistant_note {
|
||||
assert!(
|
||||
!has_permissions_developer_message,
|
||||
"manual remote compact rollout replacement history should not inject permissions context"
|
||||
);
|
||||
saw_compacted_history = true;
|
||||
break;
|
||||
}
|
||||
@@ -1250,3 +1370,584 @@ async fn remote_compact_refreshes_stale_developer_instructions_without_resume()
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
// TODO(ccunningham): Update once remote pre-turn compaction includes incoming user input.
|
||||
async fn snapshot_request_shape_remote_pre_turn_compaction_including_incoming_user_message()
|
||||
-> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = TestCodexHarness::with_builder(
|
||||
test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(|config| {
|
||||
config.model_auto_compact_token_limit = Some(200);
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
let codex = harness.test().codex.clone();
|
||||
|
||||
let responses_mock = responses::mount_sse_sequence(
|
||||
harness.server(),
|
||||
vec![
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m1", "REMOTE_FIRST_REPLY"),
|
||||
responses::ev_completed_with_tokens("r1", 60),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m2", "REMOTE_SECOND_REPLY"),
|
||||
responses::ev_completed_with_tokens("r2", 500),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m3", "REMOTE_FINAL_REPLY"),
|
||||
responses::ev_completed_with_tokens("r3", 80),
|
||||
]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let compacted_history = vec![
|
||||
user_message_item("USER_ONE"),
|
||||
user_message_item("USER_TWO"),
|
||||
user_message_item(&summary_with_prefix("REMOTE_PRE_TURN_SUMMARY")),
|
||||
];
|
||||
let compact_mock = responses::mount_compact_json_once(
|
||||
harness.server(),
|
||||
serde_json::json!({ "output": compacted_history }),
|
||||
)
|
||||
.await;
|
||||
|
||||
for user in ["USER_ONE", "USER_TWO", "USER_THREE"] {
|
||||
if user == "USER_THREE" {
|
||||
codex
|
||||
.submit(Op::OverrideTurnContext {
|
||||
cwd: Some(PathBuf::from(PRETURN_CONTEXT_DIFF_CWD)),
|
||||
approval_policy: None,
|
||||
sandbox_policy: None,
|
||||
windows_sandbox_level: None,
|
||||
model: None,
|
||||
effort: None,
|
||||
summary: None,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
}
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: user.to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
}
|
||||
|
||||
assert_eq!(compact_mock.requests().len(), 1);
|
||||
let requests = responses_mock.requests();
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
3,
|
||||
"expected user, user, and post-compact turn"
|
||||
);
|
||||
|
||||
let compact_request = compact_mock.single_request();
|
||||
insta::assert_snapshot!(
|
||||
"remote_pre_turn_compaction_including_incoming_shapes",
|
||||
sectioned_request_shapes(
|
||||
"Remote pre-turn auto-compaction with a context override emits the context diff in the compact request while excluding the incoming user message.",
|
||||
&[
|
||||
("Remote Compaction Request", &compact_request),
|
||||
("Remote Post-Compaction History Layout", &requests[2]),
|
||||
]
|
||||
)
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&compact_request, PRETURN_CONTEXT_DIFF_CWD_MARKER),
|
||||
"expected remote compact request to include pre-turn context diff"
|
||||
);
|
||||
assert!(
|
||||
!request_contains_text(&compact_request, "USER_THREE"),
|
||||
"current behavior excludes incoming user message from remote pre-turn compaction input"
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(
|
||||
&requests[2],
|
||||
&summary_with_prefix("REMOTE_PRE_TURN_SUMMARY")
|
||||
),
|
||||
"post-compaction request should include remote summary text"
|
||||
);
|
||||
assert_eq!(
|
||||
requests[2]
|
||||
.message_input_texts("user")
|
||||
.iter()
|
||||
.filter(|text| text.as_str() == "USER_THREE")
|
||||
.count(),
|
||||
1,
|
||||
"post-compaction request should contain incoming user exactly once from runtime append"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
// TODO(ccunningham): Update once remote pre-turn compaction failure path includes incoming
|
||||
// user input.
|
||||
async fn snapshot_request_shape_remote_pre_turn_compaction_failure_stops_without_retry()
|
||||
-> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = TestCodexHarness::with_builder(
|
||||
test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(|config| {
|
||||
config.model_auto_compact_token_limit = Some(200);
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
let codex = harness.test().codex.clone();
|
||||
|
||||
let responses_mock = responses::mount_sse_sequence(
|
||||
harness.server(),
|
||||
vec![responses::sse(vec![
|
||||
responses::ev_assistant_message("m1", "REMOTE_FIRST_REPLY"),
|
||||
responses::ev_completed_with_tokens("r1", 500),
|
||||
])],
|
||||
)
|
||||
.await;
|
||||
|
||||
let first_compact_mock = responses::mount_compact_json_once(
|
||||
harness.server(),
|
||||
serde_json::json!({ "output": "invalid compact payload shape" }),
|
||||
)
|
||||
.await;
|
||||
let post_compact_turn_mock = responses::mount_sse_once(
|
||||
harness.server(),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m2", "REMOTE_POST_COMPACT_SHOULD_NOT_RUN"),
|
||||
responses::ev_completed_with_tokens("r2", 80),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_ONE".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_TWO".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
let error_message = wait_for_event_match(&codex, |event| match event {
|
||||
EventMsg::Error(err) => Some(err.message.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
assert_eq!(first_compact_mock.requests().len(), 1);
|
||||
let requests = responses_mock.requests();
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
1,
|
||||
"expected no post-compaction follow-up turn request after compact failure"
|
||||
);
|
||||
assert!(
|
||||
post_compact_turn_mock.requests().is_empty(),
|
||||
"expected turn to stop after compaction failure"
|
||||
);
|
||||
|
||||
let include_attempt_request = first_compact_mock.single_request();
|
||||
insta::assert_snapshot!(
|
||||
"remote_pre_turn_compaction_failure_shapes",
|
||||
sectioned_request_shapes(
|
||||
"Remote pre-turn auto-compaction parse failure: compaction request excludes the incoming user message and the turn stops.",
|
||||
&[(
|
||||
"Remote Compaction Request (Incoming User Excluded)",
|
||||
&include_attempt_request
|
||||
),]
|
||||
)
|
||||
);
|
||||
assert!(
|
||||
!request_contains_text(&include_attempt_request, "USER_TWO"),
|
||||
"current behavior excludes incoming user message from remote pre-turn compaction input"
|
||||
);
|
||||
assert!(
|
||||
error_message.contains("invalid compact payload shape")
|
||||
|| error_message.contains("invalid type: string"),
|
||||
"expected compact parse failure to surface, got {error_message}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
// TODO(ccunningham): Update once remote pre-turn compaction context-overflow handling includes
|
||||
// incoming user input and emits richer oversized-input messaging.
|
||||
async fn snapshot_request_shape_remote_pre_turn_compaction_context_window_exceeded() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = TestCodexHarness::with_builder(
|
||||
test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(|config| {
|
||||
config.model_auto_compact_token_limit = Some(200);
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
let codex = harness.test().codex.clone();
|
||||
|
||||
let responses_mock = responses::mount_sse_sequence(
|
||||
harness.server(),
|
||||
vec![responses::sse(vec![
|
||||
responses::ev_assistant_message("m1", "REMOTE_FIRST_REPLY"),
|
||||
responses::ev_completed_with_tokens("r1", 500),
|
||||
])],
|
||||
)
|
||||
.await;
|
||||
|
||||
let compact_mock = responses::mount_compact_response_once(
|
||||
harness.server(),
|
||||
ResponseTemplate::new(400).set_body_json(serde_json::json!({
|
||||
"error": {
|
||||
"code": "context_length_exceeded",
|
||||
"message": "Your input exceeds the context window of this model. Please adjust your input and try again."
|
||||
}
|
||||
})),
|
||||
)
|
||||
.await;
|
||||
let post_compact_turn_mock = responses::mount_sse_once(
|
||||
harness.server(),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m2", "REMOTE_POST_COMPACT_SHOULD_NOT_RUN"),
|
||||
responses::ev_completed_with_tokens("r2", 80),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_ONE".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_TWO".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
let error_message = wait_for_event_match(&codex, |event| match event {
|
||||
EventMsg::Error(err) => Some(err.message.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
assert_eq!(compact_mock.requests().len(), 1);
|
||||
let requests = responses_mock.requests();
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
1,
|
||||
"expected no post-compaction follow-up turn request after compact failure"
|
||||
);
|
||||
assert!(
|
||||
post_compact_turn_mock.requests().is_empty(),
|
||||
"expected turn to stop after compaction failure"
|
||||
);
|
||||
|
||||
let include_attempt_request = compact_mock.single_request();
|
||||
insta::assert_snapshot!(
|
||||
"remote_pre_turn_compaction_context_window_exceeded_shapes",
|
||||
sectioned_request_shapes(
|
||||
"Remote pre-turn auto-compaction context-window failure: compaction request excludes the incoming user message and the turn errors.",
|
||||
&[(
|
||||
"Remote Compaction Request (Incoming User Excluded)",
|
||||
&include_attempt_request
|
||||
),]
|
||||
)
|
||||
);
|
||||
assert!(
|
||||
!request_contains_text(&include_attempt_request, "USER_TWO"),
|
||||
"current behavior excludes incoming user message from remote pre-turn compaction input"
|
||||
);
|
||||
assert!(
|
||||
error_message.to_lowercase().contains("context window"),
|
||||
"expected context window failure to surface, got {error_message}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn snapshot_request_shape_remote_mid_turn_continuation_compaction() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = TestCodexHarness::with_builder(
|
||||
test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(|config| {
|
||||
config.model_auto_compact_token_limit = Some(200);
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
let codex = harness.test().codex.clone();
|
||||
|
||||
let responses_mock = responses::mount_sse_sequence(
|
||||
harness.server(),
|
||||
vec![
|
||||
responses::sse(vec![
|
||||
responses::ev_function_call("call-remote-mid-turn", DUMMY_FUNCTION_NAME, "{}"),
|
||||
responses::ev_completed_with_tokens("r1", 500),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m2", "REMOTE_MID_TURN_FINAL_REPLY"),
|
||||
responses::ev_completed_with_tokens("r2", 80),
|
||||
]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let compacted_history = vec![
|
||||
user_message_item("USER_ONE"),
|
||||
user_message_item(&summary_with_prefix("REMOTE_MID_TURN_SUMMARY")),
|
||||
];
|
||||
let compact_mock = responses::mount_compact_json_once(
|
||||
harness.server(),
|
||||
serde_json::json!({ "output": compacted_history }),
|
||||
)
|
||||
.await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_ONE".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
assert_eq!(compact_mock.requests().len(), 1);
|
||||
let requests = responses_mock.requests();
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
2,
|
||||
"expected initial and post-compact requests"
|
||||
);
|
||||
|
||||
let compact_request = compact_mock.single_request();
|
||||
insta::assert_snapshot!(
|
||||
"remote_mid_turn_compaction_shapes",
|
||||
sectioned_request_shapes(
|
||||
"Remote mid-turn continuation compaction after tool output: compact request includes tool artifacts and follow-up request includes the summary.",
|
||||
&[
|
||||
("Remote Compaction Request", &compact_request),
|
||||
("Remote Post-Compaction History Layout", &requests[1]),
|
||||
]
|
||||
)
|
||||
);
|
||||
assert!(
|
||||
!compact_request
|
||||
.inputs_of_type("function_call_output")
|
||||
.is_empty(),
|
||||
"remote mid-turn compaction request should include function call output"
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(
|
||||
&requests[1],
|
||||
&summary_with_prefix("REMOTE_MID_TURN_SUMMARY")
|
||||
),
|
||||
"post-mid-turn request should include remote compaction summary text"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
// TODO(ccunningham): Update once manual remote /compact with no prior user turn becomes a no-op.
|
||||
async fn snapshot_request_shape_remote_manual_compact_without_previous_user_messages() -> Result<()>
|
||||
{
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = TestCodexHarness::with_builder(
|
||||
test_codex().with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()),
|
||||
)
|
||||
.await?;
|
||||
let codex = harness.test().codex.clone();
|
||||
|
||||
let responses_mock = responses::mount_sse_once(
|
||||
harness.server(),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m1", "REMOTE_MANUAL_EMPTY_FOLLOW_UP_REPLY"),
|
||||
responses::ev_completed_with_tokens("r1", 80),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let compact_mock =
|
||||
responses::mount_compact_json_once(harness.server(), serde_json::json!({ "output": [] }))
|
||||
.await;
|
||||
|
||||
codex.submit(Op::Compact).await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_ONE".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
assert_eq!(
|
||||
compact_mock.requests().len(),
|
||||
1,
|
||||
"current behavior still issues remote compaction for manual /compact without prior user"
|
||||
);
|
||||
let compact_request = compact_mock.single_request();
|
||||
let follow_up_request = responses_mock.single_request();
|
||||
insta::assert_snapshot!(
|
||||
"remote_manual_compact_without_prev_user_shapes",
|
||||
sectioned_request_shapes(
|
||||
"Remote manual /compact with no prior user turn still issues a compact request; follow-up turn carries canonical context and new user message.",
|
||||
&[
|
||||
("Remote Compaction Request", &compact_request),
|
||||
("Remote Post-Compaction History Layout", &follow_up_request),
|
||||
]
|
||||
)
|
||||
);
|
||||
assert!(
|
||||
!request_contains_text(
|
||||
&follow_up_request,
|
||||
&summary_with_prefix("REMOTE_MANUAL_EMPTY_SUMMARY"),
|
||||
),
|
||||
"post-compact request should not include compact summary when remote compaction is skipped"
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&follow_up_request, "USER_ONE"),
|
||||
"post-compact request should include the submitted user message"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn snapshot_request_shape_remote_manual_compact_with_previous_user_messages() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = TestCodexHarness::with_builder(
|
||||
test_codex().with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()),
|
||||
)
|
||||
.await?;
|
||||
let codex = harness.test().codex.clone();
|
||||
|
||||
let responses_mock = responses::mount_sse_sequence(
|
||||
harness.server(),
|
||||
vec![
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m1", "REMOTE_MANUAL_FIRST_REPLY"),
|
||||
responses::ev_completed_with_tokens("r1", 80),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m2", "REMOTE_MANUAL_FOLLOW_UP_REPLY"),
|
||||
responses::ev_completed_with_tokens("r2", 80),
|
||||
]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let compacted_history = vec![
|
||||
user_message_item("USER_ONE"),
|
||||
user_message_item(&summary_with_prefix("REMOTE_MANUAL_WITH_HISTORY_SUMMARY")),
|
||||
];
|
||||
let compact_mock = responses::mount_compact_json_once(
|
||||
harness.server(),
|
||||
serde_json::json!({ "output": compacted_history }),
|
||||
)
|
||||
.await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_ONE".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex.submit(Op::Compact).await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "USER_TWO".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
assert_eq!(compact_mock.requests().len(), 1);
|
||||
let requests = responses_mock.requests();
|
||||
assert_eq!(requests.len(), 2, "expected user and post-compact requests");
|
||||
|
||||
let compact_request = compact_mock.single_request();
|
||||
insta::assert_snapshot!(
|
||||
"remote_manual_compact_with_history_shapes",
|
||||
sectioned_request_shapes(
|
||||
"Remote manual /compact with prior user history compacts existing history and follow-up includes compact summary plus new user message.",
|
||||
&[
|
||||
("Remote Compaction Request", &compact_request),
|
||||
("Remote Post-Compaction History Layout", &requests[1]),
|
||||
]
|
||||
)
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&compact_request, "USER_ONE"),
|
||||
"remote compaction request should include existing user history"
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(&requests[1], "USER_TWO"),
|
||||
"post-compact request should include latest user message"
|
||||
);
|
||||
assert!(
|
||||
request_contains_text(
|
||||
&requests[1],
|
||||
&summary_with_prefix("REMOTE_MANUAL_WITH_HISTORY_SUMMARY"),
|
||||
),
|
||||
"post-compact request should include remote compact summary text"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
---
|
||||
source: core/tests/suite/compact.rs
|
||||
expression: "sectioned_request_shapes(\"Manual /compact with prior user history compacts existing history and the follow-up turn includes the compact summary plus new user message.\",\n&[(\"Local Compaction Request\", &requests[1]),\n(\"Local Post-Compaction History Layout\", &requests[2]),])"
|
||||
---
|
||||
Scenario: Manual /compact with prior user history compacts existing history and the follow-up turn includes the compact summary plus new user message.
|
||||
|
||||
## Local Compaction Request
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_ONE
|
||||
04:message/assistant:FIRST_REPLY
|
||||
05:message/user:<SUMMARIZATION_PROMPT>
|
||||
|
||||
## Local Post-Compaction History Layout
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_ONE
|
||||
04:message/user:Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\nMANUAL_SUMMARY
|
||||
05:message/user:USER_TWO
|
||||
@@ -0,0 +1,18 @@
|
||||
---
|
||||
source: core/tests/suite/compact.rs
|
||||
expression: "sectioned_request_shapes(\"Manual /compact with no prior user turn currently still issues a compaction request; follow-up turn carries canonical context and the new user message.\",\n&[(\"Local Compaction Request\", &requests[0]),\n(\"Local Post-Compaction History Layout\", &requests[1]),])"
|
||||
---
|
||||
Scenario: Manual /compact with no prior user turn currently still issues a compaction request; follow-up turn carries canonical context and the new user message.
|
||||
|
||||
## Local Compaction Request
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:<SUMMARIZATION_PROMPT>
|
||||
|
||||
## Local Post-Compaction History Layout
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\nMANUAL_EMPTY_SUMMARY
|
||||
04:message/user:AFTER_MANUAL_EMPTY_COMPACT
|
||||
@@ -0,0 +1,21 @@
|
||||
---
|
||||
source: core/tests/suite/compact.rs
|
||||
expression: "sectioned_request_shapes(\"Mid-turn continuation compaction after tool output: compact request includes tool artifacts and follow-up request includes the summary.\",\n&[(\"Local Compaction Request\", &requests[1]),\n(\"Local Post-Compaction History Layout\", &requests[2]),])"
|
||||
---
|
||||
Scenario: Mid-turn continuation compaction after tool output: compact request includes tool artifacts and follow-up request includes the summary.
|
||||
|
||||
## Local Compaction Request
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_ONE
|
||||
04:function_call/<TOOL_CALL>
|
||||
05:function_call_output:<TOOL_ERROR_OUTPUT>
|
||||
06:message/user:<SUMMARIZATION_PROMPT>
|
||||
|
||||
## Local Post-Compaction History Layout
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_ONE
|
||||
04:message/user:Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\nMID_TURN_SUMMARY
|
||||
@@ -0,0 +1,13 @@
|
||||
---
|
||||
source: core/tests/suite/compact.rs
|
||||
expression: "sectioned_request_shapes(\"Pre-turn auto-compaction context-window failure: compaction request excludes the incoming user message and the turn errors.\",\n&[(\"Local Compaction Request (Incoming User Excluded)\", &requests[1]),])"
|
||||
---
|
||||
Scenario: Pre-turn auto-compaction context-window failure: compaction request excludes the incoming user message and the turn errors.
|
||||
|
||||
## Local Compaction Request (Incoming User Excluded)
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_ONE
|
||||
04:message/assistant:FIRST_REPLY
|
||||
05:message/user:<SUMMARIZATION_PROMPT>
|
||||
@@ -0,0 +1,25 @@
|
||||
---
|
||||
source: core/tests/suite/compact.rs
|
||||
expression: "sectioned_request_shapes(\"Pre-turn auto-compaction with a context override emits the context diff in the compact request while the incoming user message is still excluded.\",\n&[(\"Local Compaction Request\", &requests[2]),\n(\"Local Post-Compaction History Layout\", &requests[3]),])"
|
||||
---
|
||||
Scenario: Pre-turn auto-compaction with a context override emits the context diff in the compact request while the incoming user message is still excluded.
|
||||
|
||||
## Local Compaction Request
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_ONE
|
||||
04:message/assistant:FIRST_REPLY
|
||||
05:message/user:USER_TWO
|
||||
06:message/assistant:SECOND_REPLY
|
||||
07:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
|
||||
08:message/user:<SUMMARIZATION_PROMPT>
|
||||
|
||||
## Local Post-Compaction History Layout
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
|
||||
03:message/user:USER_ONE
|
||||
04:message/user:USER_TWO
|
||||
05:message/user:Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\nPRE_TURN_SUMMARY
|
||||
06:message/user:<image> | </image> | USER_THREE
|
||||
@@ -0,0 +1,20 @@
|
||||
---
|
||||
source: core/tests/suite/compact_remote.rs
|
||||
expression: "sectioned_request_shapes(\"Remote manual /compact with prior user history compacts existing history and follow-up includes compact summary plus new user message.\",\n&[(\"Remote Compaction Request\", &compact_request),\n(\"Remote Post-Compaction History Layout\", &requests[1]),])"
|
||||
---
|
||||
Scenario: Remote manual /compact with prior user history compacts existing history and follow-up includes compact summary plus new user message.
|
||||
|
||||
## Remote Compaction Request
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_ONE
|
||||
04:message/assistant:REMOTE_MANUAL_FIRST_REPLY
|
||||
|
||||
## Remote Post-Compaction History Layout
|
||||
00:message/user:USER_ONE
|
||||
01:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
02:message/user:<AGENTS_MD>
|
||||
03:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
04:message/user:Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\nREMOTE_MANUAL_WITH_HISTORY_SUMMARY
|
||||
05:message/user:USER_TWO
|
||||
@@ -0,0 +1,16 @@
|
||||
---
|
||||
source: core/tests/suite/compact_remote.rs
|
||||
expression: "sectioned_request_shapes(\"Remote manual /compact with no prior user turn still issues a compact request; follow-up turn carries canonical context and new user message.\",\n&[(\"Remote Compaction Request\", &compact_request),\n(\"Remote Post-Compaction History Layout\", &follow_up_request),])"
|
||||
---
|
||||
Scenario: Remote manual /compact with no prior user turn still issues a compact request; follow-up turn carries canonical context and new user message.
|
||||
|
||||
## Remote Compaction Request
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
|
||||
## Remote Post-Compaction History Layout
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_ONE
|
||||
@@ -0,0 +1,20 @@
|
||||
---
|
||||
source: core/tests/suite/compact_remote.rs
|
||||
expression: "sectioned_request_shapes(\"Remote mid-turn continuation compaction after tool output: compact request includes tool artifacts and follow-up request includes the summary.\",\n&[(\"Remote Compaction Request\", &compact_request),\n(\"Remote Post-Compaction History Layout\", &requests[1]),])"
|
||||
---
|
||||
Scenario: Remote mid-turn continuation compaction after tool output: compact request includes tool artifacts and follow-up request includes the summary.
|
||||
|
||||
## Remote Compaction Request
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_ONE
|
||||
04:function_call/<TOOL_CALL>
|
||||
05:function_call_output:<TOOL_ERROR_OUTPUT>
|
||||
|
||||
## Remote Post-Compaction History Layout
|
||||
00:message/user:USER_ONE
|
||||
01:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
02:message/user:<AGENTS_MD>
|
||||
03:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
04:message/user:Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\nREMOTE_MID_TURN_SUMMARY
|
||||
@@ -0,0 +1,12 @@
|
||||
---
|
||||
source: core/tests/suite/compact_remote.rs
|
||||
expression: "sectioned_request_shapes(\"Remote pre-turn auto-compaction context-window failure: compaction request excludes the incoming user message and the turn errors.\",\n&[(\"Remote Compaction Request (Incoming User Excluded)\",\n&include_attempt_request),])"
|
||||
---
|
||||
Scenario: Remote pre-turn auto-compaction context-window failure: compaction request excludes the incoming user message and the turn errors.
|
||||
|
||||
## Remote Compaction Request (Incoming User Excluded)
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_ONE
|
||||
04:message/assistant:REMOTE_FIRST_REPLY
|
||||
@@ -0,0 +1,12 @@
|
||||
---
|
||||
source: core/tests/suite/compact_remote.rs
|
||||
expression: "sectioned_request_shapes(\"Remote pre-turn auto-compaction parse failure: compaction request excludes the incoming user message and the turn stops.\",\n&[(\"Remote Compaction Request (Incoming User Excluded)\",\n&include_attempt_request),])"
|
||||
---
|
||||
Scenario: Remote pre-turn auto-compaction parse failure: compaction request excludes the incoming user message and the turn stops.
|
||||
|
||||
## Remote Compaction Request (Incoming User Excluded)
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_ONE
|
||||
04:message/assistant:REMOTE_FIRST_REPLY
|
||||
@@ -0,0 +1,24 @@
|
||||
---
|
||||
source: core/tests/suite/compact_remote.rs
|
||||
expression: "sectioned_request_shapes(\"Remote pre-turn auto-compaction with a context override emits the context diff in the compact request while excluding the incoming user message.\",\n&[(\"Remote Compaction Request\", &compact_request),\n(\"Remote Post-Compaction History Layout\", &requests[2]),])"
|
||||
---
|
||||
Scenario: Remote pre-turn auto-compaction with a context override emits the context diff in the compact request while excluding the incoming user message.
|
||||
|
||||
## Remote Compaction Request
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user:<AGENTS_MD>
|
||||
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
03:message/user:USER_ONE
|
||||
04:message/assistant:REMOTE_FIRST_REPLY
|
||||
05:message/user:USER_TWO
|
||||
06:message/assistant:REMOTE_SECOND_REPLY
|
||||
07:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
|
||||
|
||||
## Remote Post-Compaction History Layout
|
||||
00:message/user:USER_ONE
|
||||
01:message/user:USER_TWO
|
||||
02:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
03:message/user:<AGENTS_MD>
|
||||
04:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
|
||||
05:message/user:Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\nREMOTE_PRE_TURN_SUMMARY
|
||||
06:message/user:USER_THREE
|
||||
Reference in New Issue
Block a user