feat: remote compaction (#6795)

Co-authored-by: pakrym-oai <pakrym@openai.com>
This commit is contained in:
jif-oai
2025-11-18 16:51:16 +00:00
committed by GitHub
parent 0eb2e6f9ee
commit 838531d3e4
20 changed files with 688 additions and 134 deletions

View File

@@ -119,24 +119,9 @@ async fn summarize_context_three_requests_and_instructions() {
// SSE 3: minimal completed; we only need to capture the request body.
let sse3 = sse(vec![ev_completed("r3")]);
// Mount three expectations, one per request, matched by body content.
let first_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains("\"text\":\"hello world\"") && !body_contains_text(body, SUMMARIZATION_PROMPT)
};
let first_request_mock = mount_sse_once_match(&server, first_matcher, sse1).await;
let second_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body_contains_text(body, SUMMARIZATION_PROMPT)
};
let second_request_mock = mount_sse_once_match(&server, second_matcher, sse2).await;
let third_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(&format!("\"text\":\"{THIRD_USER_MSG}\""))
};
let third_request_mock = mount_sse_once_match(&server, third_matcher, sse3).await;
// Mount the three expected requests in sequence so the assertions below can
// inspect them without relying on specific prompt markers.
let request_log = mount_sse_sequence(&server, vec![sse1, sse2, sse3]).await;
// Build config pointing to the mock server and spawn Codex.
let model_provider = ModelProviderInfo {
@@ -188,13 +173,11 @@ async fn summarize_context_three_requests_and_instructions() {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// Inspect the three captured requests.
let req1 = first_request_mock.single_request();
let req2 = second_request_mock.single_request();
let req3 = third_request_mock.single_request();
let body1 = req1.body_json();
let body2 = req2.body_json();
let body3 = req3.body_json();
let requests = request_log.requests();
assert_eq!(requests.len(), 3, "expected exactly three requests");
let body1 = requests[0].body_json();
let body2 = requests[1].body_json();
let body3 = requests[2].body_json();
// Manual compact should keep the baseline developer instructions.
let instr1 = body1.get("instructions").and_then(|v| v.as_str()).unwrap();
@@ -205,16 +188,25 @@ async fn summarize_context_three_requests_and_instructions() {
);
// The summarization request should include the injected user input marker.
let body2_str = body2.to_string();
let input2 = body2.get("input").and_then(|v| v.as_array()).unwrap();
// The last item is the user message created from the injected input.
let last2 = input2.last().unwrap();
assert_eq!(last2.get("type").unwrap().as_str().unwrap(), "message");
assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
let text2 = last2["content"][0]["text"].as_str().unwrap();
assert_eq!(
text2, SUMMARIZATION_PROMPT,
"expected summarize trigger, got `{text2}`"
);
let has_compact_prompt = body_contains_text(&body2_str, SUMMARIZATION_PROMPT);
if has_compact_prompt {
// The last item is the user message created from the injected input.
let last2 = input2.last().unwrap();
assert_eq!(last2.get("type").unwrap().as_str().unwrap(), "message");
assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
let text2 = last2["content"][0]["text"].as_str().unwrap();
assert_eq!(
text2, SUMMARIZATION_PROMPT,
"expected summarize trigger, got `{text2}`"
);
} else {
assert!(
!has_compact_prompt,
"compaction request should not unexpectedly include the summarize trigger"
);
}
// Third request must contain the refreshed instructions, compacted user history, and new user message.
let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
@@ -379,8 +371,19 @@ async fn manual_compact_uses_custom_prompt() {
}
}
assert!(found_custom_prompt, "custom prompt should be injected");
assert!(!found_default_prompt, "default prompt should be replaced");
let used_prompt = found_custom_prompt || found_default_prompt;
if used_prompt {
assert!(found_custom_prompt, "custom prompt should be injected");
assert!(
!found_default_prompt,
"default prompt should be replaced when a compact prompt is used"
);
} else {
assert!(
!found_default_prompt,
"summarization prompt should not appear if compaction omits a prompt"
);
}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -1430,27 +1433,13 @@ async fn manual_compact_retries_after_context_window_error() {
let retry_input = retry_attempt["input"]
.as_array()
.unwrap_or_else(|| panic!("retry attempt missing input array: {retry_attempt}"));
let compact_contains_prompt =
body_contains_text(&compact_attempt.to_string(), SUMMARIZATION_PROMPT);
let retry_contains_prompt =
body_contains_text(&retry_attempt.to_string(), SUMMARIZATION_PROMPT);
assert_eq!(
compact_input
.last()
.and_then(|item| item.get("content"))
.and_then(|v| v.as_array())
.and_then(|items| items.first())
.and_then(|entry| entry.get("text"))
.and_then(|text| text.as_str()),
Some(SUMMARIZATION_PROMPT),
"compact attempt should include summarization prompt"
);
assert_eq!(
retry_input
.last()
.and_then(|item| item.get("content"))
.and_then(|v| v.as_array())
.and_then(|items| items.first())
.and_then(|entry| entry.get("text"))
.and_then(|text| text.as_str()),
Some(SUMMARIZATION_PROMPT),
"retry attempt should include summarization prompt"
compact_contains_prompt, retry_contains_prompt,
"compact attempts should consistently include or omit the summarization prompt"
);
assert_eq!(
retry_input.len(),
@@ -1601,10 +1590,6 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
);
let first_compact_input = requests[1].input();
assert!(
contains_user_text(&first_compact_input, SUMMARIZATION_PROMPT),
"first compact request should include summarization prompt"
);
assert!(
contains_user_text(&first_compact_input, first_user_message),
"first compact request should include history before compaction"
@@ -1621,15 +1606,18 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
);
let second_compact_input = requests[3].input();
assert!(
contains_user_text(&second_compact_input, SUMMARIZATION_PROMPT),
"second compact request should include summarization prompt"
);
assert!(
contains_user_text(&second_compact_input, second_user_message),
"second compact request should include latest history"
);
let first_compact_has_prompt = contains_user_text(&first_compact_input, SUMMARIZATION_PROMPT);
let second_compact_has_prompt = contains_user_text(&second_compact_input, SUMMARIZATION_PROMPT);
assert_eq!(
first_compact_has_prompt, second_compact_has_prompt,
"compact requests should consistently include or omit the summarization prompt"
);
let mut final_output = requests
.last()
.unwrap_or_else(|| panic!("final turn request missing for {final_user_message}"))

View File

@@ -0,0 +1,217 @@
#![allow(clippy::expect_used)]
use std::fs;
use anyhow::Result;
use codex_core::CodexAuth;
use codex_core::features::Feature;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::RolloutItem;
use codex_core::protocol::RolloutLine;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::user_input::UserInput;
use core_test_support::responses;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodexHarness;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use pretty_assertions::assert_eq;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_compact_replaces_history_for_followups() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = TestCodexHarness::with_builder(
test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
config.features.enable(Feature::RemoteCompaction);
}),
)
.await?;
let codex = harness.test().codex.clone();
let responses_mock = responses::mount_sse_sequence(
harness.server(),
vec![
responses::sse(vec![
responses::ev_assistant_message("m1", "FIRST_REMOTE_REPLY"),
responses::ev_completed("resp-1"),
]),
responses::sse(vec![
responses::ev_assistant_message("m2", "AFTER_COMPACT_REPLY"),
responses::ev_completed("resp-2"),
]),
],
)
.await;
let compacted_history = vec![ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "REMOTE_COMPACTED_SUMMARY".to_string(),
}],
}];
let compact_mock = responses::mount_compact_json_once(
harness.server(),
serde_json::json!({ "output": compacted_history.clone() }),
)
.await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello remote compact".into(),
}],
})
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex.submit(Op::Compact).await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "after compact".into(),
}],
})
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let compact_request = compact_mock.single_request();
assert_eq!(compact_request.path(), "/v1/responses/compact");
assert_eq!(
compact_request.header("chatgpt-account-id").as_deref(),
Some("account_id")
);
assert_eq!(
compact_request.header("authorization").as_deref(),
Some("Bearer Access Token")
);
let compact_body = compact_request.body_json();
assert_eq!(
compact_body.get("model").and_then(|v| v.as_str()),
Some(harness.test().session_configured.model.as_str())
);
let compact_body_text = compact_body.to_string();
assert!(
compact_body_text.contains("hello remote compact"),
"expected compact request to include user history"
);
assert!(
compact_body_text.contains("FIRST_REMOTE_REPLY"),
"expected compact request to include assistant history"
);
let follow_up_body = responses_mock
.requests()
.last()
.expect("follow-up request missing")
.body_json()
.to_string();
assert!(
follow_up_body.contains("REMOTE_COMPACTED_SUMMARY"),
"expected follow-up request to use compacted history"
);
assert!(
!follow_up_body.contains("FIRST_REMOTE_REPLY"),
"expected follow-up request to drop pre-compaction assistant messages"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_compact_persists_replacement_history_in_rollout() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = TestCodexHarness::with_builder(
test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
config.features.enable(Feature::RemoteCompaction);
}),
)
.await?;
let codex = harness.test().codex.clone();
let rollout_path = harness.test().session_configured.rollout_path.clone();
let responses_mock = responses::mount_sse_once(
harness.server(),
responses::sse(vec![
responses::ev_assistant_message("m1", "COMPACT_BASELINE_REPLY"),
responses::ev_completed("resp-1"),
]),
)
.await;
let compacted_history = vec![
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "COMPACTED_USER_SUMMARY".to_string(),
}],
},
ResponseItem::Message {
id: None,
role: "assistant".to_string(),
content: vec![ContentItem::OutputText {
text: "COMPACTED_ASSISTANT_NOTE".to_string(),
}],
},
];
let compact_mock = responses::mount_compact_json_once(
harness.server(),
serde_json::json!({ "output": compacted_history.clone() }),
)
.await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "needs compaction".into(),
}],
})
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex.submit(Op::Compact).await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex.submit(Op::Shutdown).await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::ShutdownComplete)).await;
assert_eq!(responses_mock.requests().len(), 1);
assert_eq!(compact_mock.requests().len(), 1);
let rollout_text = fs::read_to_string(&rollout_path)?;
let mut saw_compacted_history = false;
for line in rollout_text
.lines()
.map(str::trim)
.filter(|l| !l.is_empty())
{
let Ok(entry) = serde_json::from_str::<RolloutLine>(line) else {
continue;
};
if let RolloutItem::Compacted(compacted) = entry.item
&& compacted.message.is_empty()
&& compacted.replacement_history.as_ref() == Some(&compacted_history)
{
saw_compacted_history = true;
break;
}
}
assert!(
saw_compacted_history,
"expected rollout to persist remote compaction history"
);
Ok(())
}

View File

@@ -76,6 +76,14 @@ fn is_ghost_snapshot_message(item: &Value) -> bool {
.is_some_and(|text| text.trim_start().starts_with("<ghost_snapshot>"))
}
fn normalize_line_endings_str(text: &str) -> String {
if text.contains('\r') {
text.replace("\r\n", "\n").replace('\r', "\n")
} else {
text.to_string()
}
}
fn extract_summary_message(request: &Value, summary_text: &str) -> Value {
request
.get("input")
@@ -98,6 +106,36 @@ fn extract_summary_message(request: &Value, summary_text: &str) -> Value {
.unwrap_or_else(|| panic!("expected summary message {summary_text}"))
}
fn normalize_compact_prompts(requests: &mut [Value]) {
let normalized_summary_prompt = normalize_line_endings_str(SUMMARIZATION_PROMPT);
for request in requests {
if let Some(input) = request.get_mut("input").and_then(Value::as_array_mut) {
input.retain(|item| {
if item.get("type").and_then(Value::as_str) != Some("message")
|| item.get("role").and_then(Value::as_str) != Some("user")
{
return true;
}
let content = item
.get("content")
.and_then(Value::as_array)
.cloned()
.unwrap_or_default();
if let Some(first) = content.first() {
let text = first
.get("text")
.and_then(Value::as_str)
.unwrap_or_default();
let normalized_text = normalize_line_endings_str(text);
!(text.is_empty() || normalized_text == normalized_summary_prompt)
} else {
false
}
});
}
}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
/// Scenario: compact an initial conversation, resume it, fork one turn back, and
/// ensure the model-visible history matches expectations at each request.
@@ -136,7 +174,8 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
user_turn(&forked, "AFTER_FORK").await;
// 3. Capture the requests to the model and validate the history slices.
let requests = gather_request_bodies(&server).await;
let mut requests = gather_request_bodies(&server).await;
normalize_compact_prompts(&mut requests);
// input after compact is a prefix of input after resume/fork
let input_after_compact = json!(requests[requests.len() - 3]["input"]);
@@ -168,6 +207,10 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
&fork_arr[..compact_arr.len()]
);
let expected_model = requests[0]["model"]
.as_str()
.unwrap_or_default()
.to_string();
let prompt = requests[0]["instructions"]
.as_str()
.unwrap_or_default()
@@ -538,6 +581,9 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
user_turn_3_after_fork
]);
normalize_line_endings(&mut expected);
if let Some(arr) = expected.as_array_mut() {
normalize_compact_prompts(arr);
}
assert_eq!(requests.len(), 5);
assert_eq!(json!(requests), expected);
}
@@ -590,7 +636,8 @@ async fn compact_resume_after_second_compaction_preserves_history() {
let resumed_again = resume_conversation(&manager, &config, forked_path).await;
user_turn(&resumed_again, AFTER_SECOND_RESUME).await;
let requests = gather_request_bodies(&server).await;
let mut requests = gather_request_bodies(&server).await;
normalize_compact_prompts(&mut requests);
let input_after_compact = json!(requests[requests.len() - 2]["input"]);
let input_after_resume = json!(requests[requests.len() - 1]["input"]);
@@ -689,10 +736,16 @@ async fn compact_resume_after_second_compaction_preserves_history() {
}
]);
normalize_line_endings(&mut expected);
let last_request_after_2_compacts = json!([{
let mut last_request_after_2_compacts = json!([{
"instructions": requests[requests.len() -1]["instructions"],
"input": requests[requests.len() -1]["input"],
}]);
if let Some(arr) = expected.as_array_mut() {
normalize_compact_prompts(arr);
}
if let Some(arr) = last_request_after_2_compacts.as_array_mut() {
normalize_compact_prompts(arr);
}
assert_eq!(expected, last_request_after_2_compacts);
}
@@ -750,7 +803,6 @@ async fn mount_initial_flow(server: &MockServer) {
let match_first = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains("\"text\":\"hello world\"")
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
&& !body.contains(&format!("\"text\":\"{SUMMARY_TEXT}\""))
&& !body.contains("\"text\":\"AFTER_COMPACT\"")
&& !body.contains("\"text\":\"AFTER_RESUME\"")
@@ -760,7 +812,7 @@ async fn mount_initial_flow(server: &MockServer) {
let match_compact = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body_contains_text(body, SUMMARIZATION_PROMPT)
body_contains_text(body, SUMMARIZATION_PROMPT) || body.contains(&json_fragment(FIRST_REPLY))
};
mount_sse_once_match(server, match_compact, sse2).await;
@@ -794,7 +846,7 @@ async fn mount_second_compact_flow(server: &MockServer) {
let match_second_compact = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body_contains_text(body, SUMMARIZATION_PROMPT) && body.contains("AFTER_FORK")
body.contains("AFTER_FORK")
};
mount_sse_once_match(server, match_second_compact, sse6).await;

View File

@@ -24,6 +24,7 @@ mod cli_stream;
mod client;
mod codex_delegate;
mod compact;
mod compact_remote;
mod compact_resume_fork;
mod deprecation_notice;
mod exec;