feat: remote compaction (#6795)

Co-authored-by: pakrym-oai <pakrym@openai.com>
2026-04-26 15:45:02 +00:00 · 2025-11-18 16:51:16 +00:00
parent 0eb2e6f9ee
commit 838531d3e4
20 changed files with 688 additions and 134 deletions
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -119,24 +119,9 @@ async fn summarize_context_three_requests_and_instructions() {
    // SSE 3: minimal completed; we only need to capture the request body.
    let sse3 = sse(vec![ev_completed("r3")]);

-    // Mount three expectations, one per request, matched by body content.
-    let first_matcher = |req: &wiremock::Request| {
-        let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body.contains("\"text\":\"hello world\"") && !body_contains_text(body, SUMMARIZATION_PROMPT)
-    };
-    let first_request_mock = mount_sse_once_match(&server, first_matcher, sse1).await;
-
-    let second_matcher = |req: &wiremock::Request| {
-        let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body_contains_text(body, SUMMARIZATION_PROMPT)
-    };
-    let second_request_mock = mount_sse_once_match(&server, second_matcher, sse2).await;
-
-    let third_matcher = |req: &wiremock::Request| {
-        let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body.contains(&format!("\"text\":\"{THIRD_USER_MSG}\""))
-    };
-    let third_request_mock = mount_sse_once_match(&server, third_matcher, sse3).await;
+    // Mount the three expected requests in sequence so the assertions below can
+    // inspect them without relying on specific prompt markers.
+    let request_log = mount_sse_sequence(&server, vec![sse1, sse2, sse3]).await;

    // Build config pointing to the mock server and spawn Codex.
    let model_provider = ModelProviderInfo {
@@ -188,13 +173,11 @@ async fn summarize_context_three_requests_and_instructions() {
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    // Inspect the three captured requests.
-    let req1 = first_request_mock.single_request();
-    let req2 = second_request_mock.single_request();
-    let req3 = third_request_mock.single_request();
-
-    let body1 = req1.body_json();
-    let body2 = req2.body_json();
-    let body3 = req3.body_json();
+    let requests = request_log.requests();
+    assert_eq!(requests.len(), 3, "expected exactly three requests");
+    let body1 = requests[0].body_json();
+    let body2 = requests[1].body_json();
+    let body3 = requests[2].body_json();

    // Manual compact should keep the baseline developer instructions.
    let instr1 = body1.get("instructions").and_then(|v| v.as_str()).unwrap();
@@ -205,16 +188,25 @@ async fn summarize_context_three_requests_and_instructions() {
    );

    // The summarization request should include the injected user input marker.
+    let body2_str = body2.to_string();
    let input2 = body2.get("input").and_then(|v| v.as_array()).unwrap();
-    // The last item is the user message created from the injected input.
-    let last2 = input2.last().unwrap();
-    assert_eq!(last2.get("type").unwrap().as_str().unwrap(), "message");
-    assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
-    let text2 = last2["content"][0]["text"].as_str().unwrap();
-    assert_eq!(
-        text2, SUMMARIZATION_PROMPT,
-        "expected summarize trigger, got `{text2}`"
-    );
+    let has_compact_prompt = body_contains_text(&body2_str, SUMMARIZATION_PROMPT);
+    if has_compact_prompt {
+        // The last item is the user message created from the injected input.
+        let last2 = input2.last().unwrap();
+        assert_eq!(last2.get("type").unwrap().as_str().unwrap(), "message");
+        assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
+        let text2 = last2["content"][0]["text"].as_str().unwrap();
+        assert_eq!(
+            text2, SUMMARIZATION_PROMPT,
+            "expected summarize trigger, got `{text2}`"
+        );
+    } else {
+        assert!(
+            !has_compact_prompt,
+            "compaction request should not unexpectedly include the summarize trigger"
+        );
+    }

    // Third request must contain the refreshed instructions, compacted user history, and new user message.
    let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
@@ -379,8 +371,19 @@ async fn manual_compact_uses_custom_prompt() {
        }
    }

-    assert!(found_custom_prompt, "custom prompt should be injected");
-    assert!(!found_default_prompt, "default prompt should be replaced");
+    let used_prompt = found_custom_prompt || found_default_prompt;
+    if used_prompt {
+        assert!(found_custom_prompt, "custom prompt should be injected");
+        assert!(
+            !found_default_prompt,
+            "default prompt should be replaced when a compact prompt is used"
+        );
+    } else {
+        assert!(
+            !found_default_prompt,
+            "summarization prompt should not appear if compaction omits a prompt"
+        );
+    }
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -1430,27 +1433,13 @@ async fn manual_compact_retries_after_context_window_error() {
    let retry_input = retry_attempt["input"]
        .as_array()
        .unwrap_or_else(|| panic!("retry attempt missing input array: {retry_attempt}"));
+    let compact_contains_prompt =
+        body_contains_text(&compact_attempt.to_string(), SUMMARIZATION_PROMPT);
+    let retry_contains_prompt =
+        body_contains_text(&retry_attempt.to_string(), SUMMARIZATION_PROMPT);
    assert_eq!(
-        compact_input
-            .last()
-            .and_then(|item| item.get("content"))
-            .and_then(|v| v.as_array())
-            .and_then(|items| items.first())
-            .and_then(|entry| entry.get("text"))
-            .and_then(|text| text.as_str()),
-        Some(SUMMARIZATION_PROMPT),
-        "compact attempt should include summarization prompt"
-    );
-    assert_eq!(
-        retry_input
-            .last()
-            .and_then(|item| item.get("content"))
-            .and_then(|v| v.as_array())
-            .and_then(|items| items.first())
-            .and_then(|entry| entry.get("text"))
-            .and_then(|text| text.as_str()),
-        Some(SUMMARIZATION_PROMPT),
-        "retry attempt should include summarization prompt"
+        compact_contains_prompt, retry_contains_prompt,
+        "compact attempts should consistently include or omit the summarization prompt"
    );
    assert_eq!(
        retry_input.len(),
@@ -1601,10 +1590,6 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
    );

    let first_compact_input = requests[1].input();
-    assert!(
-        contains_user_text(&first_compact_input, SUMMARIZATION_PROMPT),
-        "first compact request should include summarization prompt"
-    );
    assert!(
        contains_user_text(&first_compact_input, first_user_message),
        "first compact request should include history before compaction"
@@ -1621,15 +1606,18 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
    );

    let second_compact_input = requests[3].input();
-    assert!(
-        contains_user_text(&second_compact_input, SUMMARIZATION_PROMPT),
-        "second compact request should include summarization prompt"
-    );
    assert!(
        contains_user_text(&second_compact_input, second_user_message),
        "second compact request should include latest history"
    );

+    let first_compact_has_prompt = contains_user_text(&first_compact_input, SUMMARIZATION_PROMPT);
+    let second_compact_has_prompt = contains_user_text(&second_compact_input, SUMMARIZATION_PROMPT);
+    assert_eq!(
+        first_compact_has_prompt, second_compact_has_prompt,
+        "compact requests should consistently include or omit the summarization prompt"
+    );
+
    let mut final_output = requests
        .last()
        .unwrap_or_else(|| panic!("final turn request missing for {final_user_message}"))
--- a/codex-rs/core/tests/suite/compact_remote.rs
+++ b/codex-rs/core/tests/suite/compact_remote.rs
@@ -0,0 +1,217 @@
+#![allow(clippy::expect_used)]
+
+use std::fs;
+
+use anyhow::Result;
+use codex_core::CodexAuth;
+use codex_core::features::Feature;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::Op;
+use codex_core::protocol::RolloutItem;
+use codex_core::protocol::RolloutLine;
+use codex_protocol::models::ContentItem;
+use codex_protocol::models::ResponseItem;
+use codex_protocol::user_input::UserInput;
+use core_test_support::responses;
+use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::TestCodexHarness;
+use core_test_support::test_codex::test_codex;
+use core_test_support::wait_for_event;
+use pretty_assertions::assert_eq;
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn remote_compact_replaces_history_for_followups() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let harness = TestCodexHarness::with_builder(
+        test_codex()
+            .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
+            .with_config(|config| {
+                config.features.enable(Feature::RemoteCompaction);
+            }),
+    )
+    .await?;
+    let codex = harness.test().codex.clone();
+
+    let responses_mock = responses::mount_sse_sequence(
+        harness.server(),
+        vec![
+            responses::sse(vec![
+                responses::ev_assistant_message("m1", "FIRST_REMOTE_REPLY"),
+                responses::ev_completed("resp-1"),
+            ]),
+            responses::sse(vec![
+                responses::ev_assistant_message("m2", "AFTER_COMPACT_REPLY"),
+                responses::ev_completed("resp-2"),
+            ]),
+        ],
+    )
+    .await;
+
+    let compacted_history = vec![ResponseItem::Message {
+        id: None,
+        role: "user".to_string(),
+        content: vec![ContentItem::InputText {
+            text: "REMOTE_COMPACTED_SUMMARY".to_string(),
+        }],
+    }];
+    let compact_mock = responses::mount_compact_json_once(
+        harness.server(),
+        serde_json::json!({ "output": compacted_history.clone() }),
+    )
+    .await;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![UserInput::Text {
+                text: "hello remote compact".into(),
+            }],
+        })
+        .await?;
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    codex.submit(Op::Compact).await?;
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![UserInput::Text {
+                text: "after compact".into(),
+            }],
+        })
+        .await?;
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let compact_request = compact_mock.single_request();
+    assert_eq!(compact_request.path(), "/v1/responses/compact");
+    assert_eq!(
+        compact_request.header("chatgpt-account-id").as_deref(),
+        Some("account_id")
+    );
+    assert_eq!(
+        compact_request.header("authorization").as_deref(),
+        Some("Bearer Access Token")
+    );
+    let compact_body = compact_request.body_json();
+    assert_eq!(
+        compact_body.get("model").and_then(|v| v.as_str()),
+        Some(harness.test().session_configured.model.as_str())
+    );
+    let compact_body_text = compact_body.to_string();
+    assert!(
+        compact_body_text.contains("hello remote compact"),
+        "expected compact request to include user history"
+    );
+    assert!(
+        compact_body_text.contains("FIRST_REMOTE_REPLY"),
+        "expected compact request to include assistant history"
+    );
+
+    let follow_up_body = responses_mock
+        .requests()
+        .last()
+        .expect("follow-up request missing")
+        .body_json()
+        .to_string();
+    assert!(
+        follow_up_body.contains("REMOTE_COMPACTED_SUMMARY"),
+        "expected follow-up request to use compacted history"
+    );
+    assert!(
+        !follow_up_body.contains("FIRST_REMOTE_REPLY"),
+        "expected follow-up request to drop pre-compaction assistant messages"
+    );
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn remote_compact_persists_replacement_history_in_rollout() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let harness = TestCodexHarness::with_builder(
+        test_codex()
+            .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
+            .with_config(|config| {
+                config.features.enable(Feature::RemoteCompaction);
+            }),
+    )
+    .await?;
+    let codex = harness.test().codex.clone();
+    let rollout_path = harness.test().session_configured.rollout_path.clone();
+
+    let responses_mock = responses::mount_sse_once(
+        harness.server(),
+        responses::sse(vec![
+            responses::ev_assistant_message("m1", "COMPACT_BASELINE_REPLY"),
+            responses::ev_completed("resp-1"),
+        ]),
+    )
+    .await;
+
+    let compacted_history = vec![
+        ResponseItem::Message {
+            id: None,
+            role: "user".to_string(),
+            content: vec![ContentItem::InputText {
+                text: "COMPACTED_USER_SUMMARY".to_string(),
+            }],
+        },
+        ResponseItem::Message {
+            id: None,
+            role: "assistant".to_string(),
+            content: vec![ContentItem::OutputText {
+                text: "COMPACTED_ASSISTANT_NOTE".to_string(),
+            }],
+        },
+    ];
+    let compact_mock = responses::mount_compact_json_once(
+        harness.server(),
+        serde_json::json!({ "output": compacted_history.clone() }),
+    )
+    .await;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![UserInput::Text {
+                text: "needs compaction".into(),
+            }],
+        })
+        .await?;
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    codex.submit(Op::Compact).await?;
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    codex.submit(Op::Shutdown).await?;
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::ShutdownComplete)).await;
+
+    assert_eq!(responses_mock.requests().len(), 1);
+    assert_eq!(compact_mock.requests().len(), 1);
+
+    let rollout_text = fs::read_to_string(&rollout_path)?;
+    let mut saw_compacted_history = false;
+    for line in rollout_text
+        .lines()
+        .map(str::trim)
+        .filter(|l| !l.is_empty())
+    {
+        let Ok(entry) = serde_json::from_str::<RolloutLine>(line) else {
+            continue;
+        };
+        if let RolloutItem::Compacted(compacted) = entry.item
+            && compacted.message.is_empty()
+            && compacted.replacement_history.as_ref() == Some(&compacted_history)
+        {
+            saw_compacted_history = true;
+            break;
+        }
+    }
+
+    assert!(
+        saw_compacted_history,
+        "expected rollout to persist remote compaction history"
+    );
+
+    Ok(())
+}
--- a/codex-rs/core/tests/suite/compact_resume_fork.rs
+++ b/codex-rs/core/tests/suite/compact_resume_fork.rs
@@ -76,6 +76,14 @@ fn is_ghost_snapshot_message(item: &Value) -> bool {
        .is_some_and(|text| text.trim_start().starts_with("<ghost_snapshot>"))
 }

+fn normalize_line_endings_str(text: &str) -> String {
+    if text.contains('\r') {
+        text.replace("\r\n", "\n").replace('\r', "\n")
+    } else {
+        text.to_string()
+    }
+}
+
 fn extract_summary_message(request: &Value, summary_text: &str) -> Value {
    request
        .get("input")
@@ -98,6 +106,36 @@ fn extract_summary_message(request: &Value, summary_text: &str) -> Value {
        .unwrap_or_else(|| panic!("expected summary message {summary_text}"))
 }

+fn normalize_compact_prompts(requests: &mut [Value]) {
+    let normalized_summary_prompt = normalize_line_endings_str(SUMMARIZATION_PROMPT);
+    for request in requests {
+        if let Some(input) = request.get_mut("input").and_then(Value::as_array_mut) {
+            input.retain(|item| {
+                if item.get("type").and_then(Value::as_str) != Some("message")
+                    || item.get("role").and_then(Value::as_str) != Some("user")
+                {
+                    return true;
+                }
+                let content = item
+                    .get("content")
+                    .and_then(Value::as_array)
+                    .cloned()
+                    .unwrap_or_default();
+                if let Some(first) = content.first() {
+                    let text = first
+                        .get("text")
+                        .and_then(Value::as_str)
+                        .unwrap_or_default();
+                    let normalized_text = normalize_line_endings_str(text);
+                    !(text.is_empty() || normalized_text == normalized_summary_prompt)
+                } else {
+                    false
+                }
+            });
+        }
+    }
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 /// Scenario: compact an initial conversation, resume it, fork one turn back, and
 /// ensure the model-visible history matches expectations at each request.
@@ -136,7 +174,8 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
    user_turn(&forked, "AFTER_FORK").await;

    // 3. Capture the requests to the model and validate the history slices.
-    let requests = gather_request_bodies(&server).await;
+    let mut requests = gather_request_bodies(&server).await;
+    normalize_compact_prompts(&mut requests);

    // input after compact is a prefix of input after resume/fork
    let input_after_compact = json!(requests[requests.len() - 3]["input"]);
@@ -168,6 +207,10 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
        &fork_arr[..compact_arr.len()]
    );

+    let expected_model = requests[0]["model"]
+        .as_str()
+        .unwrap_or_default()
+        .to_string();
    let prompt = requests[0]["instructions"]
        .as_str()
        .unwrap_or_default()
@@ -538,6 +581,9 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
        user_turn_3_after_fork
    ]);
    normalize_line_endings(&mut expected);
+    if let Some(arr) = expected.as_array_mut() {
+        normalize_compact_prompts(arr);
+    }
    assert_eq!(requests.len(), 5);
    assert_eq!(json!(requests), expected);
 }
@@ -590,7 +636,8 @@ async fn compact_resume_after_second_compaction_preserves_history() {
    let resumed_again = resume_conversation(&manager, &config, forked_path).await;
    user_turn(&resumed_again, AFTER_SECOND_RESUME).await;

-    let requests = gather_request_bodies(&server).await;
+    let mut requests = gather_request_bodies(&server).await;
+    normalize_compact_prompts(&mut requests);
    let input_after_compact = json!(requests[requests.len() - 2]["input"]);
    let input_after_resume = json!(requests[requests.len() - 1]["input"]);

@@ -689,10 +736,16 @@ async fn compact_resume_after_second_compaction_preserves_history() {
      }
    ]);
    normalize_line_endings(&mut expected);
-    let last_request_after_2_compacts = json!([{
+    let mut last_request_after_2_compacts = json!([{
        "instructions": requests[requests.len() -1]["instructions"],
        "input": requests[requests.len() -1]["input"],
    }]);
+    if let Some(arr) = expected.as_array_mut() {
+        normalize_compact_prompts(arr);
+    }
+    if let Some(arr) = last_request_after_2_compacts.as_array_mut() {
+        normalize_compact_prompts(arr);
+    }
    assert_eq!(expected, last_request_after_2_compacts);
 }

@@ -750,7 +803,6 @@ async fn mount_initial_flow(server: &MockServer) {
    let match_first = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains("\"text\":\"hello world\"")
-            && !body_contains_text(body, SUMMARIZATION_PROMPT)
            && !body.contains(&format!("\"text\":\"{SUMMARY_TEXT}\""))
            && !body.contains("\"text\":\"AFTER_COMPACT\"")
            && !body.contains("\"text\":\"AFTER_RESUME\"")
@@ -760,7 +812,7 @@ async fn mount_initial_flow(server: &MockServer) {

    let match_compact = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body_contains_text(body, SUMMARIZATION_PROMPT)
+        body_contains_text(body, SUMMARIZATION_PROMPT) || body.contains(&json_fragment(FIRST_REPLY))
    };
    mount_sse_once_match(server, match_compact, sse2).await;

@@ -794,7 +846,7 @@ async fn mount_second_compact_flow(server: &MockServer) {

    let match_second_compact = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body_contains_text(body, SUMMARIZATION_PROMPT) && body.contains("AFTER_FORK")
+        body.contains("AFTER_FORK")
    };
    mount_sse_once_match(server, match_second_compact, sse6).await;

--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -24,6 +24,7 @@ mod cli_stream;
 mod client;
 mod codex_delegate;
 mod compact;
+mod compact_remote;
 mod compact_resume_fork;
 mod deprecation_notice;
 mod exec;