Fix small instruction mismatch between remote compaction estimator and payload (#10692)

## Summary

This PR fixes a deterministic mismatch in remote compaction where pre-trim estimation and the `/v1/responses/compact` payload could use different base instructions.

Before this change:

- pre-trim estimation used model-derived instructions (`model_info.get_model_instructions(...)`)
- compact payload used session base instructions (`sess.get_base_instructions()`)

After this change:

- remote pre-trim estimation and compact payload both use the same `BaseInstructions` instance from session state.

## Changes

- Added a shared estimator entry point in `ContextManager`:
  - `estimate_token_count_with_base_instructions(&self, base_instructions: &BaseInstructions) -> Option<i64>`
- Kept `estimate_token_count(&TurnContext)` as a thin wrapper that resolves model/personality instructions and delegates to the new helper.
- Updated remote compaction flow to fetch base instructions once and reuse it for both:
  - trim preflight estimation
  - compact request payload construction
- Added regression coverage for parity and behavior:
  - unit test verifying explicit-base estimator behavior
  - integration test proving remote compaction uses session override instructions and trims accordingly

## Why this matters

This removes a deterministic source of divergence where pre-trim could conclude the request fits while the actual compact request exceeded the context window because its instructions were longer/different.

## Scope

In scope:

- estimator/payload base-instructions parity in remote compaction

Out of scope:

- retry-on-`context_length_exceeded`
- compaction threshold/headroom policy changes
- broader trimming policy changes

## Codex author

`codex fork 019c2b24-c2df-7b31-a482-fb8cf7a28559`
2026-04-26 15:45:02 +00:00 · 2026-02-04 23:24:06 -08:00
parent cd5f49a619
commit dc7007beaa
5 changed files with 314 additions and 10 deletions
--- a/codex-rs/core/tests/suite/compact_remote.rs
+++ b/codex-rs/core/tests/suite/compact_remote.rs
@@ -25,6 +25,21 @@ use core_test_support::wait_for_event;
 use core_test_support::wait_for_event_match;
 use pretty_assertions::assert_eq;

+fn approx_token_count(text: &str) -> i64 {
+    i64::try_from(text.len().saturating_add(3) / 4).unwrap_or(i64::MAX)
+}
+
+fn estimate_compact_input_tokens(request: &responses::ResponsesRequest) -> i64 {
+    request.input().into_iter().fold(0i64, |acc, item| {
+        acc.saturating_add(approx_token_count(&item.to_string()))
+    })
+}
+
+fn estimate_compact_payload_tokens(request: &responses::ResponsesRequest) -> i64 {
+    estimate_compact_input_tokens(request)
+        .saturating_add(approx_token_count(&request.instructions_text()))
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn remote_compact_replaces_history_for_followups() -> Result<()> {
    skip_if_no_network!(Ok(()));
@@ -351,6 +366,210 @@ async fn remote_compact_trims_function_call_history_to_fit_context_window() -> R
    Ok(())
 }

+#[cfg_attr(target_os = "windows", ignore)]
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn remote_compact_trim_estimate_uses_session_base_instructions() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let first_user_message = "turn with baseline shell call";
+    let second_user_message = "turn with trailing shell call";
+    let baseline_retained_call_id = "baseline-retained-call";
+    let baseline_trailing_call_id = "baseline-trailing-call";
+    let override_retained_call_id = "override-retained-call";
+    let override_trailing_call_id = "override-trailing-call";
+    let retained_command = "printf retained-shell-output";
+    let trailing_command = "printf trailing-shell-output";
+
+    let baseline_harness = TestCodexHarness::with_builder(
+        test_codex()
+            .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
+            .with_config(|config| {
+                config.features.enable(Feature::RemoteCompaction);
+                config.model_context_window = Some(200_000);
+            }),
+    )
+    .await?;
+    let baseline_codex = baseline_harness.test().codex.clone();
+
+    responses::mount_sse_sequence(
+        baseline_harness.server(),
+        vec![
+            sse(vec![
+                responses::ev_shell_command_call(baseline_retained_call_id, retained_command),
+                responses::ev_completed("baseline-retained-call-response"),
+            ]),
+            sse(vec![
+                responses::ev_assistant_message("baseline-retained-assistant", "retained complete"),
+                responses::ev_completed("baseline-retained-final-response"),
+            ]),
+            sse(vec![
+                responses::ev_shell_command_call(baseline_trailing_call_id, trailing_command),
+                responses::ev_completed("baseline-trailing-call-response"),
+            ]),
+            sse(vec![responses::ev_completed(
+                "baseline-trailing-final-response",
+            )]),
+        ],
+    )
+    .await;
+
+    baseline_codex
+        .submit(Op::UserInput {
+            items: vec![UserInput::Text {
+                text: first_user_message.into(),
+                text_elements: Vec::new(),
+            }],
+            final_output_json_schema: None,
+        })
+        .await?;
+    wait_for_event(&baseline_codex, |event| {
+        matches!(event, EventMsg::TurnComplete(_))
+    })
+    .await;
+
+    baseline_codex
+        .submit(Op::UserInput {
+            items: vec![UserInput::Text {
+                text: second_user_message.into(),
+                text_elements: Vec::new(),
+            }],
+            final_output_json_schema: None,
+        })
+        .await?;
+    wait_for_event(&baseline_codex, |event| {
+        matches!(event, EventMsg::TurnComplete(_))
+    })
+    .await;
+
+    let baseline_compact_mock = responses::mount_compact_json_once(
+        baseline_harness.server(),
+        serde_json::json!({ "output": [] }),
+    )
+    .await;
+
+    baseline_codex.submit(Op::Compact).await?;
+    wait_for_event(&baseline_codex, |event| {
+        matches!(event, EventMsg::TurnComplete(_))
+    })
+    .await;
+
+    let baseline_compact_request = baseline_compact_mock.single_request();
+    assert!(
+        baseline_compact_request.has_function_call(baseline_retained_call_id),
+        "expected baseline compact request to retain older function call history"
+    );
+    assert!(
+        baseline_compact_request.has_function_call(baseline_trailing_call_id),
+        "expected baseline compact request to retain trailing function call history"
+    );
+
+    let baseline_input_tokens = estimate_compact_input_tokens(&baseline_compact_request);
+    let baseline_payload_tokens = estimate_compact_payload_tokens(&baseline_compact_request);
+
+    let override_base_instructions =
+        format!("REMOTE_BASE_INSTRUCTIONS_OVERRIDE {}", "x".repeat(120_000));
+    let override_context_window = baseline_payload_tokens.saturating_add(1_000);
+    let pretrim_override_estimate =
+        baseline_input_tokens.saturating_add(approx_token_count(&override_base_instructions));
+    assert!(
+        pretrim_override_estimate > override_context_window,
+        "expected override instructions to push pre-trim estimate past the context window"
+    );
+
+    let override_harness = TestCodexHarness::with_builder(
+        test_codex()
+            .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
+            .with_config({
+                let override_base_instructions = override_base_instructions.clone();
+                move |config| {
+                    config.features.enable(Feature::RemoteCompaction);
+                    config.model_context_window = Some(override_context_window);
+                    config.base_instructions = Some(override_base_instructions);
+                }
+            }),
+    )
+    .await?;
+    let override_codex = override_harness.test().codex.clone();
+
+    responses::mount_sse_sequence(
+        override_harness.server(),
+        vec![
+            sse(vec![
+                responses::ev_shell_command_call(override_retained_call_id, retained_command),
+                responses::ev_completed("override-retained-call-response"),
+            ]),
+            sse(vec![
+                responses::ev_assistant_message("override-retained-assistant", "retained complete"),
+                responses::ev_completed("override-retained-final-response"),
+            ]),
+            sse(vec![
+                responses::ev_shell_command_call(override_trailing_call_id, trailing_command),
+                responses::ev_completed("override-trailing-call-response"),
+            ]),
+            sse(vec![responses::ev_completed(
+                "override-trailing-final-response",
+            )]),
+        ],
+    )
+    .await;
+
+    override_codex
+        .submit(Op::UserInput {
+            items: vec![UserInput::Text {
+                text: first_user_message.into(),
+                text_elements: Vec::new(),
+            }],
+            final_output_json_schema: None,
+        })
+        .await?;
+    wait_for_event(&override_codex, |event| {
+        matches!(event, EventMsg::TurnComplete(_))
+    })
+    .await;
+
+    override_codex
+        .submit(Op::UserInput {
+            items: vec![UserInput::Text {
+                text: second_user_message.into(),
+                text_elements: Vec::new(),
+            }],
+            final_output_json_schema: None,
+        })
+        .await?;
+    wait_for_event(&override_codex, |event| {
+        matches!(event, EventMsg::TurnComplete(_))
+    })
+    .await;
+
+    let override_compact_mock = responses::mount_compact_json_once(
+        override_harness.server(),
+        serde_json::json!({ "output": [] }),
+    )
+    .await;
+
+    override_codex.submit(Op::Compact).await?;
+    wait_for_event(&override_codex, |event| {
+        matches!(event, EventMsg::TurnComplete(_))
+    })
+    .await;
+
+    let override_compact_request = override_compact_mock.single_request();
+    assert_eq!(
+        override_compact_request.instructions_text(),
+        override_base_instructions
+    );
+    assert!(
+        override_compact_request.has_function_call(override_retained_call_id),
+        "expected remote compact request to preserve older function call history"
+    );
+    assert!(
+        !override_compact_request.has_function_call(override_trailing_call_id),
+        "expected remote compact request to trim trailing function call history with override instructions"
+    );
+
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn remote_manual_compact_emits_context_compaction_items() -> Result<()> {
    skip_if_no_network!(Ok(()));