mirror of
https://github.com/openai/codex.git
synced 2026-05-02 02:17:22 +00:00
[core] simplify inline server-side compaction handling [ci changed_files]
Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
@@ -9,9 +9,13 @@ use codex_core::ModelProviderInfo;
|
||||
use codex_core::WireApi;
|
||||
use codex_core::compact::SUMMARY_PREFIX;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::models_manager::manager::RefreshStrategy;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::openai_models::ModelInfo;
|
||||
use codex_protocol::openai_models::ModelsResponse;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ConversationStartParams;
|
||||
use codex_protocol::protocol::ErrorEvent;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
@@ -22,11 +26,13 @@ use codex_protocol::protocol::RealtimeConversationRealtimeEvent;
|
||||
use codex_protocol::protocol::RealtimeEvent;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::RolloutLine;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::context_snapshot;
|
||||
use core_test_support::context_snapshot::ContextSnapshotOptions;
|
||||
use core_test_support::context_snapshot::ContextSnapshotRenderMode;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::mount_models_once;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_websocket_server;
|
||||
@@ -62,6 +68,18 @@ fn summary_with_prefix(summary: &str) -> String {
|
||||
format!("{SUMMARY_PREFIX}\n{summary}")
|
||||
}
|
||||
|
||||
fn model_info_with_context_window(slug: &str, context_window: i64) -> ModelInfo {
|
||||
let models_response: ModelsResponse =
|
||||
serde_json::from_str(include_str!("../../models.json")).expect("valid models.json");
|
||||
let mut model_info = models_response
|
||||
.models
|
||||
.into_iter()
|
||||
.find(|model| model.slug == slug)
|
||||
.unwrap_or_else(|| panic!("model `{slug}` missing from models.json"));
|
||||
model_info.context_window = Some(context_window);
|
||||
model_info
|
||||
}
|
||||
|
||||
fn context_snapshot_options() -> ContextSnapshotOptions {
|
||||
ContextSnapshotOptions::default()
|
||||
.render_mode(ContextSnapshotRenderMode::KindWithTextPrefix { max_chars: 64 })
|
||||
@@ -488,6 +506,13 @@ async fn auto_server_side_compaction_uses_inline_context_management() -> Result<
|
||||
"expected three inline /responses requests"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
requests[0].body_json().get("context_management"),
|
||||
Some(&json!([{
|
||||
"type": "compaction",
|
||||
"compact_threshold": compact_threshold,
|
||||
}])),
|
||||
);
|
||||
let inline_request = requests[1].body_json();
|
||||
assert_eq!(
|
||||
inline_request.get("context_management"),
|
||||
@@ -498,14 +523,21 @@ async fn auto_server_side_compaction_uses_inline_context_management() -> Result<
|
||||
);
|
||||
|
||||
let post_inline_request = &requests[2];
|
||||
assert_eq!(
|
||||
post_inline_request.body_json().get("context_management"),
|
||||
Some(&json!([{
|
||||
"type": "compaction",
|
||||
"compact_threshold": compact_threshold,
|
||||
}])),
|
||||
);
|
||||
let post_inline_body = post_inline_request.body_json().to_string();
|
||||
assert!(
|
||||
post_inline_request.body_contains_text(&inline_summary),
|
||||
"expected subsequent request to reuse inline compaction item"
|
||||
);
|
||||
assert!(
|
||||
post_inline_request.body_contains_text("<permissions instructions>"),
|
||||
"expected subsequent request to preserve canonical context after inline compaction"
|
||||
!post_inline_request.body_contains_text("<permissions instructions>"),
|
||||
"expected subsequent request to rely on the compaction item rather than duplicate canonical context"
|
||||
);
|
||||
assert!(
|
||||
!post_inline_request.body_contains_text(first_turn_text),
|
||||
@@ -533,7 +565,85 @@ async fn auto_server_side_compaction_uses_inline_context_management() -> Result<
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn auto_server_side_compaction_keeps_current_turn_inputs_for_follow_ups() -> Result<()> {
|
||||
async fn snapshot_request_shape_auto_server_side_compaction_history_layout() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let compact_threshold = 120;
|
||||
let inline_summary = summary_with_prefix("INLINE_SERVER_SUMMARY");
|
||||
|
||||
let harness = TestCodexHarness::with_builder(
|
||||
test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(move |config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::ServerSideCompaction)
|
||||
.expect("enable server-side compaction");
|
||||
config.model_auto_compact_token_limit = Some(compact_threshold);
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
let codex = harness.test().codex.clone();
|
||||
|
||||
let responses_mock = responses::mount_response_sequence(
|
||||
harness.server(),
|
||||
vec![
|
||||
responses::sse_response(sse(vec![
|
||||
responses::ev_assistant_message("m1", "FIRST_REMOTE_REPLY"),
|
||||
responses::ev_completed_with_tokens("resp-1", 500),
|
||||
])),
|
||||
responses::sse_response(sse(vec![
|
||||
responses::ev_compaction(&inline_summary),
|
||||
responses::ev_assistant_message("m2", "AFTER_INLINE_REPLY"),
|
||||
responses::ev_completed_with_tokens("resp-2", 80),
|
||||
])),
|
||||
responses::sse_response(sse(vec![responses::ev_completed("resp-3")])),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_text_turn_and_wait(&codex, "inline compact turn one").await?;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "inline compact turn two".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |event| {
|
||||
matches!(event, EventMsg::ContextCompacted(_))
|
||||
})
|
||||
.await;
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
submit_text_turn_and_wait(&codex, "inline compact turn three").await?;
|
||||
|
||||
let requests = responses_mock.requests();
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
3,
|
||||
"expected three inline /responses requests"
|
||||
);
|
||||
|
||||
insta::assert_snapshot!(
|
||||
"auto_server_side_compaction_history_layout_shapes",
|
||||
format_labeled_requests_snapshot(
|
||||
"Inline server-side compaction: the compacting turn opts into context_management, and the next request reuses the streamed compaction checkpoint directly.",
|
||||
&[
|
||||
("Inline Compaction Request", &requests[1]),
|
||||
("Post-Inline-Compaction History Layout", &requests[2]),
|
||||
]
|
||||
)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn auto_server_side_compaction_follow_ups_use_compaction_checkpoint() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let compact_threshold = 120;
|
||||
@@ -592,26 +702,17 @@ async fn auto_server_side_compaction_keeps_current_turn_inputs_for_follow_ups()
|
||||
"expected same-turn follow-up to include the inline compaction item"
|
||||
);
|
||||
assert!(
|
||||
follow_up_request.body_contains_text("<permissions instructions>"),
|
||||
"expected same-turn follow-up to preserve canonical context after inline compaction"
|
||||
!follow_up_request.body_contains_text("<permissions instructions>"),
|
||||
"expected same-turn follow-up to rely on the compaction item rather than duplicate canonical context"
|
||||
);
|
||||
assert!(
|
||||
follow_up_request.body_contains_text(second_turn_text),
|
||||
"expected same-turn follow-up to retain the current turn user input"
|
||||
!follow_up_request.body_contains_text(second_turn_text),
|
||||
"expected same-turn follow-up to replace prior plaintext history with the compaction item"
|
||||
);
|
||||
assert!(
|
||||
!follow_up_request.body_contains_text(first_turn_text),
|
||||
"expected same-turn follow-up to drop pre-compaction history"
|
||||
);
|
||||
assert!(
|
||||
follow_up_body
|
||||
.find(second_turn_text)
|
||||
.expect("current turn text in follow-up request")
|
||||
< follow_up_body
|
||||
.find("INLINE_SERVER_SUMMARY")
|
||||
.expect("inline compaction marker in follow-up request"),
|
||||
"expected current-turn items to remain ahead of the inline compaction item"
|
||||
);
|
||||
assert!(
|
||||
follow_up_request
|
||||
.function_call_output_text("call-inline-mid-turn")
|
||||
@@ -2516,6 +2617,171 @@ async fn snapshot_request_shape_remote_pre_turn_compaction_strips_incoming_model
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn previous_model_preflight_remote_compaction_still_runs_with_inline_feature_enabled()
|
||||
-> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let compact_threshold = 200;
|
||||
let previous_model = "gpt-5.2-codex";
|
||||
let next_model = "gpt-5.1-codex-max";
|
||||
let harness = TestCodexHarness::with_builder(
|
||||
test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_model(previous_model)
|
||||
.with_config(move |config| {
|
||||
config.model_auto_compact_token_limit = Some(compact_threshold);
|
||||
config
|
||||
.features
|
||||
.enable(Feature::ServerSideCompaction)
|
||||
.expect("enable server-side compaction");
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
let codex = harness.test().codex.clone();
|
||||
let models_mock = mount_models_once(
|
||||
harness.server(),
|
||||
ModelsResponse {
|
||||
models: vec![
|
||||
model_info_with_context_window(previous_model, 273_000),
|
||||
model_info_with_context_window(next_model, 125_000),
|
||||
],
|
||||
},
|
||||
)
|
||||
.await;
|
||||
let _ = harness
|
||||
.test()
|
||||
.thread_manager
|
||||
.get_models_manager()
|
||||
.list_models(RefreshStrategy::Online)
|
||||
.await;
|
||||
|
||||
let initial_turn_request_mock = responses::mount_sse_once(
|
||||
harness.server(),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m1", "BEFORE_SWITCH_REPLY"),
|
||||
responses::ev_completed_with_tokens("r1", 120_000),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let post_compact_turn_request_mock = responses::mount_sse_once(
|
||||
harness.server(),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("m2", "AFTER_SWITCH_REPLY"),
|
||||
responses::ev_completed_with_tokens("r2", 80),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let compact_mock = responses::mount_compact_user_history_with_summary_once(
|
||||
harness.server(),
|
||||
&summary_with_prefix("REMOTE_SWITCH_SUMMARY"),
|
||||
)
|
||||
.await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "BEFORE_SWITCH_USER".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: harness.test().cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: previous_model.to_string(),
|
||||
effort: None,
|
||||
summary: None,
|
||||
service_tier: None,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "AFTER_SWITCH_USER".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: harness.test().cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: next_model.to_string(),
|
||||
effort: None,
|
||||
summary: None,
|
||||
service_tier: None,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
assert_eq!(
|
||||
compact_mock.requests().len(),
|
||||
1,
|
||||
"expected previous-model preflight to keep using remote compaction even when inline server-side compaction is enabled"
|
||||
);
|
||||
assert_eq!(models_mock.requests().len(), 1, "expected one models fetch");
|
||||
assert_eq!(
|
||||
initial_turn_request_mock.requests().len(),
|
||||
1,
|
||||
"expected initial turn request"
|
||||
);
|
||||
assert_eq!(
|
||||
post_compact_turn_request_mock.requests().len(),
|
||||
1,
|
||||
"expected post-compaction follow-up request"
|
||||
);
|
||||
let initial_turn_request = initial_turn_request_mock.single_request();
|
||||
let compact_request = compact_mock.single_request();
|
||||
let post_compact_turn_request = post_compact_turn_request_mock.single_request();
|
||||
assert_eq!(
|
||||
initial_turn_request
|
||||
.body_json()
|
||||
.get("model")
|
||||
.and_then(serde_json::Value::as_str),
|
||||
Some(previous_model)
|
||||
);
|
||||
assert_eq!(
|
||||
compact_request
|
||||
.body_json()
|
||||
.get("model")
|
||||
.and_then(serde_json::Value::as_str),
|
||||
Some(previous_model)
|
||||
);
|
||||
assert_eq!(
|
||||
post_compact_turn_request
|
||||
.body_json()
|
||||
.get("model")
|
||||
.and_then(serde_json::Value::as_str),
|
||||
Some(next_model)
|
||||
);
|
||||
let compact_body = compact_request.body_json().to_string();
|
||||
assert!(
|
||||
!compact_body.contains("<model_switch>"),
|
||||
"expected previous-model preflight compact request to strip model-switch context"
|
||||
);
|
||||
let follow_up_body = post_compact_turn_request.body_json().to_string();
|
||||
assert!(
|
||||
follow_up_body.contains("<model_switch>"),
|
||||
"expected post-compaction next-model request to restore model-switch context"
|
||||
);
|
||||
assert_eq!(
|
||||
post_compact_turn_request
|
||||
.body_json()
|
||||
.get("context_management"),
|
||||
Some(&json!([{
|
||||
"type": "compaction",
|
||||
"compact_threshold": compact_threshold,
|
||||
}])),
|
||||
"expected eligible next-model requests to always include inline context management after preflight compaction"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
// TODO(ccunningham): Update once remote pre-turn compaction context-overflow handling includes
|
||||
// incoming user input and emits richer oversized-input messaging.
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
---
|
||||
source: core/tests/suite/compact_remote.rs
|
||||
assertion_line: 558
|
||||
expression: "format_labeled_requests_snapshot(\"Inline server-side compaction: the compacting turn opts into context_management, and the next request reuses the streamed compaction checkpoint directly.\",\n&[(\"Inline Compaction Request\", &requests[1]),\n(\"Post-Inline-Compaction History Layout\", &requests[2]),])"
|
||||
---
|
||||
Scenario: Inline server-side compaction: the compacting turn opts into context_management, and the next request reuses the streamed compaction checkpoint directly.
|
||||
|
||||
## Inline Compaction Request
|
||||
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
|
||||
01:message/user[2]:
|
||||
[01] <AGENTS_MD>
|
||||
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
|
||||
02:message/user:inline compact turn one
|
||||
03:message/assistant:FIRST_REMOTE_REPLY
|
||||
04:message/user:inline compact turn two
|
||||
|
||||
## Post-Inline-Compaction History Layout
|
||||
00:compaction:encrypted=true
|
||||
01:message/assistant:AFTER_INLINE_REPLY
|
||||
02:message/user:inline compact turn three
|
||||
Reference in New Issue
Block a user