Compact before sampling when switching to a smaller context model

Ahmed Ibrahim
2026-02-10 19:51:40 -08:00
parent e5c2ecdc92
commit 4290b8731f
2 changed files with 445 additions and 3 deletions
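
The change gates each turn on a pre-sampling compaction pass: if the model was switched since the previous turn, the new model's context window is smaller, and the accumulated token usage already exceeds the new model's auto-compact limit, the history is first compacted with the previous (larger-context) model before the usual limit check runs. A minimal, self-contained sketch of that decision follows; it is an illustration only, not part of the diff, and `ContextSnapshot`, `needs_pre_sampling_compact`, and the numeric limits are hypothetical stand-ins for the real `TurnContext` and `ModelInfo` accessors.

// Sketch of the predicate introduced below as should_run_inline_compact_with_previous_context.
// ContextSnapshot is a hypothetical stand-in for the fields read from TurnContext / ModelInfo.
struct ContextSnapshot {
    slug: String,
    context_window: Option<i64>,
    auto_compact_token_limit: Option<i64>,
}

fn needs_pre_sampling_compact(
    total_usage_tokens: i64,
    previous: &ContextSnapshot,
    current: &ContextSnapshot,
) -> bool {
    // Without a known context window on both sides there is nothing to compare.
    let (Some(old_window), Some(new_window)) =
        (previous.context_window, current.context_window)
    else {
        return false;
    };
    let new_limit = current.auto_compact_token_limit.unwrap_or(i64::MAX);
    // Compact with the previous model only when the model actually changed,
    // the new window is smaller, and usage already exceeds the new model's limit.
    total_usage_tokens > new_limit
        && previous.slug != current.slug
        && old_window > new_window
}

fn main() {
    // Hypothetical numbers, loosely mirroring the test scenario further down.
    let previous = ContextSnapshot {
        slug: "large-model".to_string(),
        context_window: Some(273_000),
        auto_compact_token_limit: Some(200_000),
    };
    let current = ContextSnapshot {
        slug: "small-model".to_string(),
        context_window: Some(125_000),
        auto_compact_token_limit: Some(110_000),
    };
    // 115_000 tokens of history exceed the small model's limit, so the
    // history must be compacted before sampling with the small model.
    assert!(needs_pre_sampling_compact(115_000, &previous, &current));
}

Compacting with the previous, larger-context model first presumably ensures the summarization request itself still fits; run_pre_sampling_compact then re-checks the current model's own auto-compact limit as before.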


@@ -3858,15 +3858,16 @@ pub(crate) async fn run_turn(
    let model_info = turn_context.model_info.clone();
    let auto_compact_limit = model_info.auto_compact_token_limit().unwrap_or(i64::MAX);
    let total_usage_tokens = sess.get_total_token_usage().await;
    let event = EventMsg::TurnStarted(TurnStartedEvent {
        model_context_window: turn_context.model_context_window(),
        collaboration_mode_kind: turn_context.collaboration_mode.mode,
    });
    sess.send_event(&turn_context, event).await;
    if total_usage_tokens >= auto_compact_limit
        && run_auto_compact(&sess, &turn_context).await.is_err()
    if run_pre_sampling_compact(&sess, &turn_context)
        .await
        .is_err()
    {
        return None;
    }
@@ -4115,6 +4116,52 @@ async fn run_auto_compact(sess: &Arc<Session>, turn_context: &Arc<TurnContext>)
    Ok(())
}

async fn run_pre_sampling_compact(
    sess: &Arc<Session>,
    turn_context: &Arc<TurnContext>,
) -> CodexResult<()> {
    let total_usage_tokens = sess.get_total_token_usage().await;
    let auto_compact_limit = turn_context
        .model_info
        .auto_compact_token_limit()
        .unwrap_or(i64::MAX);
    // Compact with the previous model if the model was switched and the previous context window is larger than the new one
    if let Some(previous_turn_context) = sess.previous_turn_context().await
        && should_run_inline_compact_with_previous_context(
            total_usage_tokens,
            &previous_turn_context,
            turn_context.as_ref(),
        ) {
        run_auto_compact(sess, &previous_turn_context).await?;
    }
    // Compact if the total usage tokens are at or above the auto-compact limit
    if total_usage_tokens >= auto_compact_limit {
        run_auto_compact(sess, turn_context).await?;
    }
    Ok(())
}

fn should_run_inline_compact_with_previous_context(
    total_usage_tokens: i64,
    previous_turn_context: &TurnContext,
    turn_context: &TurnContext,
) -> bool {
    let Some(old_context_window) = previous_turn_context.model_context_window() else {
        return false;
    };
    let Some(new_context_window) = turn_context.model_context_window() else {
        return false;
    };
    let new_auto_compact_limit = turn_context
        .model_info
        .auto_compact_token_limit()
        .unwrap_or(i64::MAX);
    total_usage_tokens > new_auto_compact_limit
        && previous_turn_context.model_info.slug != turn_context.model_info.slug
        && old_context_window > new_context_window
}

fn filter_connectors_for_input(
    connectors: Vec<connectors::AppInfo>,
    input: &[ResponseItem],


@@ -5,6 +5,8 @@ use codex_core::built_in_model_providers;
use codex_core::compact::SUMMARIZATION_PROMPT;
use codex_core::compact::SUMMARY_PREFIX;
use codex_core::config::Config;
use codex_core::features::Feature;
use codex_core::models_manager::manager::RefreshStrategy;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ItemCompletedEvent;
@@ -16,9 +18,18 @@ use codex_core::protocol::SandboxPolicy;
use codex_core::protocol::WarningEvent;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::items::TurnItem;
use codex_protocol::openai_models::ConfigShellToolType;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelVisibility;
use codex_protocol::openai_models::ModelsResponse;
use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::openai_models::ReasoningEffortPreset;
use codex_protocol::openai_models::TruncationPolicyConfig;
use codex_protocol::openai_models::default_input_modalities;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_local_shell_call;
use core_test_support::responses::ev_reasoning_item;
use core_test_support::responses::mount_models_once;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
@@ -110,6 +121,37 @@ fn non_openai_model_provider(server: &MockServer) -> ModelProviderInfo {
provider
}
fn test_remote_model_with_context_window(slug: &str, context_window: i64) -> ModelInfo {
ModelInfo {
slug: slug.to_string(),
display_name: format!("{slug} display"),
description: Some(format!("{slug} description")),
default_reasoning_level: Some(ReasoningEffort::Medium),
supported_reasoning_levels: vec![ReasoningEffortPreset {
effort: ReasoningEffort::Medium,
description: ReasoningEffort::Medium.to_string(),
}],
shell_type: ConfigShellToolType::ShellCommand,
visibility: ModelVisibility::List,
supported_in_api: true,
input_modalities: default_input_modalities(),
priority: 1,
upgrade: None,
base_instructions: "base instructions".to_string(),
model_messages: None,
supports_reasoning_summaries: false,
support_verbosity: false,
default_verbosity: None,
apply_patch_tool_type: None,
truncation_policy: TruncationPolicyConfig::bytes(10_000),
supports_parallel_tool_calls: false,
context_window: Some(context_window),
auto_compact_token_limit: None,
effective_context_window_percent: 95,
experimental_supported_tools: Vec::new(),
}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn summarize_context_three_requests_and_instructions() {
skip_if_no_network!();
@@ -1551,6 +1593,359 @@ async fn auto_compact_runs_after_resume_when_token_usage_is_over_limit() {
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auto_compact_runs_pre_sampling_when_switching_to_smaller_context_window_model() {
skip_if_no_network!();
let server = MockServer::start().await;
let large_model = "test-large-context-model";
let small_model = "test-small-context-model";
let compact_summary = "SMALL_CONTEXT_PRE_SAMPLING_SUMMARY";
let over_small_model_limit_tokens = 115_000;
let models_mock = mount_models_once(
&server,
ModelsResponse {
models: vec![
test_remote_model_with_context_window(large_model, 273_000),
test_remote_model_with_context_window(small_model, 125_000),
],
},
)
.await;
let compacted_history = vec![
codex_protocol::models::ResponseItem::Message {
id: None,
role: "assistant".to_string(),
content: vec![codex_protocol::models::ContentItem::OutputText {
text: compact_summary.to_string(),
}],
end_turn: None,
phase: None,
},
codex_protocol::models::ResponseItem::Compaction {
encrypted_content: "ENCRYPTED_SMALL_CONTEXT_PRE_SAMPLING_SUMMARY".to_string(),
},
];
let compact_mock_1 = mount_compact_json_once(
&server,
serde_json::json!({ "output": compacted_history.clone() }),
)
.await;
let compact_mock_2 =
mount_compact_json_once(&server, serde_json::json!({ "output": compacted_history })).await;
mount_sse_once(
&server,
sse(vec![
ev_assistant_message("m1", FIRST_REPLY),
ev_completed_with_tokens("r1", over_small_model_limit_tokens),
]),
)
.await;
let follow_up_user = "smaller model follow up";
let follow_up_mock = mount_sse_once_match(
&server,
move |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(follow_up_user) && body.contains(compact_summary)
},
sse(vec![
ev_assistant_message("m2", FINAL_REPLY),
ev_completed("r2"),
]),
)
.await;
let mut builder = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(move |config| {
config.features.enable(Feature::RemoteModels);
config.model = Some(large_model.to_string());
});
let test = builder.build(&server).await.unwrap();
let models_manager = test.thread_manager.get_models_manager();
let _ = models_manager
.list_models(&test.config, RefreshStrategy::OnlineIfUncached)
.await;
let model_requests = models_mock.requests();
assert_eq!(
model_requests.len(),
1,
"expected a single /models request for online model metadata"
);
assert_eq!(model_requests[0].url.path(), "/v1/models");
test.codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "first turn on large model".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: test.cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: large_model.to_string(),
effort: None,
summary: ReasoningSummary::Auto,
collaboration_mode: None,
personality: None,
})
.await
.unwrap();
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
})
.await;
test.codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: follow_up_user.into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: test.cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: small_model.to_string(),
effort: None,
summary: ReasoningSummary::Auto,
collaboration_mode: None,
personality: None,
})
.await
.unwrap();
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::ContextCompacted(_))
})
.await;
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
})
.await;
let mut compact_requests = compact_mock_1.requests();
compact_requests.extend(compact_mock_2.requests());
assert!(
!compact_requests.is_empty(),
"expected compaction before follow-up request on the smaller model"
);
assert_eq!(compact_requests[0].path(), "/v1/responses/compact");
let first_compact_body = compact_requests[0].body_json();
assert_eq!(
first_compact_body
.get("model")
.and_then(|value| value.as_str()),
Some(large_model),
"expected first pre-sampling compact to run with previous larger model"
);
let follow_up_requests = follow_up_mock.requests();
assert!(
!follow_up_requests.is_empty(),
"expected at least one follow-up /responses request"
);
let follow_up_request = follow_up_requests
.last()
.expect("follow-up request")
.body_json();
assert_eq!(
follow_up_request
.get("model")
.and_then(|value| value.as_str()),
Some(small_model),
"expected follow-up response request to use the smaller model"
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auto_compact_runs_pre_sampling_after_resume_when_switching_to_smaller_context_window_model()
{
skip_if_no_network!();
let server = MockServer::start().await;
let large_model = "test-large-context-model";
let small_model = "test-small-context-model";
let compact_summary = "RESUMED_SMALL_CONTEXT_PRE_SAMPLING_SUMMARY";
let over_small_model_limit_tokens = 115_000;
mount_models_once(
&server,
ModelsResponse {
models: vec![
test_remote_model_with_context_window(large_model, 273_000),
test_remote_model_with_context_window(small_model, 125_000),
],
},
)
.await;
mount_sse_once(
&server,
sse(vec![
ev_assistant_message("m1", FIRST_REPLY),
ev_completed_with_tokens("r1", over_small_model_limit_tokens),
]),
)
.await;
let mut start_builder = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(move |config| {
config.features.enable(Feature::RemoteModels);
config.model = Some(large_model.to_string());
});
let initial = start_builder.build(&server).await.unwrap();
let home = initial.home.clone();
let rollout_path = initial
.session_configured
.rollout_path
.clone()
.expect("rollout path");
let models_manager = initial.thread_manager.get_models_manager();
let _ = models_manager
.list_models(&initial.config, RefreshStrategy::OnlineIfUncached)
.await;
initial
.codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "first turn on large model".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: initial.cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: large_model.to_string(),
effort: None,
summary: ReasoningSummary::Auto,
collaboration_mode: None,
personality: None,
})
.await
.unwrap();
wait_for_event(&initial.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
})
.await;
let compacted_history = vec![
codex_protocol::models::ResponseItem::Message {
id: None,
role: "assistant".to_string(),
content: vec![codex_protocol::models::ContentItem::OutputText {
text: compact_summary.to_string(),
}],
end_turn: None,
phase: None,
},
codex_protocol::models::ResponseItem::Compaction {
encrypted_content: "ENCRYPTED_RESUMED_SMALL_CONTEXT_PRE_SAMPLING_SUMMARY".to_string(),
},
];
let compact_mock_1 = mount_compact_json_once(
&server,
serde_json::json!({ "output": compacted_history.clone() }),
)
.await;
let compact_mock_2 =
mount_compact_json_once(&server, serde_json::json!({ "output": compacted_history })).await;
let follow_up_user = "smaller model follow up after resume";
let follow_up_mock = mount_sse_once_match(
&server,
move |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(follow_up_user) && body.contains(compact_summary)
},
sse(vec![
ev_assistant_message("m2", FINAL_REPLY),
ev_completed("r2"),
]),
)
.await;
let mut resume_builder = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(move |config| {
config.features.enable(Feature::RemoteModels);
config.model = Some(large_model.to_string());
});
let resumed = resume_builder
.resume(&server, home, rollout_path)
.await
.unwrap();
resumed
.codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: follow_up_user.into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: resumed.cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: small_model.to_string(),
effort: None,
summary: ReasoningSummary::Auto,
collaboration_mode: None,
personality: None,
})
.await
.unwrap();
wait_for_event(&resumed.codex, |event| {
matches!(event, EventMsg::ContextCompacted(_))
})
.await;
wait_for_event(&resumed.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
})
.await;
let mut compact_requests = compact_mock_1.requests();
compact_requests.extend(compact_mock_2.requests());
assert!(
!compact_requests.is_empty(),
"expected compaction before follow-up request after resume"
);
assert_eq!(compact_requests[0].path(), "/v1/responses/compact");
let first_compact_body = compact_requests[0].body_json();
assert_eq!(
first_compact_body
.get("model")
.and_then(|value| value.as_str()),
Some(large_model),
"expected first resumed pre-sampling compact to use previous larger model"
);
let follow_up_requests = follow_up_mock.requests();
assert!(
!follow_up_requests.is_empty(),
"expected at least one resumed follow-up /responses request"
);
let follow_up_request = follow_up_requests
.last()
.expect("resumed follow-up request")
.body_json();
assert_eq!(
follow_up_request
.get("model")
.and_then(|value| value.as_str()),
Some(small_model),
"expected resumed follow-up response request to use smaller model"
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auto_compact_persists_rollout_entries() {
skip_if_no_network!();