Clamp auto-compact limit to context window (#11516)

- Clamp auto-compaction to the minimum of configured limit and 90% of
context window
- Add an e2e compact test for clamped behavior
- Update remote compact tests to account for earlier auto-compaction in
setup turns
This commit is contained in:
Ahmed Ibrahim
2026-02-11 17:41:08 -08:00
committed by GitHub
parent 6938150c5e
commit 40de788c4d
3 changed files with 70 additions and 10 deletions

View File

@@ -238,7 +238,8 @@ pub struct ModelInfo {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub context_window: Option<i64>,
/// Token threshold for automatic compaction. When omitted, core derives it
/// from `context_window` (90%).
/// from `context_window` (90%). When provided, core clamps it to 90% of the
/// context window when available.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub auto_compact_token_limit: Option<i64>,
/// Percentage of the context window considered usable for inputs, after
@@ -256,10 +257,16 @@ pub struct ModelInfo {
impl ModelInfo {
/// Returns the token threshold used for automatic compaction, or `None` when
/// neither a configured limit nor a context window is available.
///
/// NOTE(review): this listing is a diff hunk rendered without `+`/`-` markers,
/// so the pre-change and post-change bodies appear back to back below. They
/// are two alternative implementations, not one compilable function body.
pub fn auto_compact_token_limit(&self) -> Option<i64> {
// --- Pre-change body (the lines this commit removes) ---
// A configured `auto_compact_token_limit` was returned unchanged; 90% of
// `context_window` was used only as a fallback when no limit was configured.
self.auto_compact_token_limit.or_else(|| {
self.context_window
.map(|context_window| (context_window * 9) / 10)
})
// --- Post-change body (the lines this commit adds) ---
// 90% of the context window, computed with integer arithmetic (the division
// truncates). `None` when the context window is unknown.
// NOTE(review): `context_window * 9` could overflow for extremely large i64
// values — presumably unreachable for real context windows; confirm.
let context_limit = self
.context_window
.map(|context_window| (context_window * 9) / 10);
let config_limit = self.auto_compact_token_limit;
// Context window known: the context-derived limit acts as an upper bound.
if let Some(context_limit) = context_limit {
return Some(
// No configured limit -> use the context-derived limit as-is;
// otherwise take the smaller of the two values.
config_limit.map_or(context_limit, |limit| std::cmp::min(limit, context_limit)),
);
}
// Context window unknown: nothing to clamp against, so the configured limit
// (possibly `None`) is returned unchanged.
config_limit
}
pub fn supports_personality(&self) -> bool {