Fix model switch compaction

This commit is contained in:
Charles Cunningham
2026-02-13 18:03:53 -08:00
parent eb68767f2f
commit 2d4123c267

View File

@@ -4479,7 +4479,7 @@ pub(crate) async fn run_turn(
// as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.
if token_limit_reached && needs_follow_up {
if run_auto_compact(&sess, &turn_context).await.is_err() {
if run_auto_compact(&sess, &turn_context, false).await.is_err() {
return None;
}
continue;
@@ -4586,12 +4586,23 @@ async fn run_pre_sampling_compact(
turn_context: &Arc<TurnContext>,
) -> CodexResult<()> {
let total_usage_tokens_before_compaction = sess.get_total_token_usage().await;
maybe_run_previous_model_inline_compact(
let previous_model = sess.previous_model().await;
let previous_model_compaction_ran = maybe_run_previous_model_inline_compact(
sess,
turn_context,
total_usage_tokens_before_compaction,
)
.await?;
if previous_model_compaction_ran
&& let Some(model_switch_item) = sess.build_model_instructions_update_item(
None,
previous_model.as_deref(),
turn_context.as_ref(),
)
{
sess.record_conversation_items(turn_context.as_ref(), &[model_switch_item])
.await;
}
let total_usage_tokens = sess.get_total_token_usage().await;
let auto_compact_limit = turn_context
.model_info
@@ -4599,7 +4610,7 @@ async fn run_pre_sampling_compact(
.unwrap_or(i64::MAX);
// Compact if the total usage tokens are greater than the auto compact limit
if total_usage_tokens >= auto_compact_limit {
run_auto_compact(sess, turn_context).await?;
run_auto_compact(sess, turn_context, false).await?;
}
Ok(())
}
@@ -4614,9 +4625,9 @@ async fn maybe_run_previous_model_inline_compact(
sess: &Arc<Session>,
turn_context: &Arc<TurnContext>,
total_usage_tokens: i64,
) -> CodexResult<()> {
) -> CodexResult<bool> {
let Some(previous_model) = sess.previous_model().await else {
return Ok(());
return Ok(false);
};
let previous_turn_context = Arc::new(
turn_context
@@ -4625,10 +4636,10 @@ async fn maybe_run_previous_model_inline_compact(
);
let Some(old_context_window) = previous_turn_context.model_context_window() else {
return Ok(());
return Ok(false);
};
let Some(new_context_window) = turn_context.model_context_window() else {
return Ok(());
return Ok(false);
};
let new_auto_compact_limit = turn_context
.model_info
@@ -4638,16 +4649,31 @@ async fn maybe_run_previous_model_inline_compact(
&& previous_turn_context.model_info.slug != turn_context.model_info.slug
&& old_context_window > new_context_window;
if should_run {
run_auto_compact(sess, &previous_turn_context).await?;
run_auto_compact(sess, &previous_turn_context, true).await?;
return Ok(true);
}
Ok(())
Ok(false)
}
async fn run_auto_compact(sess: &Arc<Session>, turn_context: &Arc<TurnContext>) -> CodexResult<()> {
async fn run_auto_compact(
sess: &Arc<Session>,
turn_context: &Arc<TurnContext>,
strip_trailing_model_switch_update: bool,
) -> CodexResult<()> {
if should_use_remote_compact_task(&turn_context.provider) {
run_inline_remote_auto_compact_task(Arc::clone(sess), Arc::clone(turn_context)).await?;
run_inline_remote_auto_compact_task(
Arc::clone(sess),
Arc::clone(turn_context),
strip_trailing_model_switch_update,
)
.await?;
} else {
run_inline_auto_compact_task(Arc::clone(sess), Arc::clone(turn_context)).await?;
run_inline_auto_compact_task(
Arc::clone(sess),
Arc::clone(turn_context),
strip_trailing_model_switch_update,
)
.await?;
}
Ok(())
}