diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 7744480ab4..2e29b5c361 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -4479,7 +4479,7 @@ pub(crate) async fn run_turn( // as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop. if token_limit_reached && needs_follow_up { - if run_auto_compact(&sess, &turn_context).await.is_err() { + if run_auto_compact(&sess, &turn_context, false).await.is_err() { return None; } continue; @@ -4586,12 +4586,23 @@ async fn run_pre_sampling_compact( turn_context: &Arc, ) -> CodexResult<()> { let total_usage_tokens_before_compaction = sess.get_total_token_usage().await; - maybe_run_previous_model_inline_compact( + let previous_model = sess.previous_model().await; + let previous_model_compaction_ran = maybe_run_previous_model_inline_compact( sess, turn_context, total_usage_tokens_before_compaction, ) .await?; + if previous_model_compaction_ran + && let Some(model_switch_item) = sess.build_model_instructions_update_item( + None, + previous_model.as_deref(), + turn_context.as_ref(), + ) + { + sess.record_conversation_items(turn_context.as_ref(), &[model_switch_item]) + .await; + } let total_usage_tokens = sess.get_total_token_usage().await; let auto_compact_limit = turn_context .model_info @@ -4599,7 +4610,7 @@ async fn run_pre_sampling_compact( .unwrap_or(i64::MAX); // Compact if the total usage tokens are greater than the auto compact limit if total_usage_tokens >= auto_compact_limit { - run_auto_compact(sess, turn_context).await?; + run_auto_compact(sess, turn_context, false).await?; } Ok(()) } @@ -4614,9 +4625,9 @@ async fn maybe_run_previous_model_inline_compact( sess: &Arc, turn_context: &Arc, total_usage_tokens: i64, -) -> CodexResult<()> { +) -> CodexResult { let Some(previous_model) = sess.previous_model().await else { - return Ok(()); + return Ok(false); }; let previous_turn_context = Arc::new( turn_context @@ -4625,10 +4636,10 @@ async fn maybe_run_previous_model_inline_compact( ); let Some(old_context_window) = previous_turn_context.model_context_window() else { - return Ok(()); + return Ok(false); }; let Some(new_context_window) = turn_context.model_context_window() else { - return Ok(()); + return Ok(false); }; let new_auto_compact_limit = turn_context .model_info @@ -4638,16 +4649,31 @@ async fn maybe_run_previous_model_inline_compact( && previous_turn_context.model_info.slug != turn_context.model_info.slug && old_context_window > new_context_window; if should_run { - run_auto_compact(sess, &previous_turn_context).await?; + run_auto_compact(sess, &previous_turn_context, true).await?; + return Ok(true); } - Ok(()) + Ok(false) } -async fn run_auto_compact(sess: &Arc, turn_context: &Arc) -> CodexResult<()> { +async fn run_auto_compact( + sess: &Arc, + turn_context: &Arc, + strip_trailing_model_switch_update: bool, +) -> CodexResult<()> { if should_use_remote_compact_task(&turn_context.provider) { - run_inline_remote_auto_compact_task(Arc::clone(sess), Arc::clone(turn_context)).await?; + run_inline_remote_auto_compact_task( + Arc::clone(sess), + Arc::clone(turn_context), + strip_trailing_model_switch_update, + ) + .await?; } else { - run_inline_auto_compact_task(Arc::clone(sess), Arc::clone(turn_context)).await?; + run_inline_auto_compact_task( + Arc::clone(sess), + Arc::clone(turn_context), + strip_trailing_model_switch_update, + ) + .await?; } Ok(()) }