mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
Update context window after model switch (#11520)
- Update token usage aggregation to refresh the model context window after a model change.
- Add protocol/core tests, including an e2e model-switch test that validates that switching to a smaller model updates telemetry.
This commit is contained in:
@@ -2499,8 +2499,8 @@ impl Session {
|
||||
total_tokens: estimated_total_tokens.max(0),
|
||||
};
|
||||
|
||||
if info.model_context_window.is_none() {
|
||||
info.model_context_window = turn_context.model_context_window();
|
||||
if let Some(model_context_window) = turn_context.model_context_window() {
|
||||
info.model_context_window = Some(model_context_window);
|
||||
}
|
||||
|
||||
state.set_token_info(Some(info));
|
||||
@@ -5827,6 +5827,28 @@ mod tests {
|
||||
assert_eq!(actual_tokens, expected_tokens.max(0));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn recompute_token_usage_updates_model_context_window() {
|
||||
let (session, mut turn_context) = make_session_and_context().await;
|
||||
|
||||
{
|
||||
let mut state = session.state.lock().await;
|
||||
state.set_token_info(Some(TokenUsageInfo {
|
||||
total_token_usage: TokenUsage::default(),
|
||||
last_token_usage: TokenUsage::default(),
|
||||
model_context_window: Some(258_400),
|
||||
}));
|
||||
}
|
||||
|
||||
turn_context.model_info.context_window = Some(128_000);
|
||||
turn_context.model_info.effective_context_window_percent = 100;
|
||||
|
||||
session.recompute_token_usage(&turn_context).await;
|
||||
|
||||
let actual = session.state.lock().await.token_info().expect("token info");
|
||||
assert_eq!(actual.model_context_window, Some(128_000));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn record_initial_history_reconstructs_forked_transcript() {
|
||||
let (session, turn_context) = make_session_and_context().await;
|
||||
|
||||
@@ -18,8 +18,11 @@ use codex_protocol::openai_models::ReasoningEffortPreset;
|
||||
use codex_protocol::openai_models::TruncationPolicyConfig;
|
||||
use codex_protocol::openai_models::default_input_modalities;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_completed_with_tokens;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_models_once;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::sse_completed;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
@@ -33,7 +36,7 @@ use wiremock::MockServer;
|
||||
async fn model_change_appends_model_instructions_developer_message() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let server = MockServer::start().await;
|
||||
let resp_mock = mount_sse_sequence(
|
||||
&server,
|
||||
vec![sse_completed("resp-1"), sse_completed("resp-2")],
|
||||
@@ -271,7 +274,6 @@ async fn model_change_from_image_to_text_strips_prior_image_content() -> Result<
|
||||
let _ = models_manager
|
||||
.list_models(&test.config, RefreshStrategy::OnlineIfUncached)
|
||||
.await;
|
||||
|
||||
let image_url = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII="
|
||||
.to_string();
|
||||
|
||||
@@ -372,3 +374,220 @@ async fn model_change_from_image_to_text_strips_prior_image_content() -> Result<
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn model_switch_to_smaller_model_updates_token_context_window() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let large_model_slug = "test-image-model";
|
||||
let smaller_model_slug = "test-text-only-model";
|
||||
let large_context_window = 272_000;
|
||||
let smaller_context_window = 128_000;
|
||||
let effective_context_window_percent = 95;
|
||||
let large_effective_window = (large_context_window * effective_context_window_percent) / 100;
|
||||
let smaller_effective_window =
|
||||
(smaller_context_window * effective_context_window_percent) / 100;
|
||||
|
||||
let base_model = ModelInfo {
|
||||
slug: large_model_slug.to_string(),
|
||||
display_name: "Larger Model".to_string(),
|
||||
description: Some("larger context window model".to_string()),
|
||||
default_reasoning_level: Some(ReasoningEffort::Medium),
|
||||
supported_reasoning_levels: vec![ReasoningEffortPreset {
|
||||
effort: ReasoningEffort::Medium,
|
||||
description: ReasoningEffort::Medium.to_string(),
|
||||
}],
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
visibility: ModelVisibility::List,
|
||||
supported_in_api: true,
|
||||
input_modalities: default_input_modalities(),
|
||||
prefer_websockets: false,
|
||||
priority: 1,
|
||||
upgrade: None,
|
||||
base_instructions: "base instructions".to_string(),
|
||||
model_messages: None,
|
||||
supports_reasoning_summaries: false,
|
||||
support_verbosity: false,
|
||||
default_verbosity: None,
|
||||
apply_patch_tool_type: None,
|
||||
truncation_policy: TruncationPolicyConfig::bytes(10_000),
|
||||
supports_parallel_tool_calls: false,
|
||||
context_window: Some(large_context_window),
|
||||
auto_compact_token_limit: None,
|
||||
effective_context_window_percent,
|
||||
experimental_supported_tools: Vec::new(),
|
||||
};
|
||||
let mut smaller_model = base_model.clone();
|
||||
smaller_model.slug = smaller_model_slug.to_string();
|
||||
smaller_model.display_name = "Smaller Model".to_string();
|
||||
smaller_model.description = Some("smaller context window model".to_string());
|
||||
smaller_model.context_window = Some(smaller_context_window);
|
||||
|
||||
mount_models_once(
|
||||
&server,
|
||||
ModelsResponse {
|
||||
models: vec![base_model, smaller_model],
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_completed_with_tokens("resp-1", 100),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_completed_with_tokens("resp-2", 120),
|
||||
]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(|config| {
|
||||
config.features.enable(Feature::RemoteModels);
|
||||
config.model = Some(large_model_slug.to_string());
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let models_manager = test.thread_manager.get_models_manager();
|
||||
let available_models = models_manager
|
||||
.list_models(&test.config, RefreshStrategy::Online)
|
||||
.await;
|
||||
assert!(
|
||||
available_models
|
||||
.iter()
|
||||
.any(|model| model.model == smaller_model_slug),
|
||||
"expected {smaller_model_slug} to be available in remote model list"
|
||||
);
|
||||
let large_model_info = models_manager
|
||||
.get_model_info(large_model_slug, &test.config)
|
||||
.await;
|
||||
assert_eq!(large_model_info.context_window, Some(large_context_window));
|
||||
let smaller_model_info = models_manager
|
||||
.get_model_info(smaller_model_slug, &test.config)
|
||||
.await;
|
||||
assert_eq!(
|
||||
smaller_model_info.context_window,
|
||||
Some(smaller_context_window)
|
||||
);
|
||||
|
||||
test.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "use larger model".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: test.cwd_path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
model: large_model_slug.to_string(),
|
||||
effort: test.config.model_reasoning_effort,
|
||||
summary: ReasoningSummary::Auto,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let large_window_event = wait_for_event(&test.codex, |event| {
|
||||
matches!(
|
||||
event,
|
||||
EventMsg::TokenCount(token_count)
|
||||
if token_count
|
||||
.info
|
||||
.as_ref()
|
||||
.is_some_and(|info| info.last_token_usage.total_tokens == 100)
|
||||
)
|
||||
})
|
||||
.await;
|
||||
let EventMsg::TokenCount(large_token_count) = large_window_event else {
|
||||
unreachable!("wait_for_event returned unexpected event");
|
||||
};
|
||||
assert_eq!(
|
||||
large_token_count
|
||||
.info
|
||||
.as_ref()
|
||||
.and_then(|info| info.model_context_window),
|
||||
Some(large_effective_window)
|
||||
);
|
||||
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
test.codex
|
||||
.submit(Op::OverrideTurnContext {
|
||||
cwd: None,
|
||||
approval_policy: None,
|
||||
sandbox_policy: None,
|
||||
windows_sandbox_level: None,
|
||||
model: Some(smaller_model_slug.to_string()),
|
||||
effort: None,
|
||||
summary: None,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
test.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "switch to smaller model".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: test.cwd_path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
model: smaller_model_slug.to_string(),
|
||||
effort: test.config.model_reasoning_effort,
|
||||
summary: ReasoningSummary::Auto,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let smaller_turn_started_event = wait_for_event(&test.codex, |event| {
|
||||
matches!(
|
||||
event,
|
||||
EventMsg::TurnStarted(started)
|
||||
if started.model_context_window == Some(smaller_effective_window)
|
||||
)
|
||||
})
|
||||
.await;
|
||||
let EventMsg::TurnStarted(smaller_turn_started) = smaller_turn_started_event else {
|
||||
unreachable!("wait_for_event returned unexpected event");
|
||||
};
|
||||
assert_eq!(
|
||||
smaller_turn_started.model_context_window,
|
||||
Some(smaller_effective_window)
|
||||
);
|
||||
|
||||
let smaller_window_event = wait_for_event(&test.codex, |event| {
|
||||
matches!(
|
||||
event,
|
||||
EventMsg::TokenCount(token_count)
|
||||
if token_count
|
||||
.info
|
||||
.as_ref()
|
||||
.is_some_and(|info| info.last_token_usage.total_tokens == 120)
|
||||
)
|
||||
})
|
||||
.await;
|
||||
let EventMsg::TokenCount(smaller_token_count) = smaller_window_event else {
|
||||
unreachable!("wait_for_event returned unexpected event");
|
||||
};
|
||||
let smaller_window = smaller_token_count
|
||||
.info
|
||||
.as_ref()
|
||||
.and_then(|info| info.model_context_window);
|
||||
assert_eq!(smaller_window, Some(smaller_effective_window));
|
||||
assert_ne!(smaller_window, Some(large_effective_window));
|
||||
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1206,6 +1206,9 @@ impl TokenUsageInfo {
|
||||
if let Some(last) = last {
|
||||
info.append_last_usage(last);
|
||||
}
|
||||
if let Some(model_context_window) = model_context_window {
|
||||
info.model_context_window = Some(model_context_window);
|
||||
}
|
||||
Some(info)
|
||||
}
|
||||
|
||||
@@ -2820,4 +2823,46 @@ mod tests {
|
||||
assert_eq!(value["msg"]["cancelled"][0], "c");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// When a context window is passed to `TokenUsageInfo::new_or_append`, the
/// returned info must carry that window rather than the previously stored one.
#[test]
fn token_usage_info_new_or_append_updates_context_window_when_provided() {
    // Existing info recorded with a different window.
    let previous = Some(TokenUsageInfo {
        total_token_usage: TokenUsage::default(),
        last_token_usage: TokenUsage::default(),
        model_context_window: Some(258_400),
    });
    let latest_usage = Some(TokenUsage {
        input_tokens: 10,
        cached_input_tokens: 0,
        output_tokens: 0,
        reasoning_output_tokens: 0,
        total_tokens: 10,
    });

    let updated = TokenUsageInfo::new_or_append(&previous, &latest_usage, Some(128_000));
    let updated = updated.expect("new_or_append should return info");

    assert_eq!(updated.model_context_window, Some(128_000));
}
|
||||
|
||||
/// When no context window is passed to `TokenUsageInfo::new_or_append`, the
/// returned info must keep the window that was already stored.
#[test]
fn token_usage_info_new_or_append_preserves_context_window_when_not_provided() {
    let previous = Some(TokenUsageInfo {
        total_token_usage: TokenUsage::default(),
        last_token_usage: TokenUsage::default(),
        model_context_window: Some(258_400),
    });
    let latest_usage = Some(TokenUsage {
        input_tokens: 10,
        cached_input_tokens: 0,
        output_tokens: 0,
        reasoning_output_tokens: 0,
        total_tokens: 10,
    });

    // `None` means the caller has no new window to report.
    let updated = TokenUsageInfo::new_or_append(&previous, &latest_usage, None);
    let updated = updated.expect("new_or_append should return info");

    assert_eq!(updated.model_context_window, Some(258_400));
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user