Files
codex/codex-rs/app-server/tests/common/models_cache.rs
Shijie Rao 370b13afc9 Honor client-resolved service tier defaults (#23537)
## Why

Model catalog responses can now advertise a nullable
`default_service_tier` for each model. Codex needs to preserve three
distinct states all the way from config/app-server inputs to inference:

- no explicit service tier, so the client may apply the current model
catalog default when FastMode is enabled
- explicit `default`, meaning the user intentionally wants standard
routing
- explicit catalog tier ids such as `priority`, `flex`, or future tiers

Keeping those states distinct prevents the UI from showing one tier
while core sends another, especially after model switches or app-server
`thread/start` / `turn/start` updates.

## What Changed

- Plumbed `default_service_tier` through model catalog protocol types,
app-server model responses, generated schemas, model cache fixtures, and
provider/model-manager conversions.
- Added the request-only `default` service tier sentinel and normalized
legacy config spelling so `fast` in `config.toml` still materializes as
the runtime/request id `priority`.
- Moved catalog default resolution to the TUI/client side, including
recomputing the effective service tier when model/FastMode-dependent
surfaces change.
- Updated app-server thread lifecycle config construction so
`serviceTier: null` preserves explicit standard-routing intent by
mapping to `default` instead of internal `None`.
- Kept core responsible for validating explicit tiers against the
current model and stripping `default` before `/v1/responses`, without
applying catalog defaults itself.

## Validation

- `CARGO_INCREMENTAL=0 cargo build -p codex-cli`
- `CARGO_INCREMENTAL=0 cargo test -p codex-app-server model_list`
- `cargo test -p codex-tui service_tier`
- `cargo test -p codex-protocol service_tier_for_request`
- `cargo test -p codex-core get_service_tier`
- `RUST_MIN_STACK=8388608 CARGO_INCREMENTAL=0 cargo test -p codex-core
service_tier`
2026-05-20 15:57:50 -07:00

101 lines
4.2 KiB
Rust

use chrono::DateTime;
use chrono::Utc;
use codex_core::test_support::all_model_presets;
use codex_models_manager::client_version_to_whole;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::openai_models::ConfigShellToolType;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::openai_models::ModelVisibility;
use codex_protocol::openai_models::TruncationPolicyConfig;
use codex_protocol::openai_models::default_input_modalities;
use serde_json::json;
use std::path::Path;
/// Convert a ModelPreset to ModelInfo for cache storage.
fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo {
ModelInfo {
slug: preset.id.clone(),
display_name: preset.display_name.clone(),
description: Some(preset.description.clone()),
default_reasoning_level: Some(preset.default_reasoning_effort),
supported_reasoning_levels: preset.supported_reasoning_efforts.clone(),
shell_type: ConfigShellToolType::ShellCommand,
visibility: if preset.show_in_picker {
ModelVisibility::List
} else {
ModelVisibility::Hide
},
supported_in_api: preset.supported_in_api,
priority,
additional_speed_tiers: preset.additional_speed_tiers.clone(),
service_tiers: preset.service_tiers.clone(),
default_service_tier: preset.default_service_tier.clone(),
upgrade: preset.upgrade.as_ref().map(Into::into),
base_instructions: "base instructions".to_string(),
model_messages: None,
supports_reasoning_summaries: false,
default_reasoning_summary: ReasoningSummary::Auto,
support_verbosity: false,
default_verbosity: None,
availability_nux: preset.availability_nux.clone(),
apply_patch_tool_type: None,
web_search_tool_type: Default::default(),
truncation_policy: TruncationPolicyConfig::bytes(/*limit*/ 10_000),
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(272_000),
max_context_window: None,
auto_compact_token_limit: None,
effective_context_window_percent: 95,
experimental_supported_tools: Vec::new(),
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
}
}
/// Write a models_cache.json file to the codex home directory.
/// This prevents ModelsManager from making network requests to refresh models.
/// The cache will be treated as fresh (within TTL) and used instead of fetching from the network.
/// Uses bundled-catalog-derived presets, converted to ModelInfo format.
pub fn write_models_cache(codex_home: &Path) -> std::io::Result<()> {
// Get a stable bundled-catalog-derived preset list and filter for picker-visible entries.
let presets: Vec<&ModelPreset> = all_model_presets()
.iter()
.filter(|preset| preset.show_in_picker)
.collect();
// Convert presets to ModelInfo, assigning priorities (lower = earlier in list).
// Priority is used for sorting, so the first model gets the lowest priority.
let models: Vec<ModelInfo> = presets
.iter()
.enumerate()
.map(|(idx, preset)| {
// Lower priority = earlier in list.
let priority = idx as i32;
preset_to_info(preset, priority)
})
.collect();
write_models_cache_with_models(codex_home, models)
}
/// Write a models_cache.json file with specific models.
/// Useful when tests need specific models to be available.
pub fn write_models_cache_with_models(
codex_home: &Path,
models: Vec<ModelInfo>,
) -> std::io::Result<()> {
let cache_path = codex_home.join("models_cache.json");
// DateTime<Utc> serializes to RFC3339 format by default with serde
let fetched_at: DateTime<Utc> = Utc::now();
let client_version = client_version_to_whole();
let cache = json!({
"fetched_at": fetched_at,
"etag": null,
"client_version": client_version,
"models": models
});
std::fs::write(cache_path, serde_json::to_string_pretty(&cache)?)
}