Compare commits

...

5 Commits

Author SHA1 Message Date
kevin zhao
6652d8561e improving behavior w.r.t context window and rate limits for non openai models 2025-10-28 10:46:02 -07:00
zhao-oai
07c1bd927f Merge branch 'main' into fixing-model-info 2025-10-27 17:58:57 -07:00
zhao-oai
99cfd41761 Merge branch 'main' into fixing-model-info 2025-10-27 17:26:39 -07:00
zhao-oai
25993584bd Merge branch 'main' into fixing-model-info 2025-10-27 16:25:21 -07:00
kevin zhao
470b13c26f normalizing model slug in get_model_info 2025-10-27 14:43:54 -07:00
3 changed files with 142 additions and 25 deletions

View File

@@ -37,8 +37,10 @@ impl ModelInfo {
}
pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
let slug = model_family.slug.as_str();
match slug {
let raw_slug = model_family.slug.as_str();
let slug = raw_slug.strip_prefix("openai/").unwrap_or(raw_slug);
let normalized_slug = slug.replace(':', "-");
match normalized_slug.as_str() {
// OSS models have a 128k shared token pool.
// Arbitrarily splitting it: 3/4 input context, 1/4 output.
// https://openai.com/index/gpt-oss-model-card/

View File

@@ -6,6 +6,7 @@ use crate::version::CODEX_CLI_VERSION;
use chrono::DateTime;
use chrono::Local;
use codex_common::create_config_summary_entries;
use codex_core::WireApi;
use codex_core::config::Config;
use codex_core::protocol::SandboxPolicy;
use codex_core::protocol::TokenUsage;
@@ -34,9 +35,9 @@ use super::rate_limits::render_status_limit_progress_bar;
#[derive(Debug, Clone)]
struct StatusContextWindowData {
percent_remaining: i64,
tokens_in_context: i64,
window: i64,
percent_remaining: Option<i64>,
tokens_in_context: Option<i64>,
}
#[derive(Debug, Clone)]
@@ -59,6 +60,7 @@ struct StatusHistoryCell {
session_id: Option<String>,
token_usage: StatusTokenUsageData,
rate_limits: StatusRateLimitData,
wire_api: WireApi,
}
pub(crate) fn new_status_output(
@@ -93,6 +95,7 @@ impl StatusHistoryCell {
) -> Self {
let config_entries = create_config_summary_entries(config);
let (model_name, model_details) = compose_model_display(config, &config_entries);
let wire_api = config.model_provider.wire_api;
let approval = config_entries
.iter()
.find(|(k, _)| *k == "approval")
@@ -106,13 +109,21 @@ impl StatusHistoryCell {
let agents_summary = compose_agents_summary(config);
let account = compose_account_display(config);
let session_id = session_id.as_ref().map(std::string::ToString::to_string);
let context_window = config.model_context_window.and_then(|window| {
context_usage.map(|usage| StatusContextWindowData {
percent_remaining: usage.percent_of_context_window_remaining(window),
tokens_in_context: usage.tokens_in_context_window(),
let context_window = match (wire_api, config.model_context_window) {
(WireApi::Responses, Some(window)) => {
context_usage.map(|usage| StatusContextWindowData {
window,
percent_remaining: Some(usage.percent_of_context_window_remaining(window)),
tokens_in_context: Some(usage.tokens_in_context_window()),
})
}
(WireApi::Chat, Some(window)) => Some(StatusContextWindowData {
window,
})
});
percent_remaining: None,
tokens_in_context: None,
}),
_ => None,
};
let token_usage = StatusTokenUsageData {
total: total_usage.blended_total(),
@@ -133,6 +144,7 @@ impl StatusHistoryCell {
session_id,
token_usage,
rate_limits,
wire_api,
}
}
@@ -155,26 +167,42 @@ impl StatusHistoryCell {
}
fn context_window_spans(&self) -> Option<Vec<Span<'static>>> {
let context = self.token_usage.context_window.as_ref()?;
let percent = context.percent_remaining;
let used_fmt = format_tokens_compact(context.tokens_in_context);
let window_fmt = format_tokens_compact(context.window);
match self.wire_api {
WireApi::Responses => {
let context = self.token_usage.context_window.as_ref()?;
let percent = context.percent_remaining?;
let tokens_in_context = context.tokens_in_context?;
let window = context.window;
let used_fmt = format_tokens_compact(tokens_in_context);
let window_fmt = format_tokens_compact(window);
Some(vec![
Span::from(format!("{percent}% left")),
Span::from(" (").dim(),
Span::from(used_fmt).dim(),
Span::from(" used / ").dim(),
Span::from(window_fmt).dim(),
Span::from(")").dim(),
])
Some(vec![
Span::from(format!("{percent}% left")),
Span::from(" (").dim(),
Span::from(used_fmt).dim(),
Span::from(" used / ").dim(),
Span::from(window_fmt).dim(),
Span::from(")").dim(),
])
}
WireApi::Chat => match self.token_usage.context_window.as_ref() {
Some(context) => {
let window_fmt = format_tokens_compact(context.window);
Some(vec![Span::from(format!("{window_fmt} tokens"))])
}
None => Some(vec!["unknown".dim()]),
},
}
}
fn rate_limit_lines(
&self,
available_inner_width: usize,
formatter: &FieldFormatter,
) -> Vec<Line<'static>> {
if self.wire_api != WireApi::Responses {
return Vec::new();
}
match &self.rate_limits {
StatusRateLimitData::Available(rows_data) => {
if rows_data.is_empty() {
@@ -244,6 +272,10 @@ impl StatusHistoryCell {
}
fn collect_rate_limit_labels(&self, seen: &mut BTreeSet<String>, labels: &mut Vec<String>) {
if self.wire_api != WireApi::Responses {
return;
}
match &self.rate_limits {
StatusRateLimitData::Available(rows) => {
if rows.is_empty() {
@@ -299,6 +331,7 @@ impl HistoryCell for StatusHistoryCell {
.map(str::to_string)
.collect();
let mut seen: BTreeSet<String> = labels.iter().cloned().collect();
let context_spans = self.context_window_spans();
if account_value.is_some() {
push_label(&mut labels, &mut seen, "Account");
@@ -307,7 +340,7 @@ impl HistoryCell for StatusHistoryCell {
push_label(&mut labels, &mut seen, "Session");
}
push_label(&mut labels, &mut seen, "Token usage");
if self.token_usage.context_window.is_some() {
if context_spans.is_some() {
push_label(&mut labels, &mut seen, "Context window");
}
self.collect_rate_limit_labels(&mut seen, &mut labels);
@@ -344,7 +377,7 @@ impl HistoryCell for StatusHistoryCell {
lines.push(formatter.line("Token usage", self.token_usage_spans()));
}
if let Some(spans) = self.context_window_spans() {
if let Some(spans) = context_spans {
lines.push(formatter.line("Context window", spans));
}

View File

@@ -4,6 +4,7 @@ use crate::history_cell::HistoryCell;
use chrono::Duration as ChronoDuration;
use chrono::TimeZone;
use chrono::Utc;
use codex_core::WireApi;
use codex_core::config::Config;
use codex_core::config::ConfigOverrides;
use codex_core::config::ConfigToml;
@@ -387,6 +388,7 @@ fn status_snapshot_shows_stale_limits_message() {
fn status_context_window_uses_last_usage() {
let temp_home = TempDir::new().expect("temp home");
let mut config = test_config(&temp_home);
config.model_provider.wire_api = WireApi::Responses;
config.model_context_window = Some(272_000);
let total_usage = TokenUsage {
@@ -425,3 +427,83 @@ fn status_context_window_uses_last_usage() {
"context line should not use total aggregated tokens, got: {context_line}"
);
}
#[test]
fn status_context_window_for_chat_provider_shows_window_only() {
let temp_home = TempDir::new().expect("temp home");
let mut config = test_config(&temp_home);
config.model_provider.wire_api = WireApi::Chat;
config.model_context_window = Some(272_000);
config.cwd = PathBuf::from("/workspace/tests");
let usage = TokenUsage {
input_tokens: 900,
cached_input_tokens: 0,
output_tokens: 400,
reasoning_output_tokens: 0,
total_tokens: 1_300,
};
let now = chrono::Local
.with_ymd_and_hms(2024, 7, 1, 0, 0, 0)
.single()
.expect("timestamp");
let composite = new_status_output(&config, &usage, None, &None, None, now);
let rendered = render_lines(&composite.display_lines(80));
let context_line = rendered
.iter()
.find(|line| line.contains("Context window"))
.expect("context line");
assert!(
context_line.contains("272K tokens"),
"expected context line to show window length only, got: {context_line}"
);
assert!(
!context_line.contains("used /"),
"context line should not include context usage breakdown, got: {context_line}"
);
assert!(
rendered.iter().all(|line| !line.contains("Limits")),
"rate limits should be hidden for chat providers: {rendered:?}"
);
}
#[test]
fn status_context_window_for_chat_provider_unknown_window() {
let temp_home = TempDir::new().expect("temp home");
let mut config = test_config(&temp_home);
config.model_provider.wire_api = WireApi::Chat;
config.model_context_window = None;
config.cwd = PathBuf::from("/workspace/tests");
let usage = TokenUsage {
input_tokens: 900,
cached_input_tokens: 0,
output_tokens: 400,
reasoning_output_tokens: 0,
total_tokens: 1_300,
};
let now = chrono::Local
.with_ymd_and_hms(2024, 7, 2, 0, 0, 0)
.single()
.expect("timestamp");
let composite = new_status_output(&config, &usage, None, &None, None, now);
let rendered = render_lines(&composite.display_lines(80));
let context_line = rendered
.iter()
.find(|line| line.contains("Context window"))
.expect("context line");
assert!(
context_line.contains("unknown"),
"expected context line to show unknown when window is unset, got: {context_line}"
);
assert!(
rendered.iter().all(|line| !line.contains("Limits")),
"rate limits should be hidden for chat providers: {rendered:?}"
);
}