Mirror of https://github.com/openai/codex.git
chore: drop model_max_output_tokens (#7100)
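Removes the `model_max_output_tokens` config option and the per-model `max_output_tokens` metadata end to end: the `Config`/`ConfigToml` fields, the `ModelInfo` field and constructor parameter, the OTel event fields, the docs, and the example config. `ModelInfo` now carries only the context window plus an auto-compaction limit derived from it.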
@@ -543,7 +543,6 @@ impl Session {
|
||||
config.model_reasoning_effort,
|
||||
config.model_reasoning_summary,
|
||||
config.model_context_window,
|
||||
config.model_max_output_tokens,
|
||||
config.model_auto_compact_token_limit,
|
||||
config.approval_policy,
|
||||
config.sandbox_policy.clone(),
|
||||
|
||||
@@ -86,9 +86,6 @@ pub struct Config {
|
||||
/// Size of the context window for the model, in tokens.
|
||||
pub model_context_window: Option<i64>,
|
||||
|
||||
/// Maximum number of output tokens.
|
||||
pub model_max_output_tokens: Option<i64>,
|
||||
|
||||
/// Token usage threshold triggering auto-compaction of conversation history.
|
||||
pub model_auto_compact_token_limit: Option<i64>,
|
||||
|
||||
@@ -570,9 +567,6 @@ pub struct ConfigToml {
|
||||
/// Size of the context window for the model, in tokens.
|
||||
pub model_context_window: Option<i64>,
|
||||
|
||||
/// Maximum number of output tokens.
|
||||
pub model_max_output_tokens: Option<i64>,
|
||||
|
||||
/// Token usage threshold triggering auto-compaction of conversation history.
|
||||
pub model_auto_compact_token_limit: Option<i64>,
|
||||
|
||||
@@ -1122,11 +1116,6 @@ impl Config {
         let model_context_window = cfg
             .model_context_window
             .or_else(|| openai_model_info.as_ref().map(|info| info.context_window));
-        let model_max_output_tokens = cfg.model_max_output_tokens.or_else(|| {
-            openai_model_info
-                .as_ref()
-                .map(|info| info.max_output_tokens)
-        });
         let model_auto_compact_token_limit = cfg.model_auto_compact_token_limit.or_else(|| {
             openai_model_info
                 .as_ref()
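The surviving fields are resolved with the same pattern seen above: an explicit config value wins, and otherwise the built-in model metadata fills it in via `Option::or_else`. A minimal, self-contained sketch of that fallback, using a hypothetical stand-in type rather than the real `Config`/`ModelInfo`:

```rust
// Hypothetical, simplified stand-in for the crate's model metadata type.
struct ModelInfoLite {
    context_window: i64,
}

/// Explicit config value wins; otherwise fall back to built-in metadata.
fn resolve_context_window(
    configured: Option<i64>,
    info: Option<&ModelInfoLite>,
) -> Option<i64> {
    configured.or_else(|| info.map(|i| i.context_window))
}

fn main() {
    let info = ModelInfoLite { context_window: 272_000 };
    // No explicit override: falls back to the model's built-in window.
    assert_eq!(resolve_context_window(None, Some(&info)), Some(272_000));
    // Explicit override takes precedence.
    assert_eq!(resolve_context_window(Some(128_000), Some(&info)), Some(128_000));
    // Unknown model and no override: stays None.
    assert_eq!(resolve_context_window(None, None), None);
}
```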
@@ -1178,7 +1167,6 @@ impl Config {
             review_model,
             model_family,
             model_context_window,
-            model_max_output_tokens,
             model_auto_compact_token_limit,
             model_provider_id,
             model_provider,
@@ -2961,7 +2949,6 @@ model_verbosity = "high"
             review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
             model_family: find_family_for_model("o3").expect("known model slug"),
             model_context_window: Some(200_000),
-            model_max_output_tokens: Some(100_000),
             model_auto_compact_token_limit: Some(180_000),
             model_provider_id: "openai".to_string(),
             model_provider: fixture.openai_provider.clone(),
@@ -3034,7 +3021,6 @@ model_verbosity = "high"
             review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
             model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
             model_context_window: Some(16_385),
-            model_max_output_tokens: Some(4_096),
             model_auto_compact_token_limit: Some(14_746),
             model_provider_id: "openai-chat-completions".to_string(),
             model_provider: fixture.openai_chat_completions_provider.clone(),
@@ -3122,7 +3108,6 @@ model_verbosity = "high"
             review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
             model_family: find_family_for_model("o3").expect("known model slug"),
             model_context_window: Some(200_000),
-            model_max_output_tokens: Some(100_000),
             model_auto_compact_token_limit: Some(180_000),
             model_provider_id: "openai".to_string(),
             model_provider: fixture.openai_provider.clone(),
@@ -3196,7 +3181,6 @@ model_verbosity = "high"
             review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
             model_family: find_family_for_model("gpt-5.1").expect("known model slug"),
             model_context_window: Some(272_000),
-            model_max_output_tokens: Some(128_000),
             model_auto_compact_token_limit: Some(244_800),
             model_provider_id: "openai".to_string(),
             model_provider: fixture.openai_provider.clone(),
@@ -2,7 +2,6 @@ use crate::model_family::ModelFamily;
 
 // Shared constants for commonly used window/token sizes.
 pub(crate) const CONTEXT_WINDOW_272K: i64 = 272_000;
-pub(crate) const MAX_OUTPUT_TOKENS_128K: i64 = 128_000;
 
 /// Metadata about a model, particularly OpenAI models.
 /// We may want to consider including details like the pricing for
@@ -14,19 +13,15 @@ pub(crate) struct ModelInfo {
     /// Size of the context window in tokens. This is the maximum size of the input context.
     pub(crate) context_window: i64,
 
-    /// Maximum number of output tokens that can be generated for the model.
-    pub(crate) max_output_tokens: i64,
-
     /// Token threshold where we should automatically compact conversation history. This considers
     /// input tokens + output tokens of this turn.
     pub(crate) auto_compact_token_limit: Option<i64>,
 }
 
 impl ModelInfo {
-    const fn new(context_window: i64, max_output_tokens: i64) -> Self {
+    const fn new(context_window: i64) -> Self {
         Self {
             context_window,
-            max_output_tokens,
             auto_compact_token_limit: Some(Self::default_auto_compact_limit(context_window)),
         }
     }
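`default_auto_compact_limit` is not shown in this diff, but the updated fixtures earlier in the commit pin its behavior: 180_000 for a 200_000 window, 14_746 for 16_385, and 244_800 for 272_000, each exactly 90% of the context window rounded down. A sketch under that assumption (not the crate's actual body):

```rust
/// Sketch of the default implied by the fixtures in this commit:
/// auto-compact at 90% of the context window, integer (floor) division.
const fn default_auto_compact_limit(context_window: i64) -> i64 {
    context_window * 9 / 10
}

fn main() {
    assert_eq!(default_auto_compact_limit(200_000), 180_000); // o3 fixture
    assert_eq!(default_auto_compact_limit(16_385), 14_746); // gpt-3.5-turbo fixture
    assert_eq!(default_auto_compact_limit(272_000), 244_800); // gpt-5.1 fixture
}
```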
@@ -42,48 +37,44 @@ pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
         // OSS models have a 128k shared token pool.
         // Arbitrarily splitting it: 3/4 input context, 1/4 output.
         // https://openai.com/index/gpt-oss-model-card/
-        "gpt-oss-20b" => Some(ModelInfo::new(96_000, 32_000)),
-        "gpt-oss-120b" => Some(ModelInfo::new(96_000, 32_000)),
+        "gpt-oss-20b" => Some(ModelInfo::new(96_000)),
+        "gpt-oss-120b" => Some(ModelInfo::new(96_000)),
         // https://platform.openai.com/docs/models/o3
-        "o3" => Some(ModelInfo::new(200_000, 100_000)),
+        "o3" => Some(ModelInfo::new(200_000)),
 
         // https://platform.openai.com/docs/models/o4-mini
-        "o4-mini" => Some(ModelInfo::new(200_000, 100_000)),
+        "o4-mini" => Some(ModelInfo::new(200_000)),
 
         // https://platform.openai.com/docs/models/codex-mini-latest
-        "codex-mini-latest" => Some(ModelInfo::new(200_000, 100_000)),
+        "codex-mini-latest" => Some(ModelInfo::new(200_000)),
 
         // As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
         // https://platform.openai.com/docs/models/gpt-4.1
-        "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo::new(1_047_576, 32_768)),
+        "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo::new(1_047_576)),
 
         // As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
         // https://platform.openai.com/docs/models/gpt-4o
-        "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo::new(128_000, 16_384)),
+        "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo::new(128_000)),
 
         // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
-        "gpt-4o-2024-05-13" => Some(ModelInfo::new(128_000, 4_096)),
+        "gpt-4o-2024-05-13" => Some(ModelInfo::new(128_000)),
 
         // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
-        "gpt-4o-2024-11-20" => Some(ModelInfo::new(128_000, 16_384)),
+        "gpt-4o-2024-11-20" => Some(ModelInfo::new(128_000)),
 
         // https://platform.openai.com/docs/models/gpt-3.5-turbo
-        "gpt-3.5-turbo" => Some(ModelInfo::new(16_385, 4_096)),
+        "gpt-3.5-turbo" => Some(ModelInfo::new(16_385)),
 
         _ if slug.starts_with("gpt-5-codex")
             || slug.starts_with("gpt-5.1-codex")
             || slug.starts_with("gpt-5.1-codex-max") =>
         {
-            Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
+            Some(ModelInfo::new(CONTEXT_WINDOW_272K))
         }
 
-        _ if slug.starts_with("gpt-5") => {
-            Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
-        }
+        _ if slug.starts_with("gpt-5") => Some(ModelInfo::new(CONTEXT_WINDOW_272K)),
 
-        _ if slug.starts_with("codex-") => {
-            Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
-        }
+        _ if slug.starts_with("codex-") => Some(ModelInfo::new(CONTEXT_WINDOW_272K)),
 
         _ => None,
     }
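The match resolves a slug to metadata: exact arms first, then `starts_with` prefix fallbacks for the `gpt-5*` and `codex-*` families (so dated snapshots still resolve), then `None` for unknown models. Since every arm now carries only a context window, and the three prefix arms all return `CONTEXT_WINDOW_272K` after this change, a toy version of the lookup collapses to something like the following (plain strings instead of the crate's `ModelFamily`):

```rust
// Toy version of the slug-based lookup: exact arms first, then prefix
// fallbacks, then None. Window values mirror the ones in this commit;
// the gpt-5* arms are merged here because they now share one value.
fn context_window_for(slug: &str) -> Option<i64> {
    match slug {
        "gpt-oss-20b" | "gpt-oss-120b" => Some(96_000),
        "o3" | "o4-mini" | "codex-mini-latest" => Some(200_000),
        _ if slug.starts_with("gpt-5") || slug.starts_with("codex-") => Some(272_000),
        _ => None,
    }
}

fn main() {
    assert_eq!(context_window_for("gpt-5.1-codex-max"), Some(272_000));
    assert_eq!(context_window_for("o3"), Some(200_000));
    assert_eq!(context_window_for("some-unknown-model"), None);
}
```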
@@ -88,7 +88,6 @@ impl OtelEventManager {
         reasoning_effort: Option<ReasoningEffort>,
         reasoning_summary: ReasoningSummary,
         context_window: Option<i64>,
-        max_output_tokens: Option<i64>,
         auto_compact_token_limit: Option<i64>,
         approval_policy: AskForApproval,
         sandbox_policy: SandboxPolicy,
@@ -111,7 +110,6 @@ impl OtelEventManager {
            reasoning_effort = reasoning_effort.map(|e| e.to_string()),
            reasoning_summary = %reasoning_summary,
            context_window = context_window,
-            max_output_tokens = max_output_tokens,
            auto_compact_token_limit = auto_compact_token_limit,
            approval_policy = %approval_policy,
            sandbox_policy = %sandbox_policy,
@@ -247,12 +247,6 @@ The size of the context window for the model, in tokens.
 
 In general, Codex knows the context window for the most common OpenAI models, but if you are using a new model with an old version of the Codex CLI, then you can use `model_context_window` to tell Codex what value to use to determine how much context is left during a conversation.
 
-### model_max_output_tokens
-
-This is analogous to `model_context_window`, but for the maximum number of output tokens for the model.
-
-> See also [`codex exec`](./exec.md) to see how these model settings influence non-interactive runs.
-
 ### oss_provider
 
 Specifies the default OSS provider to use when running Codex. This is used when the `--oss` flag is provided without a specific provider.
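The remaining-context accounting described in that doc section is simple arithmetic: compare tokens used so far against the configured or detected window. A back-of-the-envelope sketch (a hypothetical helper, not Codex's actual implementation):

```rust
/// Hypothetical helper: percent of the context window still free.
fn percent_context_left(context_window: i64, tokens_used: i64) -> i64 {
    let remaining = (context_window - tokens_used).max(0);
    remaining * 100 / context_window
}

fn main() {
    // With a 272k window and 68k tokens used, 75% of the context is left.
    assert_eq!(percent_context_left(272_000, 68_000), 75);
}
```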
@@ -945,7 +939,6 @@ Valid values:
 | `model` | string | Model to use (e.g., `gpt-5.1-codex-max`). |
 | `model_provider` | string | Provider id from `model_providers` (default: `openai`). |
 | `model_context_window` | number | Context window tokens. |
-| `model_max_output_tokens` | number | Max output tokens. |
 | `tool_output_token_limit` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). |
 | `approval_policy` | `untrusted` \| `on-failure` \| `on-request` \| `never` | When to prompt for approval. |
 | `sandbox_mode` | `read-only` \| `workspace-write` \| `danger-full-access` | OS sandbox policy. |
@@ -30,7 +30,6 @@ model_provider = "openai"
 # Optional manual model metadata. When unset, Codex auto-detects from model.
 # Uncomment to force values.
 # model_context_window = 128000 # tokens; default: auto for model
-# model_max_output_tokens = 8192 # tokens; default: auto for model
 # model_auto_compact_token_limit = 0 # disable/override auto; default: model family specific
 # tool_output_token_limit = 10000 # tokens stored per tool output; default: 10000 for gpt-5.1-codex-max