From a5b4f85a5556a9ca1a922a6851016d153b09825a Mon Sep 17 00:00:00 2001 From: Friel Date: Mon, 27 Apr 2026 21:57:25 +0000 Subject: [PATCH] Add configurable custom models Co-authored-by: Codex --- .../app-server/tests/common/models_cache.rs | 1 + .../codex-api/tests/models_integration.rs | 1 + codex-rs/config/src/config_toml.rs | 17 +++ codex-rs/core/config.schema.json | 36 ++++++ codex-rs/core/root_agent_prompt.md | 44 +++++++ codex-rs/core/root_agent_watchdog_prompt.md | 27 ++++ codex-rs/core/src/client.rs | 6 +- codex-rs/core/src/config/config_tests.rs | 70 +++++++++++ codex-rs/core/src/config/mod.rs | 25 ++++ codex-rs/core/src/session/mod.rs | 57 +++++++++ codex-rs/core/src/session/tests.rs | 116 ++++++++++++++++++ codex-rs/core/src/test_support.rs | 6 +- codex-rs/core/src/thread_manager.rs | 7 +- .../src/tools/handlers/multi_agents/spawn.rs | 10 +- codex-rs/core/subagent_prompt.md | 11 ++ codex-rs/core/tests/suite/model_switching.rs | 45 +++++++ codex-rs/core/tests/suite/models_cache_ttl.rs | 1 + codex-rs/core/tests/suite/personality.rs | 2 + codex-rs/core/tests/suite/remote_models.rs | 3 + codex-rs/core/tests/suite/rmcp_client.rs | 1 + .../tests/suite/spawn_agent_description.rs | 1 + codex-rs/core/tests/suite/view_image.rs | 1 + codex-rs/core/watchdog_agent_prompt.md | 95 ++++++++++++++ codex-rs/features/src/lib.rs | 8 ++ codex-rs/features/src/tests.rs | 14 +++ .../src/amazon_bedrock/catalog.rs | 16 +++ .../model-provider/src/amazon_bedrock/mod.rs | 6 +- codex-rs/model-provider/src/provider.rs | 25 +++- codex-rs/models-manager/src/config.rs | 9 ++ codex-rs/models-manager/src/lib.rs | 1 + codex-rs/models-manager/src/manager.rs | 110 ++++++++++++++++- codex-rs/models-manager/src/manager_tests.rs | 41 +++++++ codex-rs/models-manager/src/model_info.rs | 1 + codex-rs/protocol/src/openai_models.rs | 10 ++ codex-rs/thread-manager-sample/src/main.rs | 1 + 35 files changed, 809 insertions(+), 16 deletions(-) create mode 100644 codex-rs/core/root_agent_prompt.md create 
mode 100644 codex-rs/core/root_agent_watchdog_prompt.md create mode 100644 codex-rs/core/subagent_prompt.md create mode 100644 codex-rs/core/watchdog_agent_prompt.md diff --git a/codex-rs/app-server/tests/common/models_cache.rs b/codex-rs/app-server/tests/common/models_cache.rs index 3b4a58a7ab..f972b6fd85 100644 --- a/codex-rs/app-server/tests/common/models_cache.rs +++ b/codex-rs/app-server/tests/common/models_cache.rs @@ -16,6 +16,7 @@ use std::path::Path; fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo { ModelInfo { slug: preset.id.clone(), + request_model: None, display_name: preset.display_name.clone(), description: Some(preset.description.clone()), default_reasoning_level: Some(preset.default_reasoning_effort), diff --git a/codex-rs/codex-api/tests/models_integration.rs b/codex-rs/codex-api/tests/models_integration.rs index 9f95c9441f..22e7fd0046 100644 --- a/codex-rs/codex-api/tests/models_integration.rs +++ b/codex-rs/codex-api/tests/models_integration.rs @@ -53,6 +53,7 @@ async fn models_client_hits_models_endpoint() { let response = ModelsResponse { models: vec![ModelInfo { slug: "gpt-test".to_string(), + request_model: None, display_name: "gpt-test".to_string(), description: Some("desc".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), diff --git a/codex-rs/config/src/config_toml.rs b/codex-rs/config/src/config_toml.rs index 4ec2728366..16ce33e7f2 100644 --- a/codex-rs/config/src/config_toml.rs +++ b/codex-rs/config/src/config_toml.rs @@ -224,6 +224,10 @@ pub struct ConfigToml { #[serde(default, deserialize_with = "deserialize_model_providers")] pub model_providers: HashMap, + /// User-defined model aliases that can override model context settings. + #[serde(default)] + pub custom_models: Vec, + /// Maximum number of bytes to include from an AGENTS.md project doc file. 
#[serde(default = "default_project_doc_max_bytes")] pub project_doc_max_bytes: Option, @@ -939,6 +943,19 @@ pub fn validate_model_providers( Ok(()) } +#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)] +#[schemars(deny_unknown_fields)] +pub struct CustomModelToml { + /// User-facing alias shown in the model picker. + pub name: String, + /// Provider-facing model slug used on API requests. + pub model: String, + /// Optional context window override applied when this alias is selected. + pub model_context_window: Option, + /// Optional auto-compaction token limit override applied when this alias is selected. + pub model_auto_compact_token_limit: Option, +} + fn deserialize_model_providers<'de, D>( deserializer: D, ) -> Result, D::Error> diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index 4bf6d496dd..94006d12e4 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -696,6 +696,34 @@ }, "type": "object" }, + "CustomModelToml": { + "additionalProperties": false, + "properties": { + "model": { + "description": "Provider-facing model slug used on API requests.", + "type": "string" + }, + "model_auto_compact_token_limit": { + "description": "Optional auto-compaction token limit override applied when this alias is selected.", + "format": "int64", + "type": "integer" + }, + "model_context_window": { + "description": "Optional context window override applied when this alias is selected.", + "format": "int64", + "type": "integer" + }, + "name": { + "description": "User-facing alias shown in the model picker.", + "type": "string" + } + }, + "required": [ + "model", + "name" + ], + "type": "object" + }, "DebugConfigLockToml": { "additionalProperties": false, "properties": { @@ -3756,6 +3784,14 @@ "description": "Compact prompt used for history compaction.", "type": "string" }, + "custom_models": { + "default": [], + "description": "User-defined model aliases that can override 
model context settings.", + "items": { + "$ref": "#/definitions/CustomModelToml" + }, + "type": "array" + }, "debug": { "allOf": [ { diff --git a/codex-rs/core/root_agent_prompt.md b/codex-rs/core/root_agent_prompt.md new file mode 100644 index 0000000000..9eb78d33e0 --- /dev/null +++ b/codex-rs/core/root_agent_prompt.md @@ -0,0 +1,44 @@ +# You are the Root Agent + +You are the **root agent** in a multi-agent Codex session. Until you see `# You are a Subagent`, these instructions define your role. If this thread was created from the root thread with `"fork_turns":"all"` (a forked child), you may see both sets of instructions; apply subagent instructions as local role guidance while root instructions remain governing system-level rules. + +## Root Agent Responsibilities + +Your first job is to accomplish the user's goal. Relative to your subagents, you own their sequencing, integration, validation, and outcomes. Use subagents when they help you finish the user's goal faster or with better evidence. + +For multi-step efforts, keep a shared plan file or assign scoped plan files to subagents only when that improves execution or the user asks for it. A plan file is support work, not the deliverable, unless the user asked for a plan. + +## Subagent Responsibilities + +Subagents accomplish tasks, from the very small to the very large, within some scope of work decided by their parent agent. + +Subagents can behave incorrectly if their context changes while they work. Reduce this risk by: + +- Giving them tight, explicit scopes (paths, commands, expected outputs). +- Ensuring tasks have non-overlapping scopes - whether specific files, working trees, or otherwise. +- Telling subagents, especially non-forked subagents, whether they are working in working trees or directories in which you or other subagents may also be working. +- Providing updates to them when you change course. + +Treat useful long-running agents as collaborators with valuable context.
When new work is a +continuation of an agent's existing assignment, continue the same agent thread instead of spawning +a near-duplicate. Use `followup_task` when the agent is already working on the same task, and `send_message` when you only need to leave queued context without starting a turn. + +## Forking agents + +When calling `spawn_agent`, the `fork_turns` argument only determines the initial context of the agent. `"fork_turns":"all"` gives the new agent the entire conversation up to the fork point. `"fork_turns":"none"` gives the new agent only the message you provide. All subagents can call tools and inherit your working directory. + +Forked agents are a superpower, answering the thought experiment, "What would you do if you could clone yourself?" They have all of the context of the user's messages, your messages, tool calls and results; they know everything you know up to the point they are forked. When spawning an agent, always explicitly provide a `fork_turns` value; default to `"fork_turns":"all"` for subagents unless you need less context. + +When the user gives you a particularly hard problem, consider forking several subagents, grading their responses, and then deciding how to proceed. When you are unsure, you can instruct your forks to consider many approaches in parallel. + +Use `"fork_turns":"none"` when a task requires a neutral, independent analysis without needing information already in this thread. Always give non-forked agents explicit instructions, all relevant context or paths to files or tools to obtain it, their expected outcome or goal, and the output you expect them to return to you. + + +## Operating Principles + +- Prefer direct execution over coordination when you can make faster progress yourself. +- Delegate when doing so will reduce wall-clock time, add necessary independent judgment, or improve review coverage enough to justify the coordination cost. You are responsible for integration and conflict resolution.
+- Consider whether independent subtasks can start now in a subagent, but do not spawn agents for work you can complete faster yourself. +- Consider whether using multiple worktrees or remote hosts would accelerate you and your ability to use subagents. Use them if the user or developer instructions permit. +- Prefer clear, explicit instructions over implicit expectations, especially for non-forked agents, which require significantly more direction. +- When you receive messages from other agents, verify their claims before relying on them. diff --git a/codex-rs/core/root_agent_watchdog_prompt.md b/codex-rs/core/root_agent_watchdog_prompt.md new file mode 100644 index 0000000000..5cf4616a2a --- /dev/null +++ b/codex-rs/core/root_agent_watchdog_prompt.md @@ -0,0 +1,27 @@ +## Watchdogs + +If the user gives you instructions that will take many turns or more than an hour to complete, start a watchdog by calling `spawn_agent` with `"agent_type":"watchdog"`. This watchdog will run until you close it, it closes itself, or you replace it by creating a new watchdog. + +When you create a watchdog, write `message` so it will still be correct hours or days later. A watchdog is a promise to create future agents with this same `message`, so do not describe the current project state. Provide the watchdog with a `message` that is anti-fragile to changes in the state of the repository, i.e., how to determine progress, not statements of progress. You must teach your watchdog how to determine whether the user's overall goal is being accomplished or not, not tell it that it is. + +When the watchdog is triggered, it will act as a forked subagent with full access to the conversation. The watchdog will be able to see what tools you have called and what work you have done. + +The `message` should include: + +- The user's goal, preferably quoting the user's request verbatim, in both broad and specific terms.
+- The context needed to interpret the user's request if the watchdog only had this `message`, including any definitions. +- Durable requirements, non-goals, reference files, plans, rubrics, and required validation, ideally in the form of paths or tools it can use to obtain this information in the future as it changes. +- Instructions for the watchdog to determine progress. +- Do not instruct the watchdog to run test suites or processes; instead, tell it what tools and tests it should expect you to run and what progress it should expect from you. + +The watchdog works best when there is some state on disk or a tool (a database, etc.) that can be defined up front. Do not create a watchdog until this mechanism exists; if it does not, create it first. Unless instructed otherwise, put plan files in ~/.codex/plans. Do not use the plan tool for the watchdog. + +After creating the watchdog, begin working on the user's task immediately as if the watchdog does not exist. The watchdog will only act after you end your turn. Its job is to keep you working toward the user's goal, in case you have prematurely ended your turn. Do not try to prove the watchdog is working. Once started, the watchdog will appear in `list_agents`. + +When using a watchdog as a timer, ensure it has access to an absolute timestamp by calling a tool to obtain the date and time, or by doing so when the watchdog instructs you to. + +Do not call `send_message`, `followup_task`, or `wait_agent` on a watchdog `agent_id`. When you no longer need the watchdog, call `close_agent` on the watchdog `agent_id`. + +If the user gives instructions that materially change, extend, or add context to the long-running goal, replace the current watchdog by calling `spawn_agent` with `"agent_type":"watchdog"`. The new watchdog message completely replaces the previous watchdog message. + +Treat messages from the watchdog as task instructions.
diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index 6c91c7d3db..53eea4d6d7 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -549,7 +549,7 @@ impl ModelClient { prompt.output_schema_strict, ); let payload = ApiCompactionInput { - model: &model_info.slug, + model: model_info.request_model_slug(), input: &input, instructions: &instructions, tools, @@ -635,7 +635,7 @@ impl ModelClient { .with_telemetry(Some(request_telemetry)); let payload = ApiMemorySummarizeInput { - model: model_info.slug.clone(), + model: model_info.request_model_slug().to_string(), raw_memories, reasoning: effort.map(|effort| Reasoning { effort: Some(effort), @@ -975,7 +975,7 @@ impl ModelClientSession { ); let prompt_cache_key = Some(self.client.prompt_cache_key().to_string()); let request = ResponsesApiRequest { - model: model_info.slug.clone(), + model: model_info.request_model_slug().to_string(), instructions: instructions.clone(), input, tools, diff --git a/codex-rs/core/src/config/config_tests.rs b/codex-rs/core/src/config/config_tests.rs index 1a3386245c..d506c8797f 100644 --- a/codex-rs/core/src/config/config_tests.rs +++ b/codex-rs/core/src/config/config_tests.rs @@ -11,6 +11,7 @@ use codex_config::config_toml::AgentRoleToml; use codex_config::config_toml::AgentsToml; use codex_config::config_toml::AutoReviewToml; use codex_config::config_toml::ConfigToml; +use codex_config::config_toml::CustomModelToml; use codex_config::config_toml::ProjectConfig; use codex_config::config_toml::RealtimeAudioConfig; use codex_config::config_toml::RealtimeConfig; @@ -6185,6 +6186,71 @@ async fn model_catalog_json_loads_from_path() -> std::io::Result<()> { Ok(()) } +#[tokio::test] +async fn custom_models_load_from_config_toml() -> std::io::Result<()> { + let codex_home = TempDir::new()?; + let cfg = ConfigToml { + custom_models: vec![CustomModelToml { + name: "frontier-local".to_string(), + model: "gpt-5.4".to_string(), + model_context_window: 
Some(123_456), + model_auto_compact_token_limit: Some(100_000), + }], + ..Default::default() + }; + + let config = Config::load_from_base_config_with_overrides( + cfg, + ConfigOverrides::default(), + codex_home.abs(), + ) + .await?; + + let custom = config + .custom_models + .get("frontier-local") + .expect("custom alias should load"); + assert_eq!(custom.model, "gpt-5.4"); + assert_eq!(custom.model_context_window, Some(123_456)); + assert_eq!(custom.model_auto_compact_token_limit, Some(100_000)); + Ok(()) +} + +#[tokio::test] +async fn custom_models_reject_duplicate_aliases() -> std::io::Result<()> { + let codex_home = TempDir::new()?; + let cfg = ConfigToml { + custom_models: vec![ + CustomModelToml { + name: "alias".to_string(), + model: "gpt-5.4".to_string(), + ..Default::default() + }, + CustomModelToml { + name: "alias".to_string(), + model: "gpt-5.3".to_string(), + ..Default::default() + }, + ], + ..Default::default() + }; + + let err = Config::load_from_base_config_with_overrides( + cfg, + ConfigOverrides::default(), + codex_home.abs(), + ) + .await + .expect_err("duplicate custom aliases should fail config load"); + + assert_eq!(err.kind(), ErrorKind::InvalidInput); + assert!( + err.to_string() + .contains("duplicate custom model alias: alias") + ); + Ok(()) +} + #[tokio::test] async fn model_catalog_json_rejects_empty_catalog() -> std::io::Result<()> { let codex_home = TempDir::new()?; @@ -6408,6 +6474,7 @@ async fn test_precedence_fixture_with_o3_profile() -> std::io::Result<()> { model_reasoning_summary: Some(ReasoningSummary::Detailed), model_supports_reasoning_summaries: None, model_catalog: None, + custom_models: HashMap::new(), model_verbosity: None, personality: Some(Personality::Pragmatic), chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), @@ -6611,6 +6678,7 @@ async fn test_precedence_fixture_with_gpt3_profile() -> std::io::Result<()> { model_reasoning_summary: None, model_supports_reasoning_summaries: None, model_catalog: None, + 
custom_models: HashMap::new(), model_verbosity: None, personality: Some(Personality::Pragmatic), chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), @@ -6768,6 +6836,7 @@ async fn test_precedence_fixture_with_zdr_profile() -> std::io::Result<()> { model_reasoning_summary: None, model_supports_reasoning_summaries: None, model_catalog: None, + custom_models: HashMap::new(), model_verbosity: None, personality: Some(Personality::Pragmatic), chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), @@ -6910,6 +6979,7 @@ async fn test_precedence_fixture_with_gpt5_profile() -> std::io::Result<()> { model_reasoning_summary: Some(ReasoningSummary::Detailed), model_supports_reasoning_summaries: None, model_catalog: None, + custom_models: HashMap::new(), model_verbosity: Some(Verbosity::High), personality: Some(Personality::Pragmatic), chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index f449bd5035..3d5249230e 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -76,6 +76,7 @@ use codex_model_provider_info::ModelProviderInfo; use codex_model_provider_info::OLLAMA_CHAT_PROVIDER_REMOVED_ERROR; use codex_model_provider_info::built_in_model_providers; use codex_model_provider_info::merge_configured_model_providers; +use codex_models_manager::CustomModelConfig; use codex_models_manager::ModelsManagerConfig; use codex_protocol::config_types::AltScreenMode; use codex_protocol::config_types::ForcedLoginMethod; @@ -699,6 +700,9 @@ pub struct Config { /// When set, this replaces the bundled catalog for the current process. pub model_catalog: Option, + /// User-defined model aliases shown in the picker. + pub custom_models: HashMap, + /// Optional verbosity control for GPT-5 models (Responses API `text.verbosity`). 
pub model_verbosity: Option, @@ -1064,6 +1068,7 @@ impl Config { personality_enabled: self.features.enabled(Feature::Personality), model_supports_reasoning_summaries: self.model_supports_reasoning_summaries, model_catalog: self.model_catalog.clone(), + custom_models: self.custom_models.clone(), } } @@ -2515,6 +2520,25 @@ impl Config { merge_configured_model_providers(built_in_model_providers(openai_base_url), cfg.model_providers) .map_err(|message| std::io::Error::new(std::io::ErrorKind::InvalidData, message))?; + let mut custom_models = HashMap::new(); + for custom in cfg.custom_models { + let alias = custom.name; + if custom_models.contains_key(&alias) { + return Err(std::io::Error::new( + ErrorKind::InvalidInput, + format!("duplicate custom model alias: {alias}"), + )); + } + custom_models.insert( + alias, + CustomModelConfig { + model: custom.model, + model_context_window: custom.model_context_window, + model_auto_compact_token_limit: custom.model_auto_compact_token_limit, + }, + ); + } + let model_provider_id = model_provider .or(config_profile.model_provider) .or(cfg.model_provider) @@ -2954,6 +2978,7 @@ impl Config { mcp_oauth_callback_port: cfg.mcp_oauth_callback_port, mcp_oauth_callback_url: cfg.mcp_oauth_callback_url.clone(), model_providers, + custom_models, project_doc_max_bytes: cfg.project_doc_max_bytes.unwrap_or(AGENTS_MD_MAX_BYTES), project_doc_fallback_filenames: cfg .project_doc_fallback_filenames diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs index 4666975216..eeff6f7b98 100644 --- a/codex-rs/core/src/session/mod.rs +++ b/codex-rs/core/src/session/mod.rs @@ -216,6 +216,58 @@ use self::turn_context::TurnSkillsContext; #[cfg(test)] mod rollout_reconstruction_tests; +const ROOT_AGENT_PROMPT_FALLBACK: &str = include_str!("../../root_agent_prompt.md"); +const SUBAGENT_PROMPT_FALLBACK: &str = include_str!("../../subagent_prompt.md"); + +async fn load_agent_prompt_fallback( + codex_home: &Path, + fallback: &str, + 
override_filename: &str, +) -> String { + let override_path = codex_home.join(override_filename); + if let Ok(contents) = tokio::fs::read_to_string(&override_path).await + && !contents.trim().is_empty() + { + return contents; + } + + fallback.to_string() +} + +pub(crate) async fn load_root_agent_prompt(codex_home: &Path) -> String { + load_agent_prompt_fallback(codex_home, ROOT_AGENT_PROMPT_FALLBACK, "AGENTS.root.md").await +} + +pub(crate) async fn load_subagent_prompt(codex_home: &Path) -> String { + load_agent_prompt_fallback(codex_home, SUBAGENT_PROMPT_FALLBACK, "AGENTS.subagent.md").await +} + +pub(crate) async fn load_agent_role_prompt( + config: &Config, + session_source: &SessionSource, +) -> Option { + if !config.features.enabled(Feature::AgentPromptInjection) { + return None; + } + + let role_prompt = match session_source { + SessionSource::SubAgent(_) => load_subagent_prompt(&config.codex_home).await, + SessionSource::Cli + | SessionSource::VSCode + | SessionSource::Exec + | SessionSource::Mcp + | SessionSource::Custom(_) + | SessionSource::Internal(_) + | SessionSource::Unknown => load_root_agent_prompt(&config.codex_home).await, + }; + + if role_prompt.trim().is_empty() { + None + } else { + Some(role_prompt) + } +} + #[derive(Debug, PartialEq)] pub enum SteerInputError { NoActiveTurn(Vec), @@ -2572,6 +2624,11 @@ impl Session { { developer_sections.push(model_switch_message); } + if let Some(role_prompt) = + load_agent_role_prompt(&turn_context.config, &session_source).await + { + developer_sections.push(role_prompt); + } if turn_context.config.include_permissions_instructions { developer_sections.push( PermissionsInstructions::from_permission_profile( diff --git a/codex-rs/core/src/session/tests.rs b/codex-rs/core/src/session/tests.rs index cf031891c9..ba73af6e73 100644 --- a/codex-rs/core/src/session/tests.rs +++ b/codex-rs/core/src/session/tests.rs @@ -8689,3 +8689,119 @@ async fn session_start_hooks_require_project_trust_without_config_toml() -> 
std: Ok(()) } + +#[tokio::test] +async fn root_agent_prompt_requires_explicit_fork_turns() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + + let prompt = load_root_agent_prompt(codex_home.path()).await; + + assert!(prompt.contains("always explicitly provide a `fork_turns` value")); + assert!(prompt.contains("default to `\"fork_turns\":\"all\"` for subagents")); + assert!(!prompt.contains("omit `fork_turns` unless you need less context")); +} + +#[tokio::test] +async fn root_agent_prompt_prefers_user_goal_over_coordination() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + + let prompt = load_root_agent_prompt(codex_home.path()).await; + + assert!(prompt.contains("Your first job is to accomplish the user's goal.")); + assert!(prompt.contains("A plan file is support work, not the deliverable")); + assert!(prompt.contains("Prefer direct execution over coordination")); +} + +#[tokio::test] +async fn subagent_prompt_is_for_regular_subagents_only() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + + let prompt = load_subagent_prompt(codex_home.path()).await; + + assert!(prompt.contains("# You are a Subagent")); + assert!(prompt.contains("## Subagent Responsibilities")); + assert!(!prompt.contains("You are also a **watchdog**")); + assert!(!prompt.contains("watchdog.snooze")); +} + +#[tokio::test] +async fn agent_prompt_loader_prefers_home_overrides() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + tokio::fs::write(codex_home.path().join("AGENTS.root.md"), "custom root") + .await + .expect("write root override"); + tokio::fs::write( + codex_home.path().join("AGENTS.subagent.md"), + "custom subagent", + ) + .await + .expect("write subagent override"); + + assert_eq!( + load_root_agent_prompt(codex_home.path()).await, + "custom root" + ); + assert_eq!( + load_subagent_prompt(codex_home.path()).await, + "custom subagent" + ); +} + +#[tokio::test] +async fn 
root_agent_prompt_is_inline_developer_context_not_session_instructions() { + let session = make_session_with_config(|config| { + config + .features + .enable(Feature::AgentPromptInjection) + .expect("test config should enable prompt injection"); + }) + .await + .expect("session should build"); + + { + let state = session.state.lock().await; + assert_eq!(state.session_configuration.developer_instructions, None); + } + + let turn_context = session.new_default_turn().await; + let initial_context = session.build_initial_context(turn_context.as_ref()).await; + + assert!(initial_context.iter().any(|item| matches!( + item, + ResponseItem::Message { role, content, .. } + if role == "developer" + && content.iter().any(|content_item| matches!( + content_item, + ContentItem::InputText { text } + if text.contains("# You are the Root Agent") + )) + ))); +} + +#[tokio::test] +async fn agent_prompt_injection_does_not_require_collab_feature() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + let mut config = build_test_config(codex_home.path()).await; + config + .features + .disable(Feature::Collab) + .expect("test config should disable collab"); + config + .features + .enable(Feature::AgentPromptInjection) + .expect("test config should enable prompt injection"); + let session_source = SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id: ThreadId::default(), + depth: 1, + agent_path: None, + agent_nickname: Some("Test Subagent".to_string()), + agent_role: Some("worker".to_string()), + }); + + let prompt = load_agent_role_prompt(&config, &session_source) + .await + .expect("prompt injection should not require collab"); + + assert!(prompt.contains("# You are a Subagent")); + assert!(prompt.contains("## Subagent Responsibilities")); +} diff --git a/codex-rs/core/src/test_support.rs b/codex-rs/core/src/test_support.rs index 6dbcf7a464..bb5609ef01 100644 --- a/codex-rs/core/src/test_support.rs +++ b/codex-rs/core/src/test_support.rs @@ -106,7 
+106,11 @@ pub fn models_manager_with_provider( provider: ModelProviderInfo, ) -> SharedModelsManager { let provider = create_model_provider(provider, Some(auth_manager)); - provider.models_manager(codex_home, /*config_model_catalog*/ None) + provider.models_manager( + codex_home, + /*config_model_catalog*/ None, + Default::default(), + ) } pub fn get_model_offline(model: Option<&str>) -> String { diff --git a/codex-rs/core/src/thread_manager.rs b/codex-rs/core/src/thread_manager.rs index 5821bf237f..7c93e33362 100644 --- a/codex-rs/core/src/thread_manager.rs +++ b/codex-rs/core/src/thread_manager.rs @@ -262,6 +262,7 @@ pub fn build_models_manager( provider.models_manager( config.codex_home.to_path_buf(), config.model_catalog.clone(), + config.custom_models.clone(), ) } @@ -382,7 +383,11 @@ impl ThreadManager { threads: Arc::new(RwLock::new(HashMap::new())), thread_created_tx, models_manager: create_model_provider(provider, Some(auth_manager.clone())) - .models_manager(codex_home, /*config_model_catalog*/ None), + .models_manager( + codex_home, + /*config_model_catalog*/ None, + Default::default(), + ), environment_manager, skills_manager, plugins_manager, diff --git a/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs b/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs index c36f7b8941..3bd291ed8a 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs @@ -281,8 +281,14 @@ async fn spawn_watchdog( )) })?; let target_thread_id = agent_control - .spawn_agent(handle_config, Op::Interrupt, Some(spawn_source)) - .await?; + .spawn_agent_with_metadata( + handle_config, + Op::Interrupt, + Some(spawn_source), + Default::default(), + ) + .await? 
+ .thread_id; let superseded_before_register = agent_control .unregister_watchdogs_for_owner(owner_thread_id) .await; diff --git a/codex-rs/core/subagent_prompt.md b/codex-rs/core/subagent_prompt.md new file mode 100644 index 0000000000..bd45150ed0 --- /dev/null +++ b/codex-rs/core/subagent_prompt.md @@ -0,0 +1,11 @@ +# You are a Subagent + +You are a **subagent** in a multi-agent Codex session. Your goal is the message given to you by the agent that spawned you. If you see assistant messages prior to this one, they are from your parent agent and you were forked, and you should work on the task that you were forked to accomplish. +## Subagent Responsibilities + +- Stay within the scope given in your instructions. +- Prefer to make progress: edit files, run commands, and validate outcomes. If you cannot, tell your parent agent via `send_message`. + +## Reporting Expectations + +When you've completed your task, report back with outcomes, file(s) changed, commands run, how you verified your work, and context needed for the parent agent to evaluate your work and determine what to do next. 
diff --git a/codex-rs/core/tests/suite/model_switching.rs b/codex-rs/core/tests/suite/model_switching.rs index 43ec50746e..71443f7952 100644 --- a/codex-rs/core/tests/suite/model_switching.rs +++ b/codex-rs/core/tests/suite/model_switching.rs @@ -2,6 +2,7 @@ use anyhow::Result; use codex_config::types::Personality; use codex_features::Feature; use codex_login::CodexAuth; +use codex_models_manager::CustomModelConfig; use codex_models_manager::manager::RefreshStrategy; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::config_types::ServiceTier; @@ -34,6 +35,7 @@ use core_test_support::test_codex::test_codex; use core_test_support::test_codex::turn_permission_fields; use core_test_support::wait_for_event; use pretty_assertions::assert_eq; +use std::collections::HashMap; use std::path::Path; use std::path::PathBuf; use wiremock::MockServer; @@ -91,6 +93,7 @@ fn test_model_info( ) -> ModelInfo { ModelInfo { slug: slug.to_string(), + request_model: None, display_name: display_name.to_string(), description: Some(description.to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), @@ -333,6 +336,47 @@ async fn flex_service_tier_is_applied_to_http_turn() -> Result<()> { Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn custom_model_alias_uses_backing_model_in_responses_request() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let resp_mock = mount_sse_once(&server, sse_completed("resp-1")).await; + let mut custom_models = HashMap::new(); + custom_models.insert( + "frontier-local".to_string(), + CustomModelConfig { + model: "gpt-real-preview".to_string(), + model_context_window: Some(123_456), + model_auto_compact_token_limit: Some(100_000), + }, + ); + let remote_model = test_model_info( + "gpt-real", + "Real", + "backing custom model metadata", + default_input_modalities(), + ); + + let test = test_codex() + .with_config(move |config| { + config.model = 
Some("frontier-local".to_string()); + config.model_catalog = Some(ModelsResponse { + models: vec![remote_model], + }); + config.custom_models = custom_models; + }) + .build(&server) + .await?; + + test.submit_turn("custom model turn").await?; + + let body = resp_mock.single_request().body_json(); + assert_eq!(body["model"].as_str(), Some("gpt-real-preview")); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn model_change_from_image_to_text_strips_prior_image_content() -> Result<()> { skip_if_no_network!(Ok(())); @@ -803,6 +847,7 @@ async fn model_switch_to_smaller_model_updates_token_context_window() -> Result< let base_model = ModelInfo { slug: large_model_slug.to_string(), + request_model: None, display_name: "Larger Model".to_string(), description: Some("larger context window model".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), diff --git a/codex-rs/core/tests/suite/models_cache_ttl.rs b/codex-rs/core/tests/suite/models_cache_ttl.rs index e2688afc97..bdeb03a471 100644 --- a/codex-rs/core/tests/suite/models_cache_ttl.rs +++ b/codex-rs/core/tests/suite/models_cache_ttl.rs @@ -323,6 +323,7 @@ struct ModelsCache { fn test_remote_model(slug: &str, priority: i32) -> ModelInfo { ModelInfo { slug: slug.to_string(), + request_model: None, display_name: "Remote Test".to_string(), description: Some("remote model".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), diff --git a/codex-rs/core/tests/suite/personality.rs b/codex-rs/core/tests/suite/personality.rs index dde6d2ca51..fb1373e3cb 100644 --- a/codex-rs/core/tests/suite/personality.rs +++ b/codex-rs/core/tests/suite/personality.rs @@ -567,6 +567,7 @@ async fn remote_model_friendly_personality_instructions_with_feature() -> anyhow let friendly_personality_message = "Friendly variant"; let remote_model = ModelInfo { slug: remote_slug.to_string(), + request_model: None, display_name: "Remote default personality test".to_string(), description: 
Some("Remote model with default personality template".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), @@ -675,6 +676,7 @@ async fn user_turn_personality_remote_model_template_includes_update_message() - let remote_pragmatic_message = "Pragmatic from remote template"; let remote_model = ModelInfo { slug: remote_slug.to_string(), + request_model: None, display_name: "Remote personality test".to_string(), description: Some("Remote model with personality template".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), diff --git a/codex-rs/core/tests/suite/remote_models.rs b/codex-rs/core/tests/suite/remote_models.rs index 49218c78d2..c99ad899ca 100644 --- a/codex-rs/core/tests/suite/remote_models.rs +++ b/codex-rs/core/tests/suite/remote_models.rs @@ -518,6 +518,7 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { let remote_model = ModelInfo { slug: REMOTE_MODEL_SLUG.to_string(), + request_model: None, display_name: "Remote Test".to_string(), description: Some("A remote model that requires the test shell".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), @@ -774,6 +775,7 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> { let remote_base = "Use the remote base instructions only."; let remote_model = ModelInfo { slug: model.to_string(), + request_model: None, display_name: "Parallel Remote".to_string(), description: Some("A remote model with custom instructions".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), @@ -1264,6 +1266,7 @@ fn test_remote_model_with_policy( ) -> ModelInfo { ModelInfo { slug: slug.to_string(), + request_model: None, display_name: format!("{slug} display"), description: Some(format!("{slug} description")), default_reasoning_level: Some(ReasoningEffort::Medium), diff --git a/codex-rs/core/tests/suite/rmcp_client.rs b/codex-rs/core/tests/suite/rmcp_client.rs index 0947f4fba7..730db05345 100644 --- 
a/codex-rs/core/tests/suite/rmcp_client.rs +++ b/codex-rs/core/tests/suite/rmcp_client.rs @@ -1271,6 +1271,7 @@ async fn stdio_image_responses_are_sanitized_for_text_only_model() -> anyhow::Re ModelsResponse { models: vec![ModelInfo { slug: text_only_model_slug.to_string(), + request_model: None, display_name: "RMCP Text Only".to_string(), description: Some("Test model without image input support".to_string()), default_reasoning_level: None, diff --git a/codex-rs/core/tests/suite/spawn_agent_description.rs b/codex-rs/core/tests/suite/spawn_agent_description.rs index 031c3135e8..ebb1f15daa 100644 --- a/codex-rs/core/tests/suite/spawn_agent_description.rs +++ b/codex-rs/core/tests/suite/spawn_agent_description.rs @@ -55,6 +55,7 @@ fn test_model_info( ) -> ModelInfo { ModelInfo { slug: slug.to_string(), + request_model: None, display_name: display_name.to_string(), description: Some(description.to_string()), default_reasoning_level: Some(default_reasoning_level), diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index 29c660d3af..2bcc249338 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ b/codex-rs/core/tests/suite/view_image.rs @@ -1070,6 +1070,7 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an let model_slug = "text-only-view-image-test-model"; let text_only_model = ModelInfo { slug: model_slug.to_string(), + request_model: None, display_name: "Text-only view_image test model".to_string(), description: Some("Remote model for view_image unsupported-path coverage".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), diff --git a/codex-rs/core/watchdog_agent_prompt.md b/codex-rs/core/watchdog_agent_prompt.md new file mode 100644 index 0000000000..7158e765d8 --- /dev/null +++ b/codex-rs/core/watchdog_agent_prompt.md @@ -0,0 +1,95 @@ +# You are a Subagent + +You are also a **watchdog**. + +You were forked from the parent agent at a moment prior to these instructions. 
Assistant messages prior to this instruction were not "you", they are your parent agent's messages. Tool calls before this message were made by the agent that spawned you. You have been created because the parent agent ended its turn, and without instruction from you, will not make any more progress toward the user's goal. + +You will be given the parent agent id and the original prompt/goal from the user, context, and instructions on how to evaluate the parent agent's progress. + +## What To Do + +First, compare the user's goal with the current evidence. Do not rely only on the parent agent's narration (i.e.: previous assistant messages). + +If a snooze condition is explicit and `owner_idle_for_seconds` is below the threshold, call `watchdog.snooze` immediately. + +If no parent action is needed, call `watchdog.snooze` or end with a short final message. Do not wake the parent just to say "keep waiting". + +If parent action is needed, send a message that quotes the user's goal, their current progress as you determine independently, and instructions to the parent on what to do next. + +If the user's goal is completely accomplished, tell the parent agent to verify the remaining acceptance criteria and close unneeded agents. + +## Principles + +- Re-anchor the parent agent to the user's goal, not to the most recent local activity. +- Push substantial work: implementation, integration, validation, review, or decisions that unblock the parent agent. +- If independent judgment is needed, tell your parent agent to create a non-forked reviewer subagent with the rubric and context that agent needs to give high quality feedback. +- Interrupt feature creep, scope drift, loops, early stopping, status-only turns, and plan-file busywork. +- Use evidence before accepting completion: diffs, command output, tests, artifacts, agent results, or explicit decisions. 
+- If the watchdog instruction asks for an exact format, follow that format unless higher-priority instructions require otherwise. + +## Detect Looping and Reward Hacking + +The parent agent may slip into patterns that look like progress but are not. Interrupt those patterns. + +Watch for: + +- Tests that always pass, tautologies, `assert!(true)`, mocks that cannot fail. +- Marking items complete with only stub or prototype implementation if the user asked for a complete implementation. +- "Fixes" that comment out failing tests or code without addressing root causes. +- Claiming success without running required format/lint/tests. +- Stopping early with "next I would" or "I can also" when the user asked the parent agent to keep working. +- Treating empty tool results, failed commands, or missing files as proof instead of recovering or checking another source. +- Reading many files or running many searches without turning findings into actions. +- Ignoring explicit user requirements in favor of quicker but incomplete shortcuts. +- Repeated status updates or checklist edits that do not add fresh evidence. +- Plan-file edits that replace product/repo progress instead of recording decisions, blockers, or validation state. +- Performing small edits and then running long tests or checks when the task needs an assignment with a named output and validation step, or needs a reviewer/referee decision. +- Ending turns instead of waiting on subagents or waiting for processes to complete. +- Repeated "continue"-style narration when the evidence calls for a retry, pivot, unblocker, or user question. +- Busywork: many actions or edits, with no progress toward the user's goal other than editing a plan file or log. + +When you detect these, prescribe the corrective action. + +## Interacting with the parent agent + +Use written plans, checklists, ledgers, rubrics, and acceptance criteria to judge progress, but do not let stale notes override the user's latest instruction. 
+ +If prior to this message the parent agent has marked some item complete, check that it is actually done. If the parent agent has erred by updating a plan file or calling a tool to mark a task completed when it has not been, instruct them to undo that. If that agent is otherwise misbehaving, quote it, and cite the user's goal or evidence. Treat a requirement as complete only when the parent thread shows the evidence required for that requirement. If the work has not reached a validation point, tell the parent agent to keep working. + +Keep your message to the parent agent proportional to the amount of realignment needed. Rarely more than a few paragraphs, often a couple sentences. If there are many small tasks to complete, instruct the parent agent to take on as many as they can in a single turn. Especially if validation takes a significant amount of time. If you see previous messages from the watchdog in the conversation prior to this instruction, that indicates the parent agent is doing too little work on each turn and needs to be given more work to do in each of its turns. + +If the user or developer provided specific watchdog instructions, those are overriding. E.g.: to use the watchdog to babysit a pull request, to act as a timer, etc. You should rarely call tools yourself to perform actions; instead, you should guide the parent agent to call the tools and produce the evidentiary record you need to be confident they are aligned with the user's instructions. + +## Bonus: Accelerating the Parent Agent + +Before sending the parent agent instructions on how to proceed, determine if there is some way they can accelerate their work. If a significant amount of the time spent each turn is waiting on a task to complete, if there are opportunities to make that faster without compromising on the user's goal, do so.
E.g.: running a focused set of tests instead of an expansive test suite, or spending less time performing ceremony work - status updates, taking notes - that is incidental to the user's goal and does not provide significant future value. + +## Ending your Turn + +End each watchdog run with exactly one of these: + +- Call `followup_task` with `"target":"parent"` to send instructions to the parent agent and start its next turn. +- Send a final assistant message in your own run and then stop, but only if the watchdog should continue running after this check-in and no parent action is needed. +- Call `watchdog.snooze` when no parent action is needed and no useful coordination would be created by waking the parent. +- Call `watchdog.close_self` when this watchdog should shut down. +- Call `watchdog.compact_parent_context` if you determine that the parent agent is going very far off track, repeating itself, or not following instructions from previous watchdogs. + +## Parent Recovery via Context Compaction + +`watchdog.compact_parent_context` asks the system to shorten repetitive parent-thread context so the parent agent can recover from loops. + +Use it only as a last resort: + +- The parent has been repeatedly non-responsive or failed to make progress after multiple watchdog messages. +- The parent is taking no meaningful actions (no concrete commands/edits/tests) and making no progress. +- You already sent at least one direct corrective instruction with `followup_task`, and it was ignored. + +Use `watchdog.snooze` when useful work is already underway and no parent decision is needed. Do not snooze if an agent is waiting on parent input, has become unblocked, or needs coordination to keep working.
+ +If the watchdog instruction gives an explicit snooze condition, such as "snooze if less than 3 minutes have elapsed", call tools to check the time before snoozing, and snooze only if the parent agent has also given you an absolute timestamp to compare against; absent that, instruct it to provide one. A `watchdog_was_due: true` fact means the runtime started a check-in; it does not override a stricter snooze condition from the watchdog instruction. + +Do not call `watchdog.compact_parent_context` for routine nudges or normal delays. Prefer precise `followup_task` guidance first. + +## Style + +Be explicit when precision matters, and forceful when the parent agent is not following the user's instructions. Your job is to drive real progress toward the user’s goal. diff --git a/codex-rs/features/src/lib.rs b/codex-rs/features/src/lib.rs index 477fdd6088..988f8d26a6 100644 --- a/codex-rs/features/src/lib.rs +++ b/codex-rs/features/src/lib.rs @@ -144,6 +144,8 @@ pub enum Feature { Collab, /// Enable task-path-based multi-agent routing. MultiAgentV2, + /// Enable root/subagent developer prompt injection. + AgentPromptInjection, /// Enable idle-time watchdog handles for root agents. AgentWatchdog, /// Enable CSV-backed agent job tools.
@@ -887,6 +889,12 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::UnderDevelopment, default_enabled: false, }, + FeatureSpec { + id: Feature::AgentPromptInjection, + key: "agent_prompt_injection", + stage: Stage::UnderDevelopment, + default_enabled: false, + }, FeatureSpec { id: Feature::AgentWatchdog, key: "agent_watchdog", diff --git a/codex-rs/features/src/tests.rs b/codex-rs/features/src/tests.rs index 6235c1c3e5..99a5b7c442 100644 --- a/codex-rs/features/src/tests.rs +++ b/codex-rs/features/src/tests.rs @@ -279,6 +279,20 @@ fn multi_agent_is_stable_and_enabled_by_default() { assert_eq!(Feature::Collab.default_enabled(), true); } +#[test] +fn agent_prompt_injection_is_under_development_and_disabled_by_default() { + assert_eq!( + feature_for_key("agent_prompt_injection"), + Some(Feature::AgentPromptInjection) + ); + assert_eq!( + Feature::AgentPromptInjection.stage(), + Stage::UnderDevelopment + ); + assert_eq!(Feature::AgentPromptInjection.default_enabled(), false); + assert!(!Features::with_defaults().enabled(Feature::AgentPromptInjection)); +} + #[test] fn enable_fanout_is_under_development() { assert_eq!(Feature::SpawnCsv.stage(), Stage::UnderDevelopment); diff --git a/codex-rs/model-provider/src/amazon_bedrock/catalog.rs b/codex-rs/model-provider/src/amazon_bedrock/catalog.rs index 4ca2cb891e..e09dd1cda7 100644 --- a/codex-rs/model-provider/src/amazon_bedrock/catalog.rs +++ b/codex-rs/model-provider/src/amazon_bedrock/catalog.rs @@ -38,6 +38,7 @@ pub(crate) fn static_model_catalog() -> ModelsResponse { fn gpt_5_4_cmb_bedrock_model(priority: i32) -> ModelInfo { ModelInfo { slug: GPT_5_4_CMB_MODEL_ID.to_string(), + request_model: None, display_name: "gpt-5.4".to_string(), description: Some("Strong model for everyday coding.".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), @@ -74,6 +75,7 @@ fn gpt_5_4_cmb_bedrock_model(priority: i32) -> ModelInfo { fn bedrock_oss_model(slug: &str, display_name: &str, priority: i32) -> ModelInfo { 
ModelInfo { slug: slug.to_string(), + request_model: None, display_name: display_name.to_string(), description: Some(display_name.to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), @@ -165,4 +167,18 @@ mod tests { gpt_5_4_cmb_reasoning_levels() ); } + + #[test] + fn catalog_models_use_their_slugs_as_request_models() { + let catalog = static_model_catalog(); + + assert_eq!( + catalog + .models + .iter() + .map(|model| model.request_model.as_deref()) + .collect::>(), + vec![None, None, None] + ); + } } diff --git a/codex-rs/model-provider/src/amazon_bedrock/mod.rs b/codex-rs/model-provider/src/amazon_bedrock/mod.rs index adca7d7d91..58f98b5c0e 100644 --- a/codex-rs/model-provider/src/amazon_bedrock/mod.rs +++ b/codex-rs/model-provider/src/amazon_bedrock/mod.rs @@ -2,6 +2,7 @@ mod auth; mod catalog; mod mantle; +use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; @@ -11,6 +12,7 @@ use codex_login::AuthManager; use codex_login::CodexAuth; use codex_model_provider_info::ModelProviderAwsAuthInfo; use codex_model_provider_info::ModelProviderInfo; +use codex_models_manager::CustomModelConfig; use codex_models_manager::manager::SharedModelsManager; use codex_models_manager::manager::StaticModelsManager; use codex_protocol::account::ProviderAccount; @@ -95,10 +97,12 @@ impl ModelProvider for AmazonBedrockModelProvider { &self, _codex_home: PathBuf, config_model_catalog: Option, + custom_models: HashMap, ) -> SharedModelsManager { - Arc::new(StaticModelsManager::new( + Arc::new(StaticModelsManager::new_with_custom_models( /*auth_manager*/ None, config_model_catalog.unwrap_or_else(static_model_catalog), + custom_models, )) } } diff --git a/codex-rs/model-provider/src/provider.rs b/codex-rs/model-provider/src/provider.rs index 0c5e8e0ffe..c3f52fc560 100644 --- a/codex-rs/model-provider/src/provider.rs +++ b/codex-rs/model-provider/src/provider.rs @@ -1,3 +1,4 @@ +use std::collections::HashMap; use std::fmt; use std::path::PathBuf; use 
std::sync::Arc; @@ -7,6 +8,7 @@ use codex_api::SharedAuthProvider; use codex_login::AuthManager; use codex_login::CodexAuth; use codex_model_provider_info::ModelProviderInfo; +use codex_models_manager::CustomModelConfig; use codex_models_manager::manager::OpenAiModelsManager; use codex_models_manager::manager::SharedModelsManager; use codex_models_manager::manager::StaticModelsManager; @@ -122,6 +124,7 @@ pub trait ModelProvider: fmt::Debug + Send + Sync { &self, codex_home: PathBuf, config_model_catalog: Option, + custom_models: HashMap, ) -> SharedModelsManager; } @@ -216,21 +219,24 @@ impl ModelProvider for ConfiguredModelProvider { &self, codex_home: PathBuf, config_model_catalog: Option, + custom_models: HashMap, ) -> SharedModelsManager { match config_model_catalog { - Some(model_catalog) => Arc::new(StaticModelsManager::new( + Some(model_catalog) => Arc::new(StaticModelsManager::new_with_custom_models( self.auth_manager.clone(), model_catalog, + custom_models, )), None => { let endpoint = Arc::new(OpenAiModelsEndpoint::new( self.info.clone(), self.auth_manager.clone(), )); - Arc::new(OpenAiModelsManager::new( + Arc::new(OpenAiModelsManager::new_with_custom_models( codex_home, endpoint, self.auth_manager.clone(), + custom_models, )) } } @@ -461,8 +467,11 @@ mod tests { ModelProviderInfo::create_amazon_bedrock_provider(/*aws*/ None), /*auth_manager*/ None, ); - let manager = - provider.models_manager(test_codex_home(), /*config_model_catalog*/ None); + let manager = provider.models_manager( + test_codex_home(), + /*config_model_catalog*/ None, + Default::default(), + ); let catalog = manager.raw_model_catalog(RefreshStrategy::Online).await; let model_ids = catalog @@ -504,6 +513,7 @@ mod tests { Some(ModelsResponse { models: vec![custom_model], }), + Default::default(), ); let catalog = manager.raw_model_catalog(RefreshStrategy::Online).await; @@ -540,8 +550,11 @@ mod tests { )), ); - let manager = - provider.models_manager(test_codex_home(), 
/*config_model_catalog*/ None); + let manager = provider.models_manager( + test_codex_home(), + /*config_model_catalog*/ None, + Default::default(), + ); let catalog = manager.raw_model_catalog(RefreshStrategy::Online).await; assert!( diff --git a/codex-rs/models-manager/src/config.rs b/codex-rs/models-manager/src/config.rs index b64add40fc..9150767e1c 100644 --- a/codex-rs/models-manager/src/config.rs +++ b/codex-rs/models-manager/src/config.rs @@ -1,4 +1,5 @@ use codex_protocol::openai_models::ModelsResponse; +use std::collections::HashMap; #[derive(Debug, Clone, Default)] pub struct ModelsManagerConfig { @@ -9,4 +10,12 @@ pub struct ModelsManagerConfig { pub personality_enabled: bool, pub model_supports_reasoning_summaries: Option, pub model_catalog: Option, + pub custom_models: HashMap, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CustomModelConfig { + pub model: String, + pub model_context_window: Option, + pub model_auto_compact_token_limit: Option, } diff --git a/codex-rs/models-manager/src/lib.rs b/codex-rs/models-manager/src/lib.rs index 8bf30d0b60..70301c57df 100644 --- a/codex-rs/models-manager/src/lib.rs +++ b/codex-rs/models-manager/src/lib.rs @@ -7,6 +7,7 @@ pub mod model_presets; pub mod test_support; pub use codex_app_server_protocol::AuthMode; +pub use config::CustomModelConfig; pub use config::ModelsManagerConfig; /// Load the bundled model catalog shipped with `codex-models-manager`. 
diff --git a/codex-rs/models-manager/src/manager.rs b/codex-rs/models-manager/src/manager.rs index 5859830310..9aa2711246 100644 --- a/codex-rs/models-manager/src/manager.rs +++ b/codex-rs/models-manager/src/manager.rs @@ -1,5 +1,6 @@ use super::cache::ModelsCacheManager; use crate::collaboration_mode_presets::builtin_collaboration_mode_presets; +use crate::config::CustomModelConfig; use crate::config::ModelsManagerConfig; use crate::model_info; use async_trait::async_trait; @@ -9,6 +10,8 @@ use codex_protocol::error::Result as CoreResult; use codex_protocol::openai_models::ModelInfo; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ModelsResponse; +use std::collections::HashMap; +use std::collections::HashSet; use std::fmt; use std::path::PathBuf; use std::sync::Arc; @@ -103,11 +106,26 @@ pub trait ModelsManager: fmt::Debug + Send + Sync { /// Return the auth manager used for picker filtering. fn auth_manager(&self) -> Option<&AuthManager>; + /// Return configured user-defined model aliases. + fn custom_models(&self) -> &HashMap; + /// Build picker-ready presets from the active catalog snapshot. 
fn build_available_models(&self, mut remote_models: Vec) -> Vec { remote_models.sort_by(|a, b| a.priority.cmp(&b.priority)); - let mut presets: Vec = remote_models.into_iter().map(Into::into).collect(); + let mut presets: Vec = remote_models.iter().cloned().map(Into::into).collect(); + let mut existing_models: HashSet = + presets.iter().map(|preset| preset.model.clone()).collect(); + let mut custom_presets = self + .custom_models() + .iter() + .filter(|(alias, _custom_model)| existing_models.insert((*alias).clone())) + .map(|(alias, custom_model)| { + construct_model_info_for_custom_alias(alias, custom_model, &remote_models).into() + }) + .collect::>(); + custom_presets.sort_by(|left, right| left.model.cmp(&right.model)); + presets.extend(custom_presets); let uses_codex_backend = self .auth_manager() .is_some_and(AuthManager::current_auth_uses_codex_backend); @@ -160,7 +178,16 @@ pub trait ModelsManager: fmt::Debug + Send + Sync { async fn get_model_info(&self, model: &str, config: &ModelsManagerConfig) -> ModelInfo { async move { let remote_models = self.get_remote_models().await; - construct_model_info_from_candidates(model, &remote_models, config) + let custom_model = config + .custom_models + .get(model) + .or_else(|| self.custom_models().get(model)); + construct_model_info_from_candidates_with_custom( + model, + &remote_models, + config, + custom_model, + ) } .instrument(tracing::info_span!("get_model_info", model = model)) .await @@ -179,6 +206,7 @@ pub type SharedModelsManager = Arc; #[derive(Debug)] pub struct OpenAiModelsManager { remote_models: RwLock>, + custom_models: HashMap, etag: RwLock>, cache_manager: ModelsCacheManager, endpoint_client: SharedModelsEndpointClient, @@ -189,6 +217,7 @@ pub struct OpenAiModelsManager { #[derive(Debug)] pub struct StaticModelsManager { remote_models: Vec, + custom_models: HashMap, auth_manager: Option>, } @@ -198,12 +227,22 @@ impl OpenAiModelsManager { codex_home: PathBuf, endpoint_client: Arc, auth_manager: Option>, 
+ ) -> Self { + Self::new_with_custom_models(codex_home, endpoint_client, auth_manager, HashMap::new()) + } + + pub fn new_with_custom_models( + codex_home: PathBuf, + endpoint_client: Arc, + auth_manager: Option>, + custom_models: HashMap, ) -> Self { let cache_path = codex_home.join(MODEL_CACHE_FILE); let cache_manager = ModelsCacheManager::new(cache_path, DEFAULT_MODEL_CACHE_TTL); let remote_models = load_remote_models_from_file().unwrap_or_default(); Self { remote_models: RwLock::new(remote_models), + custom_models, etag: RwLock::new(None), cache_manager, endpoint_client, @@ -215,8 +254,17 @@ impl OpenAiModelsManager { impl StaticModelsManager { /// Construct a static model manager from an authoritative catalog. pub fn new(auth_manager: Option>, model_catalog: ModelsResponse) -> Self { + Self::new_with_custom_models(auth_manager, model_catalog, HashMap::new()) + } + + pub fn new_with_custom_models( + auth_manager: Option>, + model_catalog: ModelsResponse, + custom_models: HashMap, + ) -> Self { Self { remote_models: model_catalog.models, + custom_models, auth_manager, } } @@ -245,6 +293,10 @@ impl ModelsManager for OpenAiModelsManager { self.auth_manager.as_deref() } + fn custom_models(&self) -> &HashMap { + &self.custom_models + } + fn list_collaboration_modes(&self) -> Vec { builtin_collaboration_mode_presets() } @@ -380,6 +432,10 @@ impl ModelsManager for StaticModelsManager { self.auth_manager.as_deref() } + fn custom_models(&self) -> &HashMap { + &self.custom_models + } + fn list_collaboration_modes(&self) -> Vec { builtin_collaboration_mode_presets() } @@ -441,6 +497,32 @@ pub(crate) fn construct_model_info_from_candidates( candidates: &[ModelInfo], config: &ModelsManagerConfig, ) -> ModelInfo { + construct_model_info_from_candidates_with_custom( + model, + candidates, + config, + config.custom_models.get(model), + ) +} + +fn construct_model_info_from_candidates_with_custom( + model: &str, + candidates: &[ModelInfo], + config: &ModelsManagerConfig, + 
custom_model: Option<&CustomModelConfig>, +) -> ModelInfo { + if let Some(custom_model) = custom_model { + let mut config = config.clone(); + config.model_context_window = custom_model + .model_context_window + .or(config.model_context_window); + config.model_auto_compact_token_limit = custom_model + .model_auto_compact_token_limit + .or(config.model_auto_compact_token_limit); + let model_info = construct_model_info_for_custom_alias(model, custom_model, candidates); + return model_info::with_config_overrides(model_info, &config); + } + // First use the normal longest-prefix match. If that misses, allow a narrowly scoped // retry for namespaced slugs like `custom/gpt-5.3-codex`. let remote = find_model_by_longest_prefix(model, candidates) @@ -457,6 +539,30 @@ pub(crate) fn construct_model_info_from_candidates( model_info::with_config_overrides(model_info, config) } +fn construct_model_info_for_custom_alias( + alias: &str, + custom_model: &CustomModelConfig, + candidates: &[ModelInfo], +) -> ModelInfo { + let remote = find_model_by_longest_prefix(&custom_model.model, candidates) + .or_else(|| find_model_by_namespaced_suffix(&custom_model.model, candidates)); + if let Some(remote) = remote { + ModelInfo { + slug: alias.to_string(), + request_model: Some(custom_model.model.clone()), + display_name: alias.to_string(), + used_fallback_model_metadata: false, + ..remote + } + } else { + let mut fallback_model = model_info::model_info_from_slug(&custom_model.model); + fallback_model.slug = alias.to_string(); + fallback_model.request_model = Some(custom_model.model.clone()); + fallback_model.display_name = alias.to_string(); + fallback_model + } +} + #[cfg(test)] #[path = "manager_tests.rs"] mod tests; diff --git a/codex-rs/models-manager/src/manager_tests.rs b/codex-rs/models-manager/src/manager_tests.rs index 24ae9f3591..caf56220cb 100644 --- a/codex-rs/models-manager/src/manager_tests.rs +++ b/codex-rs/models-manager/src/manager_tests.rs @@ -278,6 +278,47 @@ async fn 
get_model_info_uses_custom_catalog() { assert!(!model_info.used_fallback_model_metadata); } +#[tokio::test] +async fn custom_model_alias_uses_backing_model_metadata_and_request_model() { + let mut custom_models = HashMap::new(); + custom_models.insert( + "frontier-local".to_string(), + CustomModelConfig { + model: "gpt-real-preview".to_string(), + model_context_window: Some(123_456), + model_auto_compact_token_limit: Some(100_000), + }, + ); + let remote = remote_model("gpt-real", "Real", /*priority*/ 0); + let manager = StaticModelsManager::new_with_custom_models( + /*auth_manager*/ None, + ModelsResponse { + models: vec![remote], + }, + custom_models, + ); + let config = ModelsManagerConfig::default(); + + let model_info = manager.get_model_info("frontier-local", &config).await; + + assert_eq!(model_info.slug, "frontier-local"); + assert_eq!( + model_info.request_model.as_deref(), + Some("gpt-real-preview") + ); + assert_eq!(model_info.display_name, "frontier-local"); + assert_eq!(model_info.context_window, Some(123_456)); + assert_eq!(model_info.auto_compact_token_limit, Some(100_000)); + assert!(!model_info.used_fallback_model_metadata); + + let available = manager.list_models(RefreshStrategy::Offline).await; + assert!( + available + .iter() + .any(|preset| preset.model == "frontier-local") + ); +} + #[tokio::test] async fn get_model_info_matches_namespaced_suffix() { let config = ModelsManagerConfig::default(); diff --git a/codex-rs/models-manager/src/model_info.rs b/codex-rs/models-manager/src/model_info.rs index 8e8abae549..7797cd18fb 100644 --- a/codex-rs/models-manager/src/model_info.rs +++ b/codex-rs/models-manager/src/model_info.rs @@ -67,6 +67,7 @@ pub fn model_info_from_slug(slug: &str) -> ModelInfo { warn!("Unknown model {slug} is used. 
This will use fallback model metadata."); ModelInfo { slug: slug.to_string(), + request_model: None, display_name: slug.to_string(), description: None, default_reasoning_level: None, diff --git a/codex-rs/protocol/src/openai_models.rs b/codex-rs/protocol/src/openai_models.rs index 41275e6a6b..30d767d719 100644 --- a/codex-rs/protocol/src/openai_models.rs +++ b/codex-rs/protocol/src/openai_models.rs @@ -247,6 +247,11 @@ const fn default_effective_context_window_percent() -> i64 { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, TS, JsonSchema)] pub struct ModelInfo { pub slug: String, + /// Provider-facing model slug to send on API requests. + /// + /// When unset, `slug` is used. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub request_model: Option, pub display_name: String, pub description: Option, #[serde(default, skip_serializing_if = "Option::is_none")] @@ -303,6 +308,10 @@ pub struct ModelInfo { } impl ModelInfo { + pub fn request_model_slug(&self) -> &str { + self.request_model.as_deref().unwrap_or(self.slug.as_str()) + } + pub fn resolved_context_window(&self) -> Option { self.context_window.or(self.max_context_window) } @@ -539,6 +548,7 @@ mod tests { fn test_model(spec: Option) -> ModelInfo { ModelInfo { slug: "test-model".to_string(), + request_model: None, display_name: "Test Model".to_string(), description: None, default_reasoning_level: None, diff --git a/codex-rs/thread-manager-sample/src/main.rs b/codex-rs/thread-manager-sample/src/main.rs index c74e32f21e..64eae898f6 100644 --- a/codex-rs/thread-manager-sample/src/main.rs +++ b/codex-rs/thread-manager-sample/src/main.rs @@ -203,6 +203,7 @@ fn new_config(model: Option, arg0_paths: Arg0DispatchPaths) -> anyhow::R mcp_oauth_credentials_store_mode: OAuthCredentialsStoreMode::File, mcp_oauth_callback_port: None, mcp_oauth_callback_url: None, + custom_models: HashMap::new(), model_providers, project_doc_max_bytes: 32 * 1024, project_doc_fallback_filenames: Vec::new(),