codex/codex-rs/core/tests/suite/remote_models.rs
Celia Chen e8d8080818 feat: let model providers own model discovery (#18950)
## Why

`codex-models-manager` had grown to own provider-specific concerns:
constructing OpenAI-compatible `/models` requests, resolving provider
auth, emitting request telemetry, and deciding how provider catalogs
should be sourced. That made the manager harder to reuse for providers
whose model catalog is not fetched from the OpenAI `/models` endpoint,
such as Amazon Bedrock.

This change moves provider-specific model discovery behind
provider-owned implementations, so the models manager can focus on
refresh policy, cache behavior, picker ordering, and model metadata
merging.
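
A rough sketch of the resulting shape (the trait and type names come from this
change, but the single method shown is an illustrative assumption, not the
exact crate API):

```rust
use std::sync::Arc;

/// Provider-owned model discovery: each provider decides how its catalog is
/// sourced (an OpenAI-compatible `/models` fetch, a static catalog, ...).
trait ModelsManager: Send + Sync {
    fn list_models(&self) -> Vec<String>;
}

/// Discovery backed by an OpenAI-compatible `/models` endpoint.
struct OpenAiModelsManager {
    base_url: String,
}

impl ModelsManager for OpenAiModelsManager {
    fn list_models(&self) -> Vec<String> {
        // The real implementation issues the HTTP fetch through an endpoint
        // client; elided here to keep the sketch self-contained.
        vec![format!("fetched from {}/models", self.base_url)]
    }
}

/// Discovery backed by a static catalog (e.g. the Bedrock GPT OSS model IDs).
struct StaticModelsManager {
    catalog: Vec<String>,
}

impl ModelsManager for StaticModelsManager {
    fn list_models(&self) -> Vec<String> {
        self.catalog.clone()
    }
}

fn main() {
    // Callers hold an `Arc<dyn ModelsManager>` and never see provider details.
    let managers: Vec<Arc<dyn ModelsManager>> = vec![
        Arc::new(OpenAiModelsManager {
            base_url: "https://example.invalid/v1".to_string(),
        }),
        Arc::new(StaticModelsManager {
            catalog: vec!["openai.gpt-oss-120b-1:0".to_string()],
        }),
    ];
    for manager in &managers {
        println!("{:?}", manager.list_models());
    }
}
```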

## What Changed

- Introduced a `ModelsManager` trait with separate `OpenAiModelsManager`
and `StaticModelsManager` implementations.
- Added `ModelsEndpointClient` so OpenAI-compatible HTTP fetching lives
outside `codex-models-manager`.
- Moved `/models` request construction, provider auth resolution,
timeout handling, and request telemetry into `codex-model-provider` via
`OpenAiModelsEndpoint`.
- Added provider-owned `models_manager(...)` construction so configured
OpenAI-compatible providers use `OpenAiModelsManager`, while
static/catalog-backed providers can return `StaticModelsManager`.
- Added an Amazon Bedrock static model catalog for the GPT OSS Bedrock
model IDs.
- Updated core/session/thread manager code and tests to depend on
`Arc<dyn ModelsManager>`.
- Moved offline model test helpers into
`codex_models_manager::test_support`.

## Metadata References

The Bedrock catalog metadata is based on the official Amazon Bedrock
OpenAI model documentation:

- [Amazon Bedrock OpenAI
models](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-openai.html)
lists the Bedrock model IDs, text input/output modalities, and `128,000`
token context window for `gpt-oss-20b` and `gpt-oss-120b`.
- [Amazon Bedrock `gpt-oss-120b` model
card](https://docs.aws.amazon.com/bedrock/latest/userguide/model-card-openai-gpt-oss-120b.html)
lists the `bedrock-runtime` model ID `openai.gpt-oss-120b-1:0`, the
`bedrock-mantle` model ID `openai.gpt-oss-120b`, text-only modalities,
and `128K` context window.
- [OpenAI `gpt-oss-120b` model
docs](https://developers.openai.com/api/docs/models/gpt-oss-120b)
document configurable reasoning effort with `low`, `medium`, and `high`,
plus text input/output modality.

The display names, default reasoning effort, and priority ordering are
Codex-local catalog choices.
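
For illustration only, one way those documented values could be encoded as a
static catalog entry (the `BedrockCatalogEntry` type and field names below are
hypothetical; only the values come from the documents cited above):

```rust
// Hypothetical catalog-entry shape; the field names are not the real schema,
// but the values match the cited AWS/OpenAI documentation.
struct BedrockCatalogEntry {
    model_id: &'static str,
    context_window: u32,
    input_modalities: &'static [&'static str],
    output_modalities: &'static [&'static str],
    reasoning_efforts: &'static [&'static str],
}

const GPT_OSS_120B: BedrockCatalogEntry = BedrockCatalogEntry {
    model_id: "openai.gpt-oss-120b-1:0", // bedrock-runtime model ID
    context_window: 128_000,             // documented 128K context window
    input_modalities: &["text"],         // text-only model
    output_modalities: &["text"],
    reasoning_efforts: &["low", "medium", "high"],
};

fn main() {
    println!(
        "{}: {} tokens, in={:?}, out={:?}, efforts={:?}",
        GPT_OSS_120B.model_id,
        GPT_OSS_120B.context_window,
        GPT_OSS_120B.input_modalities,
        GPT_OSS_120B.output_modalities,
        GPT_OSS_120B.reasoning_efforts,
    );
}
```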

## Test Plan

- Manually verified app-server model listing with an AWS profile:

```shell
CODEX_HOME="$(mktemp -d)" cargo run -p codex-app-server-test-client -- \
  --codex-bin ./target/debug/codex \
  -c 'model_provider="amazon-bedrock"' \
  -c 'model_providers.amazon-bedrock.aws.profile="codex-bedrock"' \
  -c 'model_providers.amazon-bedrock.aws.region="us-west-2"' \
  model-list
```

The response returned the Bedrock catalog with `openai.gpt-oss-120b-1:0`
as the default model and `openai.gpt-oss-20b-1:0` as the second listed
model, both text-only and supporting low/medium/high reasoning effort.

#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used)]
use anyhow::Result;
use codex_login::CodexAuth;
use codex_model_provider_info::ModelProviderInfo;
use codex_model_provider_info::built_in_model_providers;
use codex_models_manager::bundled_models_response;
use codex_models_manager::manager::RefreshStrategy;
use codex_models_manager::manager::SharedModelsManager;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::openai_models::ConfigShellToolType;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::openai_models::ModelVisibility;
use codex_protocol::openai_models::ModelsResponse;
use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::openai_models::ReasoningEffortPreset;
use codex_protocol::openai_models::TruncationPolicyConfig;
use codex_protocol::openai_models::default_input_modalities;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::ExecCommandSource;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_models_once;
use core_test_support::responses::mount_models_once_with_delay;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::skip_if_sandbox;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use pretty_assertions::assert_eq;
use serde_json::json;
use tempfile::TempDir;
use tokio::time::Duration;
use tokio::time::Instant;
use tokio::time::sleep;
use tokio::time::timeout;
use wiremock::BodyPrintLimit;
use wiremock::MockServer;
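/// Slug for the ad-hoc remote model served by the mock `/models` endpoint.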
const REMOTE_MODEL_SLUG: &str = "codex-test";
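/// Scenario: the remote catalog carries both `gpt-5.3` and `gpt-5.3-codex`.
/// Looking up `gpt-5.3-codex-test` must resolve metadata from the longest
/// matching prefix (`gpt-5.3-codex`) rather than the shorter `gpt-5.3`.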
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_get_model_info_uses_longest_matching_prefix() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::start().await;
let generic = test_remote_model_with_policy(
"gpt-5.3",
ModelVisibility::List,
/*priority*/ 1_000,
TruncationPolicyConfig::bytes(/*limit*/ 10_000),
);
let specific = test_remote_model_with_policy(
"gpt-5.3-codex",
ModelVisibility::List,
/*priority*/ 1_000,
TruncationPolicyConfig::bytes(/*limit*/ 10_000),
);
let specific = ModelInfo {
display_name: "GPT 5.3 Codex".to_string(),
base_instructions: "use specific prefix".to_string(),
..specific
};
let generic = ModelInfo {
display_name: "GPT 5.3".to_string(),
base_instructions: "use generic prefix".to_string(),
..generic
};
mount_models_once(
&server,
ModelsResponse {
models: vec![generic.clone(), specific.clone()],
},
)
.await;
let codex_home = TempDir::new()?;
let config = load_default_config_for_test(&codex_home).await;
let auth = CodexAuth::create_dummy_chatgpt_auth_for_testing();
let provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers(/*openai_base_url*/ None)["openai"].clone()
};
let manager = codex_core::test_support::models_manager_with_provider(
codex_home.path().to_path_buf(),
codex_core::test_support::auth_manager_from_auth(auth),
provider,
);
manager.list_models(RefreshStrategy::OnlineIfUncached).await;
let model_info = manager
.get_model_info("gpt-5.3-codex-test", &config.to_models_manager_config())
.await;
assert_eq!(model_info.slug, "gpt-5.3-codex-test");
assert_eq!(model_info.base_instructions, specific.base_instructions);
Ok(())
}
/// Scenario: the model advertises a default 273k context window and a 400k max
/// context window, and the user explicitly configures 1M. This verifies the
/// runtime turn clamps the override to the advertised max window.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_config_context_window_override_clamps_to_max_context_window() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::start().await;
let requested_model = "gpt-5.4-test";
let mut remote_model =
test_remote_model("gpt-5.4", ModelVisibility::List, /*priority*/ 1_000);
remote_model.context_window = Some(273_000);
remote_model.max_context_window = Some(400_000);
remote_model.effective_context_window_percent = 100;
mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model],
},
)
.await;
mount_sse_once(
&server,
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
)
.await;
let TestCodex {
codex, cwd, config, ..
} = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
config.model = Some(requested_model.to_string());
config.model_context_window = Some(1_000_000);
})
.build(&server)
.await?;
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "check context window".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: config.permissions.approval_policy.value(),
approvals_reviewer: None,
sandbox_policy: config.permissions.sandbox_policy.get().clone(),
model: requested_model.to_string(),
effort: None,
summary: None,
service_tier: None,
collaboration_mode: None,
permission_profile: None,
personality: None,
environments: None,
})
.await?;
let turn_started_event = wait_for_event(&codex, |event| {
matches!(
event,
EventMsg::TurnStarted(started)
if started.model_context_window == Some(400_000)
)
})
.await;
let EventMsg::TurnStarted(turn_started) = turn_started_event else {
unreachable!("wait_for_event returned unexpected event");
};
assert_eq!(turn_started.model_context_window, Some(400_000));
Ok(())
}
/// Scenario: the user explicitly configures a context window above the model's
/// max_context_window. This verifies the runtime window is clamped to the max
/// instead of using the oversized config value.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_config_override_above_max_uses_max_context_window() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::start().await;
let requested_model = "gpt-5.4-test";
let mut remote_model =
test_remote_model("gpt-5.4", ModelVisibility::List, /*priority*/ 1_000);
remote_model.context_window = Some(273_000);
remote_model.max_context_window = Some(400_000);
remote_model.effective_context_window_percent = 100;
mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model],
},
)
.await;
mount_sse_once(
&server,
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
)
.await;
let TestCodex {
codex, cwd, config, ..
} = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
config.model = Some(requested_model.to_string());
config.model_context_window = Some(500_000);
})
.build(&server)
.await?;
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "check context window".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: config.permissions.approval_policy.value(),
approvals_reviewer: None,
sandbox_policy: config.permissions.sandbox_policy.get().clone(),
model: requested_model.to_string(),
effort: None,
summary: None,
service_tier: None,
collaboration_mode: None,
permission_profile: None,
personality: None,
environments: None,
})
.await?;
let turn_started_event = wait_for_event(&codex, |event| {
matches!(
event,
EventMsg::TurnStarted(started)
if started.model_context_window == Some(400_000)
)
})
.await;
let EventMsg::TurnStarted(turn_started) = turn_started_event else {
unreachable!("wait_for_event returned unexpected event");
};
assert_eq!(turn_started.model_context_window, Some(400_000));
Ok(())
}
/// Scenario: model metadata includes both context_window and max_context_window,
/// but the user did not configure an override. This verifies the runtime keeps
/// using the model's default context_window in the no-override path.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_use_context_window_when_config_override_is_absent() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::start().await;
let requested_model = "gpt-5.4-test";
let mut remote_model =
test_remote_model("gpt-5.4", ModelVisibility::List, /*priority*/ 1_000);
remote_model.context_window = Some(273_000);
remote_model.max_context_window = Some(400_000);
remote_model.effective_context_window_percent = 100;
mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model],
},
)
.await;
mount_sse_once(
&server,
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
)
.await;
let TestCodex {
codex, cwd, config, ..
} = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
config.model = Some(requested_model.to_string());
})
.build(&server)
.await?;
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "check context window".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: config.permissions.approval_policy.value(),
approvals_reviewer: None,
sandbox_policy: config.permissions.sandbox_policy.get().clone(),
model: requested_model.to_string(),
effort: None,
summary: None,
service_tier: None,
collaboration_mode: None,
permission_profile: None,
personality: None,
environments: None,
})
.await?;
let turn_started_event = wait_for_event(&codex, |event| {
matches!(
event,
EventMsg::TurnStarted(started)
if started.model_context_window == Some(273_000)
)
})
.await;
let EventMsg::TurnStarted(turn_started) = turn_started_event else {
unreachable!("wait_for_event returned unexpected event");
};
assert_eq!(turn_started.model_context_window, Some(273_000));
Ok(())
}
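/// Scenario: the requested slug `gpt-5.3-codex-test` prefix-matches the
/// catalog entry `gpt-5.3-codex`, which defaults to high effort and detailed
/// summaries. The outbound request must carry the full requested slug while
/// inheriting the prefix model's reasoning settings.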
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_long_model_slug_is_sent_with_high_reasoning() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::start().await;
let requested_model = "gpt-5.3-codex-test";
let prefix_model = "gpt-5.3-codex";
let mut remote_model = test_remote_model_with_policy(
prefix_model,
ModelVisibility::List,
/*priority*/ 1_000,
TruncationPolicyConfig::bytes(/*limit*/ 10_000),
);
remote_model.default_reasoning_level = Some(ReasoningEffort::High);
remote_model.supported_reasoning_levels = vec![
ReasoningEffortPreset {
effort: ReasoningEffort::Medium,
description: ReasoningEffort::Medium.to_string(),
},
ReasoningEffortPreset {
effort: ReasoningEffort::High,
description: ReasoningEffort::High.to_string(),
},
];
remote_model.supports_reasoning_summaries = true;
remote_model.default_reasoning_summary = ReasoningSummary::Detailed;
mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model],
},
)
.await;
let response_mock = mount_sse_once(
&server,
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
)
.await;
let TestCodex {
codex, cwd, config, ..
} = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
config.model = Some(requested_model.to_string());
})
.build(&server)
.await?;
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "check model slug".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: config.permissions.approval_policy.value(),
approvals_reviewer: None,
sandbox_policy: config.permissions.sandbox_policy.get().clone(),
permission_profile: None,
model: requested_model.to_string(),
effort: None,
summary: None,
service_tier: None,
collaboration_mode: None,
personality: None,
environments: None,
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
let request = response_mock.single_request();
let body = request.body_json();
let reasoning_effort = body
.get("reasoning")
.and_then(|reasoning| reasoning.get("effort"))
.and_then(|value| value.as_str());
let reasoning_summary = body
.get("reasoning")
.and_then(|reasoning| reasoning.get("summary"))
.and_then(|value| value.as_str());
assert_eq!(body["model"].as_str(), Some(requested_model));
assert_eq!(reasoning_effort, Some("high"));
assert_eq!(reasoning_summary, Some("detailed"));
Ok(())
}
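/// Scenario: a provider-namespaced slug (`custom/gpt-5.2-codex`) is requested.
/// The turn must complete without any "Defaulting to fallback metadata"
/// warning, and the namespaced slug must be sent to the API verbatim.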
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn namespaced_model_slug_uses_catalog_metadata_without_fallback_warning() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::start().await;
let requested_model = "custom/gpt-5.2-codex";
let response_mock = mount_sse_once(
&server,
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
)
.await;
let TestCodex {
codex, cwd, config, ..
} = test_codex()
.with_model(requested_model)
.build(&server)
.await?;
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "check namespaced model metadata".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: config.permissions.approval_policy.value(),
approvals_reviewer: None,
sandbox_policy: config.permissions.sandbox_policy.get().clone(),
permission_profile: None,
model: requested_model.to_string(),
effort: None,
summary: Some(
config
.model_reasoning_summary
.unwrap_or(ReasoningSummary::Auto),
),
service_tier: None,
collaboration_mode: None,
personality: None,
environments: None,
})
.await?;
let mut fallback_warning_count = 0;
loop {
let event = wait_for_event(&codex, |_| true).await;
match event {
EventMsg::Warning(warning)
if warning.message.contains("Defaulting to fallback metadata") =>
{
fallback_warning_count += 1;
}
EventMsg::TurnComplete(_) => break,
_ => {}
}
}
let body = response_mock.single_request().body_json();
assert_eq!(body["model"].as_str(), Some(requested_model));
assert_eq!(fallback_warning_count, 0);
Ok(())
}
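/// Scenario: the mock `/models` endpoint serves a model that requires
/// `ConfigShellToolType::UnifiedExec`. One refresh must make it available,
/// its resolved metadata must report the unified exec shell, and an
/// `exec_command` call must run through the unified exec startup path.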
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_remote_model_uses_unified_exec() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::builder()
.body_print_limit(BodyPrintLimit::Limited(80_000))
.start()
.await;
let remote_model = ModelInfo {
slug: REMOTE_MODEL_SLUG.to_string(),
display_name: "Remote Test".to_string(),
description: Some("A remote model that requires the test shell".to_string()),
default_reasoning_level: Some(ReasoningEffort::Medium),
supported_reasoning_levels: vec![ReasoningEffortPreset {
effort: ReasoningEffort::Medium,
description: ReasoningEffort::Medium.to_string(),
}],
shell_type: ConfigShellToolType::UnifiedExec,
visibility: ModelVisibility::List,
supported_in_api: true,
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
priority: 1,
additional_speed_tiers: Vec::new(),
upgrade: None,
base_instructions: "base instructions".to_string(),
model_messages: None,
supports_reasoning_summaries: false,
default_reasoning_summary: ReasoningSummary::Auto,
support_verbosity: false,
default_verbosity: None,
availability_nux: None,
apply_patch_tool_type: None,
web_search_tool_type: Default::default(),
truncation_policy: TruncationPolicyConfig::bytes(/*limit*/ 10_000),
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(272_000),
max_context_window: None,
auto_compact_token_limit: None,
effective_context_window_percent: 95,
experimental_supported_tools: Vec::new(),
};
let models_mock = mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model],
},
)
.await;
let mut builder = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
config.model = Some("gpt-5.4".to_string());
});
let TestCodex {
codex,
cwd,
config,
thread_manager,
..
} = builder.build(&server).await?;
let models_manager = thread_manager.get_models_manager();
let available_model = wait_for_model_available(&models_manager, REMOTE_MODEL_SLUG).await;
assert_eq!(available_model.model, REMOTE_MODEL_SLUG);
let requests = models_mock.requests();
assert_eq!(
requests.len(),
1,
"expected a single /models refresh request for the remote models feature"
);
assert_eq!(requests[0].url.path(), "/v1/models");
let model_info = models_manager
.get_model_info(REMOTE_MODEL_SLUG, &config.to_models_manager_config())
.await;
assert_eq!(model_info.shell_type, ConfigShellToolType::UnifiedExec);
codex
.submit(Op::OverrideTurnContext {
cwd: None,
approval_policy: None,
approvals_reviewer: None,
sandbox_policy: None,
permission_profile: None,
windows_sandbox_level: None,
model: Some(REMOTE_MODEL_SLUG.to_string()),
effort: None,
summary: None,
service_tier: None,
collaboration_mode: None,
personality: None,
})
.await?;
let call_id = "call";
let args = json!({
"cmd": "/bin/echo call",
"yield_time_ms": 250,
});
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_response_created("resp-2"),
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
mount_sse_sequence(&server, responses).await;
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "run call".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: SandboxPolicy::DangerFullAccess,
permission_profile: None,
model: REMOTE_MODEL_SLUG.to_string(),
effort: None,
summary: Some(ReasoningSummary::Auto),
service_tier: None,
collaboration_mode: None,
personality: None,
environments: None,
})
.await?;
let begin_event = wait_for_event_match(&codex, |msg| match msg {
EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()),
_ => None,
})
.await;
assert_eq!(begin_event.source, ExecCommandSource::UnifiedExecStartup);
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
Ok(())
}
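/// Scenario: the remote model ships a 12_000-byte truncation policy and the
/// config sets no override; the resolved metadata must preserve the remote
/// policy unchanged.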
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_truncation_policy_without_override_preserves_remote() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::builder()
.body_print_limit(BodyPrintLimit::Limited(80_000))
.start()
.await;
let slug = "codex-test-truncation-policy";
let remote_model = test_remote_model_with_policy(
slug,
ModelVisibility::List,
/*priority*/ 1,
TruncationPolicyConfig::bytes(/*limit*/ 12_000),
);
mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model],
},
)
.await;
let mut builder = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
config.model = Some("gpt-5.4".to_string());
});
let test = builder.build(&server).await?;
let models_manager = test.thread_manager.get_models_manager();
wait_for_model_available(&models_manager, slug).await;
let model_info = models_manager
.get_model_info(slug, &test.config.to_models_manager_config())
.await;
assert_eq!(
model_info.truncation_policy,
TruncationPolicyConfig::bytes(/*limit*/ 12_000)
);
Ok(())
}
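/// Scenario: the config sets `tool_output_token_limit = 50`, which must
/// override the remote 10_000-byte truncation policy; the resolved policy
/// becomes 200 bytes.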
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_truncation_policy_with_tool_output_override() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::builder()
.body_print_limit(BodyPrintLimit::Limited(80_000))
.start()
.await;
let slug = "codex-test-truncation-override";
let remote_model = test_remote_model_with_policy(
slug,
ModelVisibility::List,
/*priority*/ 1,
TruncationPolicyConfig::bytes(/*limit*/ 10_000),
);
mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model],
},
)
.await;
let mut builder = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
config.model = Some("gpt-5.4".to_string());
config.tool_output_token_limit = Some(50);
});
let test = builder.build(&server).await?;
let models_manager = test.thread_manager.get_models_manager();
wait_for_model_available(&models_manager, slug).await;
let model_info = models_manager
.get_model_info(slug, &test.config.to_models_manager_config())
.await;
assert_eq!(
model_info.truncation_policy,
TruncationPolicyConfig::bytes(/*limit*/ 200)
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_apply_remote_base_instructions() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::builder()
.body_print_limit(BodyPrintLimit::Limited(80_000))
.start()
.await;
let model = "test-gpt-5-remote";
let remote_base = "Use the remote base instructions only.";
let remote_model = ModelInfo {
slug: model.to_string(),
display_name: "Parallel Remote".to_string(),
description: Some("A remote model with custom instructions".to_string()),
default_reasoning_level: Some(ReasoningEffort::Medium),
supported_reasoning_levels: vec![ReasoningEffortPreset {
effort: ReasoningEffort::Medium,
description: ReasoningEffort::Medium.to_string(),
}],
shell_type: ConfigShellToolType::ShellCommand,
visibility: ModelVisibility::List,
supported_in_api: true,
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
priority: 1,
additional_speed_tiers: Vec::new(),
upgrade: None,
base_instructions: remote_base.to_string(),
model_messages: None,
supports_reasoning_summaries: false,
default_reasoning_summary: ReasoningSummary::Auto,
support_verbosity: false,
default_verbosity: None,
availability_nux: None,
apply_patch_tool_type: None,
web_search_tool_type: Default::default(),
truncation_policy: TruncationPolicyConfig::bytes(/*limit*/ 10_000),
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(272_000),
max_context_window: None,
auto_compact_token_limit: None,
effective_context_window_percent: 95,
experimental_supported_tools: Vec::new(),
};
mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model],
},
)
.await;
let response_mock = mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_assistant_message("msg-1", "done"),
ev_completed("resp-1"),
]),
)
.await;
let mut builder = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
config.model = Some("gpt-5.2".to_string());
});
let TestCodex {
codex,
cwd,
config,
thread_manager,
..
} = builder.build(&server).await?;
let models_manager = thread_manager.get_models_manager();
wait_for_model_available(&models_manager, model).await;
codex
.submit(Op::OverrideTurnContext {
cwd: None,
approval_policy: None,
approvals_reviewer: None,
sandbox_policy: None,
permission_profile: None,
windows_sandbox_level: None,
model: Some(model.to_string()),
effort: None,
summary: None,
service_tier: None,
collaboration_mode: None,
personality: None,
})
.await?;
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "hello remote".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: SandboxPolicy::DangerFullAccess,
permission_profile: None,
model: model.to_string(),
effort: None,
summary: Some(ReasoningSummary::Auto),
service_tier: None,
collaboration_mode: None,
personality: None,
environments: None,
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
let base_model_info = models_manager
.get_model_info("gpt-5.2", &config.to_models_manager_config())
.await;
let body = response_mock.single_request().body_json();
let instructions = body["instructions"].as_str().unwrap();
assert_eq!(instructions, base_model_info.base_instructions);
Ok(())
}
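/// Scenario: the remote response lists only `remote-alpha`. The merged list
/// must carry the remote entry exactly as converted from its `ModelInfo` and
/// keep exactly one default model, without re-adding removed built-in presets.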
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_do_not_append_removed_builtin_presets() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::start().await;
let remote_model =
test_remote_model("remote-alpha", ModelVisibility::List, /*priority*/ 0);
let models_mock = mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model.clone()],
},
)
.await;
let codex_home = TempDir::new()?;
let auth = CodexAuth::create_dummy_chatgpt_auth_for_testing();
let provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers(/*openai_base_url*/ None)["openai"].clone()
};
let manager = codex_core::test_support::models_manager_with_provider(
codex_home.path().to_path_buf(),
codex_core::test_support::auth_manager_from_auth(auth),
provider,
);
let available = manager.list_models(RefreshStrategy::OnlineIfUncached).await;
let remote = available
.iter()
.find(|model| model.model == "remote-alpha")
.expect("remote model should be listed");
let mut expected_remote: ModelPreset = remote_model.into();
expected_remote.is_default = remote.is_default;
assert_eq!(*remote, expected_remote);
let default_model = available
.iter()
.find(|model| model.show_in_picker)
.expect("default model should be set");
assert!(default_model.is_default);
assert_eq!(
available.iter().filter(|model| model.is_default).count(),
1,
"expected a single default model"
);
assert_eq!(
models_mock.requests().len(),
1,
"expected a single /models request"
);
// Keep the mock server alive until after async assertions complete.
drop(server);
Ok(())
}
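/// Scenario: the remote response introduces `remote-top`, whose -10_000
/// priority sorts ahead of every bundled entry, so it must be listed first.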
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_merge_adds_new_high_priority_first() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::start().await;
let remote_model = test_remote_model(
"remote-top",
ModelVisibility::List,
/*priority*/ -10_000,
);
let models_mock = mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model],
},
)
.await;
let codex_home = TempDir::new()?;
let auth = CodexAuth::create_dummy_chatgpt_auth_for_testing();
let provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers(/*openai_base_url*/ None)["openai"].clone()
};
let manager = codex_core::test_support::models_manager_with_provider(
codex_home.path().to_path_buf(),
codex_core::test_support::auth_manager_from_auth(auth),
provider,
);
let available = manager.list_models(RefreshStrategy::OnlineIfUncached).await;
assert_eq!(
available.first().map(|model| model.model.as_str()),
Some("remote-top")
);
assert_eq!(
models_mock.requests().len(),
1,
"expected a single /models request"
);
// Keep the mock server alive until after async assertions complete.
drop(server);
Ok(())
}
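/// Scenario: the remote response reuses a bundled slug; the remote display
/// name and description must replace the bundled entry's metadata.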
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_merge_replaces_overlapping_model() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::start().await;
let slug = bundled_model_slug();
let mut remote_model = test_remote_model(&slug, ModelVisibility::List, /*priority*/ 0);
remote_model.display_name = "Overridden".to_string();
remote_model.description = Some("Overridden description".to_string());
let models_mock = mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model.clone()],
},
)
.await;
let codex_home = TempDir::new()?;
let auth = CodexAuth::create_dummy_chatgpt_auth_for_testing();
let provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers(/*openai_base_url*/ None)["openai"].clone()
};
let manager = codex_core::test_support::models_manager_with_provider(
codex_home.path().to_path_buf(),
codex_core::test_support::auth_manager_from_auth(auth),
provider,
);
let available = manager.list_models(RefreshStrategy::OnlineIfUncached).await;
let overridden = available
.iter()
.find(|model| model.model == slug)
.expect("overlapping model should be listed");
assert_eq!(overridden.display_name, remote_model.display_name);
assert_eq!(
overridden.description,
remote_model
.description
.expect("remote model should include description")
);
assert_eq!(
models_mock.requests().len(),
1,
"expected a single /models request"
);
// Keep the mock server alive until after async assertions complete.
drop(server);
Ok(())
}
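/// Scenario: the remote `/models` response is empty; the bundled catalog must
/// survive the merge instead of being wiped out.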
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_merge_preserves_bundled_models_on_empty_response() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::start().await;
let _models_mock = mount_models_once(&server, ModelsResponse { models: Vec::new() }).await;
let codex_home = TempDir::new()?;
let auth = CodexAuth::create_dummy_chatgpt_auth_for_testing();
let provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers(/*openai_base_url*/ None)["openai"].clone()
};
let manager = codex_core::test_support::models_manager_with_provider(
codex_home.path().to_path_buf(),
codex_core::test_support::auth_manager_from_auth(auth),
provider,
);
let available = manager.list_models(RefreshStrategy::OnlineIfUncached).await;
let bundled_slug = bundled_model_slug();
assert!(
available.iter().any(|model| model.model == bundled_slug),
"bundled models should remain available after empty remote response"
);
// Keep the mock server alive until after async assertions complete.
drop(server);
Ok(())
}
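/// Scenario: the `/models` response is delayed by 6s, beyond the 5s client
/// timeout. `get_default_model` must return the bundled default near the
/// timeout instead of waiting for the delayed response.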
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_request_times_out_after_5s() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::start().await;
let remote_model =
test_remote_model("remote-timeout", ModelVisibility::List, /*priority*/ 0);
let models_mock = mount_models_once_with_delay(
&server,
ModelsResponse {
models: vec![remote_model],
},
Duration::from_secs(6),
)
.await;
let codex_home = TempDir::new()?;
let auth = CodexAuth::create_dummy_chatgpt_auth_for_testing();
let provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers(/*openai_base_url*/ None)["openai"].clone()
};
let manager = codex_core::test_support::models_manager_with_provider(
codex_home.path().to_path_buf(),
codex_core::test_support::auth_manager_from_auth(auth),
provider,
);
let start = Instant::now();
let model = timeout(
Duration::from_secs(7),
manager.get_default_model(&None, RefreshStrategy::OnlineIfUncached),
)
.await;
let elapsed = start.elapsed();
    // get_default_model should return a default model even when the refresh times out.
    let default_model =
        model.expect("get_default_model should finish and return the default model");
    let expected_default = bundled_default_model_slug();
    assert!(
        default_model == expected_default,
        "get_default_model should return the default model when the refresh times out, got: {default_model}"
    );
    // Confirm the mock server actually captured the delayed request.
    let _ = server
        .received_requests()
        .await
        .expect("mock server should capture requests");
assert!(
elapsed >= Duration::from_millis(4_500),
"expected models call to block near the timeout; took {elapsed:?}"
);
assert!(
elapsed < Duration::from_millis(5_800),
"expected models call to time out before the delayed response; took {elapsed:?}"
);
assert_eq!(
models_mock.requests().len(),
1,
"expected a single /models request"
);
Ok(())
}
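/// Scenario: the remote model arrives with `ModelVisibility::Hide`. It must be
/// listed but kept out of the picker, and it must never become the default.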
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_hide_picker_only_models() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = MockServer::start().await;
let remote_model = test_remote_model(
"codex-auto-balanced",
ModelVisibility::Hide,
/*priority*/ 0,
);
let models_mock = mount_models_once(
&server,
ModelsResponse {
models: vec![remote_model],
},
)
.await;
let codex_home = TempDir::new()?;
let auth = CodexAuth::create_dummy_chatgpt_auth_for_testing();
let provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers(/*openai_base_url*/ None)["openai"].clone()
};
let manager = codex_core::test_support::models_manager_with_provider(
codex_home.path().to_path_buf(),
codex_core::test_support::auth_manager_from_auth(auth),
provider,
);
let selected = manager
.get_default_model(&None, RefreshStrategy::OnlineIfUncached)
.await;
assert_eq!(selected, bundled_default_model_slug());
let available = manager.list_models(RefreshStrategy::OnlineIfUncached).await;
let hidden = available
.iter()
.find(|model| model.model == "codex-auto-balanced")
.expect("hidden remote model should be listed");
assert!(!hidden.show_in_picker, "hidden models should remain hidden");
assert_eq!(
models_mock.requests().len(),
1,
"expected a single /models request"
);
// Keep the mock server alive until after async assertions complete.
drop(server);
Ok(())
}
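/// Polls `list_models` until the given slug appears, panicking if it does not
/// show up within two seconds.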
async fn wait_for_model_available(manager: &SharedModelsManager, slug: &str) -> ModelPreset {
let deadline = Instant::now() + Duration::from_secs(2);
loop {
if let Some(model) = {
let guard = manager.list_models(RefreshStrategy::OnlineIfUncached).await;
guard.iter().find(|model| model.model == slug).cloned()
} {
return model;
}
if Instant::now() >= deadline {
panic!("timed out waiting for the remote model {slug} to appear");
}
sleep(Duration::from_millis(25)).await;
}
}
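/// Returns the slug of the first entry in the bundled `models.json`.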
fn bundled_model_slug() -> String {
let response = bundled_models_response()
.unwrap_or_else(|err| panic!("bundled models.json should parse: {err}"));
response
.models
.first()
.expect("bundled models.json should include at least one model")
.slug
.clone()
}
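/// Returns the slug of the bundled preset marked as the default model.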
fn bundled_default_model_slug() -> String {
codex_core::test_support::all_model_presets()
.iter()
.find(|preset| preset.is_default)
.expect("bundled models should include a default")
.model
.clone()
}
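/// Builds a minimal remote `ModelInfo` with a default 10_000-byte truncation
/// policy.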
fn test_remote_model(slug: &str, visibility: ModelVisibility, priority: i32) -> ModelInfo {
test_remote_model_with_policy(
slug,
visibility,
priority,
TruncationPolicyConfig::bytes(/*limit*/ 10_000),
)
}
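/// Builds a minimal remote `ModelInfo` with the given visibility, priority,
/// and truncation policy; every other field uses a fixed test default.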
fn test_remote_model_with_policy(
slug: &str,
visibility: ModelVisibility,
priority: i32,
truncation_policy: TruncationPolicyConfig,
) -> ModelInfo {
ModelInfo {
slug: slug.to_string(),
display_name: format!("{slug} display"),
description: Some(format!("{slug} description")),
default_reasoning_level: Some(ReasoningEffort::Medium),
supported_reasoning_levels: vec![ReasoningEffortPreset {
effort: ReasoningEffort::Medium,
description: ReasoningEffort::Medium.to_string(),
}],
shell_type: ConfigShellToolType::ShellCommand,
visibility,
supported_in_api: true,
input_modalities: default_input_modalities(),
used_fallback_model_metadata: false,
supports_search_tool: false,
priority,
additional_speed_tiers: Vec::new(),
upgrade: None,
base_instructions: "base instructions".to_string(),
model_messages: None,
supports_reasoning_summaries: false,
default_reasoning_summary: ReasoningSummary::Auto,
support_verbosity: false,
default_verbosity: None,
availability_nux: None,
apply_patch_tool_type: None,
web_search_tool_type: Default::default(),
truncation_policy,
supports_parallel_tool_calls: false,
supports_image_detail_original: false,
context_window: Some(272_000),
max_context_window: None,
auto_compact_token_limit: None,
effective_context_window_percent: 95,
experimental_supported_tools: Vec::new(),
}
}