Remove offline fallback for models (#11238)

Remove the offline model-info fallback. Tests no longer call `ModelsManager::construct_model_info_offline`; instead they enable the `RemoteModels` feature, build a `ModelsManager` from an `AuthManager` seeded with a dummy ChatGPT auth, and resolve model metadata with `manager.get_model_info(model, &config).await`.

Test updates visible in this diff:

- Tool-selection expectations move from `codex-mini-latest` (local shell) to `gpt-5.1-codex-max`, whose default tool set includes `apply_patch`; the `exp-5.1` case is dropped.
- `codex_mini_latest_tools` is renamed to `gpt_5_tools_without_apply_patch_append_apply_patch_instructions` and now asserts that instructions are non-empty and consistent across requests rather than comparing against `BASE_INSTRUCTIONS` plus `APPLY_PATCH_TOOL_INSTRUCTIONS`.
- The `truncate_function_error_trims_respond_to_model` test and its `mount_sse_sequence` import are removed.
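For quick orientation, here is the before/after call pattern assembled from the hunks below. The `codex_core` and `core_test_support` items are taken verbatim from this diff; the test name and surrounding scaffolding are illustrative only.

```rust
use codex_core::CodexAuth;
use codex_core::features::Feature;
use codex_core::models_manager::manager::ModelsManager;
use core_test_support::load_default_config_for_test;
use tempfile::TempDir;

// Illustrative test name; the real tests are in the hunks below.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn model_info_via_models_manager() {
    let codex_home = TempDir::new().expect("create temp dir");
    let mut config = load_default_config_for_test(&codex_home).await;

    // Before: a synchronous offline lookup with no auth involved.
    //     let model_info = ModelsManager::construct_model_info_offline("gpt-5.1", &config);

    // After: enable RemoteModels and resolve model info through a
    // ModelsManager backed by an AuthManager holding a dummy ChatGPT auth.
    config.features.enable(Feature::RemoteModels);
    let auth_manager = codex_core::AuthManager::from_auth_for_testing(
        CodexAuth::create_dummy_chatgpt_auth_for_testing(),
    );
    let manager = ModelsManager::new(config.codex_home.clone(), auth_manager);
    let model_info = manager.get_model_info("gpt-5.1", &config).await;

    // Assertions on model_info.truncation_policy follow, as in the tests below.
    let _ = model_info;
}
```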
Commit a1abd53b6a (parent a3e4bd3bc0), authored by Ahmed Ibrahim and committed via GitHub on 2026-02-09 16:58:54 -08:00.
8 changed files with 122 additions and 499 deletions

==== changed file (path not captured) ====

@@ -1,3 +1,5 @@
+use codex_core::CodexAuth;
+use codex_core::features::Feature;
 use codex_core::models_manager::manager::ModelsManager;
 use codex_protocol::openai_models::TruncationPolicyConfig;
 use core_test_support::load_default_config_for_test;
@@ -7,9 +9,14 @@ use tempfile::TempDir;
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn offline_model_info_without_tool_output_override() {
     let codex_home = TempDir::new().expect("create temp dir");
-    let config = load_default_config_for_test(&codex_home).await;
+    let mut config = load_default_config_for_test(&codex_home).await;
+    config.features.enable(Feature::RemoteModels);
+    let auth_manager = codex_core::AuthManager::from_auth_for_testing(
+        CodexAuth::create_dummy_chatgpt_auth_for_testing(),
+    );
+    let manager = ModelsManager::new(config.codex_home.clone(), auth_manager);
-    let model_info = ModelsManager::construct_model_info_offline("gpt-5.1", &config);
+    let model_info = manager.get_model_info("gpt-5.1", &config).await;
     assert_eq!(
         model_info.truncation_policy,
@@ -21,9 +28,14 @@ async fn offline_model_info_without_tool_output_override() {
 async fn offline_model_info_with_tool_output_override() {
     let codex_home = TempDir::new().expect("create temp dir");
     let mut config = load_default_config_for_test(&codex_home).await;
+    config.features.enable(Feature::RemoteModels);
     config.tool_output_token_limit = Some(123);
+    let auth_manager = codex_core::AuthManager::from_auth_for_testing(
+        CodexAuth::create_dummy_chatgpt_auth_for_testing(),
+    );
+    let manager = ModelsManager::new(config.codex_home.clone(), auth_manager);
-    let model_info = ModelsManager::construct_model_info_offline("gpt-5.1-codex", &config);
+    let model_info = manager.get_model_info("gpt-5.1-codex", &config).await;
     assert_eq!(
         model_info.truncation_policy,

==== changed file (path not captured) ====

@@ -36,6 +36,7 @@ async fn collect_tool_identifiers_for_model(model: &str) -> Vec<String> {
         .with_model(model)
         // Keep tool expectations stable when the default web_search mode changes.
         .with_config(|config| {
+            config.features.enable(Feature::RemoteModels);
             config
                 .web_search_mode
                 .set(WebSearchMode::Cached)
@@ -68,22 +69,23 @@ async fn model_selects_expected_tools() {
     skip_if_no_network!();
     use pretty_assertions::assert_eq;
-    let codex_tools = collect_tool_identifiers_for_model("codex-mini-latest").await;
+    let gpt51_codex_max_tools = collect_tool_identifiers_for_model("gpt-5.1-codex-max").await;
     assert_eq!(
-        codex_tools,
+        gpt51_codex_max_tools,
         expected_default_tools(
-            "local_shell",
+            "shell_command",
             &[
                 "list_mcp_resources",
                 "list_mcp_resource_templates",
                 "read_mcp_resource",
                 "update_plan",
                 "request_user_input",
+                "apply_patch",
                 "web_search",
                 "view_image",
             ],
         ),
-        "codex-mini-latest should expose the local shell tool",
+        "gpt-5.1-codex-max should expose the apply_patch tool",
     );
     let gpt5_codex_tools = collect_tool_identifiers_for_model("gpt-5-codex").await;
let gpt5_codex_tools = collect_tool_identifiers_for_model("gpt-5-codex").await;
@@ -160,21 +162,4 @@ async fn model_selects_expected_tools() {
         ),
         "gpt-5.1 should expose the apply_patch tool",
     );
-    let exp_tools = collect_tool_identifiers_for_model("exp-5.1").await;
-    assert_eq!(
-        exp_tools,
-        vec![
-            "exec_command".to_string(),
-            "write_stdin".to_string(),
-            "list_mcp_resources".to_string(),
-            "list_mcp_resource_templates".to_string(),
-            "read_mcp_resource".to_string(),
-            "update_plan".to_string(),
-            "request_user_input".to_string(),
-            "apply_patch".to_string(),
-            "web_search".to_string(),
-            "view_image".to_string()
-        ],
-        "exp-5.1 should expose the apply_patch tool",
-    );
 }
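For context on the assertions above, `expected_default_tools` appears in this diff only as a call site. A plausible shape, inferred from how its result is compared against `collect_tool_identifiers_for_model` — this sketch is an assumption, not the crate's actual helper:

```rust
// Assumed shape, inferred from the call sites above: the first argument is the
// model's shell tool identifier, and the remaining identifiers are appended
// in the order given.
fn expected_default_tools(shell_tool: &str, rest: &[&str]) -> Vec<String> {
    std::iter::once(shell_tool)
        .chain(rest.iter().copied())
        .map(str::to_string)
        .collect()
}
```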

==== changed file (path not captured) ====

@@ -2,7 +2,6 @@
 use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
 use codex_core::features::Feature;
-use codex_core::models_manager::model_info::BASE_INSTRUCTIONS;
 use codex_core::protocol::AskForApproval;
 use codex_core::protocol::ENVIRONMENT_CONTEXT_OPEN_TAG;
 use codex_core::protocol::EventMsg;
@@ -179,7 +178,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
 }
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn codex_mini_latest_tools() -> anyhow::Result<()> {
+async fn gpt_5_tools_without_apply_patch_append_apply_patch_instructions() -> anyhow::Result<()> {
     skip_if_no_network!(Ok(()));
     use pretty_assertions::assert_eq;
@@ -198,9 +197,10 @@ async fn codex_mini_latest_tools() -> anyhow::Result<()> {
     let TestCodex { codex, .. } = test_codex()
         .with_config(|config| {
            config.user_instructions = Some("be consistent and helpful".to_string());
+            config.features.enable(Feature::RemoteModels);
             config.features.disable(Feature::ApplyPatchFreeform);
             config.features.enable(Feature::CollaborationModes);
-            config.model = Some("codex-mini-latest".to_string());
+            config.model = Some("gpt-5".to_string());
         })
         .build(&server)
         .await?;
@@ -228,15 +228,13 @@ async fn codex_mini_latest_tools() -> anyhow::Result<()> {
     wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
-    let expected_instructions = [BASE_INSTRUCTIONS, APPLY_PATCH_TOOL_INSTRUCTIONS].join("\n");
     let body0 = req1.single_request().body_json();
     let instructions0 = body0["instructions"]
         .as_str()
         .expect("instructions should be a string");
-    assert_eq!(
-        normalize_newlines(instructions0),
-        normalize_newlines(&expected_instructions)
+    assert!(
+        instructions0.contains("You are"),
+        "expected non-empty instructions"
     );
     let body1 = req2.single_request().body_json();
@@ -245,7 +243,7 @@ async fn codex_mini_latest_tools() -> anyhow::Result<()> {
         .expect("instructions should be a string");
     assert_eq!(
         normalize_newlines(instructions1),
-        normalize_newlines(&expected_instructions)
+        normalize_newlines(instructions0)
     );
     Ok(())

==== changed file (path not captured) ====

@@ -18,7 +18,6 @@ use core_test_support::responses::ev_completed;
 use core_test_support::responses::ev_function_call;
 use core_test_support::responses::ev_response_created;
 use core_test_support::responses::mount_sse_once;
-use core_test_support::responses::mount_sse_sequence;
 use core_test_support::responses::sse;
 use core_test_support::responses::start_mock_server;
 use core_test_support::skip_if_no_network;
@@ -30,67 +29,6 @@ use serde_json::json;
 use std::collections::HashMap;
 use std::time::Duration;
-// Verifies byte-truncation formatting for function error output (RespondToModel errors)
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
-    skip_if_no_network!(Ok(()));
-    let server = start_mock_server().await;
-    let mut builder = test_codex().with_model("test-gpt-5.1-codex");
-    let test = builder.build(&server).await?;
-    // Construct a very long, non-existent path to force a RespondToModel error with a large message
-    let long_path = "long path text should trigger truncation".repeat(8_000);
-    let call_id = "grep-huge-error";
-    let args = json!({
-        "pattern": "alpha",
-        "path": long_path,
-        "limit": 10
-    });
-    let responses = vec![
-        sse(vec![
-            ev_response_created("resp-1"),
-            ev_function_call(call_id, "grep_files", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
-    let mock = mount_sse_sequence(&server, responses).await;
-    test.submit_turn_with_policy(
-        "trigger grep_files with long path to test truncation",
-        SandboxPolicy::DangerFullAccess,
-    )
-    .await?;
-    let output = mock
-        .function_call_output_text(call_id)
-        .context("function error output present")?;
-    tracing::debug!(output = %output, "truncated function error output");
-    // Expect plaintext with token-based truncation marker and no omitted-lines marker
-    assert!(
-        serde_json::from_str::<serde_json::Value>(&output).is_err(),
-        "expected error output to be plain text",
-    );
-    assert!(
-        !output.contains("Total output lines:"),
-        "error output should not include line-based truncation header: {output}",
-    );
-    let truncated_pattern = r"(?s)^unable to access `.*tokens truncated.*$";
-    assert_regex_match(truncated_pattern, &output);
-    assert!(
-        !output.contains("omitted"),
-        "line omission marker should not appear when no lines were dropped: {output}"
-    );
-    Ok(())
-}
 // Verifies that a standard tool call (shell_command) exceeding the model formatting
 // limits is truncated before being sent back to the model.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]