Update models.json (#18586)

- Replace the active models-manager catalog with the contents of the
deleted core catalog.
- Replace stale hardcoded test model slugs with current bundled model
slugs.
- Keep this as a stacked change on top of the cleanup PR.
This commit is contained in:
Ahmed Ibrahim
2026-04-20 10:27:01 -07:00
committed by GitHub
parent 5d5d610740
commit 316cf0e90b
63 changed files with 540 additions and 1016 deletions

View File

@@ -782,7 +782,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
@@ -799,7 +799,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
model_override: Some("gpt-5.4"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
@@ -816,7 +816,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccess {
body_contains: "danger-network-ok",
@@ -832,7 +832,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
model_override: Some("gpt-5.4"),
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccessNoExitCode {
body_contains: "danger-network-ok",
@@ -847,7 +847,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "trusted-unless",
@@ -862,7 +862,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
model_override: Some("gpt-5.4"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccessNoExitCode {
stdout_contains: "trusted-unless",
@@ -877,7 +877,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Denied,
expected_reason: None,
@@ -895,7 +895,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Denied,
expected_reason: None,
@@ -914,7 +914,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
@@ -931,7 +931,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
model_override: Some("gpt-5.4"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
@@ -948,7 +948,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -968,7 +968,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
model_override: Some("gpt-5.4"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -988,7 +988,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::OutsideWorkspace("dfa_never.txt"),
@@ -1005,7 +1005,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
model_override: Some("gpt-5.4"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
@@ -1022,7 +1022,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1042,7 +1042,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5.1"),
model_override: Some("gpt-5.4"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1061,7 +1061,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "trusted-read-only",
@@ -1076,7 +1076,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
model_override: Some("gpt-5.4"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccessNoExitCode {
stdout_contains: "trusted-read-only",
@@ -1127,7 +1127,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some("command failed; retry without sandbox?"),
@@ -1148,7 +1148,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
model_override: Some("gpt-5.4"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some("command failed; retry without sandbox?"),
@@ -1168,7 +1168,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1187,7 +1187,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5.1"),
model_override: Some("gpt-5.4"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1226,7 +1226,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
model_override: Some("gpt-5.4"),
outcome: Outcome::Auto,
expectation: Expectation::PatchApplied {
target: TargetPath::Workspace("apply_patch_function.txt"),
@@ -1243,7 +1243,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![Feature::ApplyPatchFreeform],
model_override: Some("gpt-5.1-codex"),
model_override: Some("gpt-5.4"),
outcome: Outcome::Auto,
expectation: Expectation::PatchApplied {
target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
@@ -1260,7 +1260,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
model_override: Some("gpt-5.4"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1280,7 +1280,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
model_override: Some("gpt-5.4"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Denied,
expected_reason: None,
@@ -1320,7 +1320,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
model_override: Some("gpt-5.4"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1340,7 +1340,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
model_override: Some("gpt-5.4"),
outcome: Outcome::Auto,
expectation: Expectation::FileNotCreated {
target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
@@ -1359,7 +1359,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1379,7 +1379,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
model_override: Some("gpt-5.4"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1422,7 +1422,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "trusted-never",
@@ -1438,7 +1438,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::Workspace("ww_on_request.txt"),
@@ -1469,7 +1469,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1489,7 +1489,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccess {
body_contains: "workspace-network-ok",
@@ -1506,7 +1506,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some("command failed; retry without sandbox?"),
@@ -1526,7 +1526,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1570,7 +1570,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![Feature::UnifiedExec],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "hello unified exec",
@@ -1588,7 +1588,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![Feature::UnifiedExec],
model_override: Some("gpt-5"),
model_override: Some("gpt-5.2"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
@@ -1675,7 +1675,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
let sandbox_policy = scenario.sandbox_policy.clone();
let features = scenario.features.clone();
let model_override = scenario.model_override;
let model = model_override.unwrap_or("gpt-5.1");
let model = model_override.unwrap_or("gpt-5.4");
let mut builder = test_codex().with_model(model).with_config(move |config| {
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
@@ -1804,7 +1804,7 @@ async fn approving_apply_patch_for_session_skips_future_prompts_for_same_file()
let sandbox_policy_for_config = sandbox_policy.clone();
let mut builder = test_codex()
.with_model("gpt-5.1-codex")
.with_model("gpt-5.4")
.with_config(move |config| {
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);