mirror of
https://github.com/openai/codex.git
synced 2026-04-30 17:36:40 +00:00
Update models.json (#18586)
- Replace the active models-manager catalog with the deleted core catalog contents.
- Replace stale hardcoded test model slugs with current bundled model slugs.
- Keep this as a stacked change on top of the cleanup PR.
This commit is contained in:
@@ -782,7 +782,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
|
||||
@@ -799,7 +799,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
|
||||
@@ -816,7 +816,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::NetworkSuccess {
|
||||
body_contains: "danger-network-ok",
|
||||
@@ -832,7 +832,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::NetworkSuccessNoExitCode {
|
||||
body_contains: "danger-network-ok",
|
||||
@@ -847,7 +847,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccess {
|
||||
stdout_contains: "trusted-unless",
|
||||
@@ -862,7 +862,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccessNoExitCode {
|
||||
stdout_contains: "trusted-unless",
|
||||
@@ -877,7 +877,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Denied,
|
||||
expected_reason: None,
|
||||
@@ -895,7 +895,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Denied,
|
||||
expected_reason: None,
|
||||
@@ -914,7 +914,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
|
||||
@@ -931,7 +931,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
|
||||
@@ -948,7 +948,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -968,7 +968,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -988,7 +988,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::OutsideWorkspace("dfa_never.txt"),
|
||||
@@ -1005,7 +1005,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
|
||||
@@ -1022,7 +1022,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1042,7 +1042,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1061,7 +1061,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccess {
|
||||
stdout_contains: "trusted-read-only",
|
||||
@@ -1076,7 +1076,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccessNoExitCode {
|
||||
stdout_contains: "trusted-read-only",
|
||||
@@ -1127,7 +1127,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: Some("command failed; retry without sandbox?"),
|
||||
@@ -1148,7 +1148,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: Some("command failed; retry without sandbox?"),
|
||||
@@ -1168,7 +1168,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1187,7 +1187,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1226,7 +1226,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::PatchApplied {
|
||||
target: TargetPath::Workspace("apply_patch_function.txt"),
|
||||
@@ -1243,7 +1243,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![Feature::ApplyPatchFreeform],
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::PatchApplied {
|
||||
target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
|
||||
@@ -1260,7 +1260,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::PatchApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1280,7 +1280,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::PatchApproval {
|
||||
decision: ReviewDecision::Denied,
|
||||
expected_reason: None,
|
||||
@@ -1320,7 +1320,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::PatchApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1340,7 +1340,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileNotCreated {
|
||||
target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
|
||||
@@ -1359,7 +1359,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1379,7 +1379,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
model_override: Some("gpt-5.4"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1422,7 +1422,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccess {
|
||||
stdout_contains: "trusted-never",
|
||||
@@ -1438,7 +1438,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::Workspace("ww_on_request.txt"),
|
||||
@@ -1469,7 +1469,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1489,7 +1489,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::NetworkSuccess {
|
||||
body_contains: "workspace-network-ok",
|
||||
@@ -1506,7 +1506,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: Some("command failed; retry without sandbox?"),
|
||||
@@ -1526,7 +1526,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1570,7 +1570,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
features: vec![Feature::UnifiedExec],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccess {
|
||||
stdout_contains: "hello unified exec",
|
||||
@@ -1588,7 +1588,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
features: vec![Feature::UnifiedExec],
|
||||
model_override: Some("gpt-5"),
|
||||
model_override: Some("gpt-5.2"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
|
||||
@@ -1675,7 +1675,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
|
||||
let sandbox_policy = scenario.sandbox_policy.clone();
|
||||
let features = scenario.features.clone();
|
||||
let model_override = scenario.model_override;
|
||||
let model = model_override.unwrap_or("gpt-5.1");
|
||||
let model = model_override.unwrap_or("gpt-5.4");
|
||||
|
||||
let mut builder = test_codex().with_model(model).with_config(move |config| {
|
||||
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
||||
@@ -1804,7 +1804,7 @@ async fn approving_apply_patch_for_session_skips_future_prompts_for_same_file()
|
||||
let sandbox_policy_for_config = sandbox_policy.clone();
|
||||
|
||||
let mut builder = test_codex()
|
||||
.with_model("gpt-5.1-codex")
|
||||
.with_model("gpt-5.4")
|
||||
.with_config(move |config| {
|
||||
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
||||
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
||||
|
||||
Reference in New Issue
Block a user