Update defaults to gpt-5.1 (#6652)

## Summary
- update documentation, example configs, and automation defaults to
reference gpt-5.1 / gpt-5.1-codex
- bump the CLI and core configuration defaults, model presets, and error
messaging to the new models while keeping the model-family/tool coverage
for legacy slugs
- refresh tests, fixtures, and TUI snapshots so they expect the upgraded
defaults

## Testing
- `cargo test -p codex-core config::tests::test_precedence_fixture_with_gpt5_profile`


------
[Codex Task](https://chatgpt.com/codex/tasks/task_i_6916c5b3c2b08321ace04ee38604fc6b)
This commit is contained in:
Ahmed Ibrahim
2025-11-17 17:40:11 -08:00
committed by GitHub
parent 8465f1f2f4
commit ddcc60a085
43 changed files with 483 additions and 238 deletions

View File

@@ -240,6 +240,10 @@ enum Expectation {
target: TargetPath,
content: &'static str,
},
FileCreatedNoExitCode {
target: TargetPath,
content: &'static str,
},
PatchApplied {
target: TargetPath,
content: &'static str,
@@ -251,12 +255,18 @@ enum Expectation {
NetworkSuccess {
body_contains: &'static str,
},
NetworkSuccessNoExitCode {
body_contains: &'static str,
},
NetworkFailure {
expect_tag: &'static str,
},
CommandSuccess {
stdout_contains: &'static str,
},
CommandSuccessNoExitCode {
stdout_contains: &'static str,
},
CommandFailure {
output_contains: &'static str,
},
@@ -270,8 +280,7 @@ impl Expectation {
assert_eq!(
result.exit_code,
Some(0),
"expected successful exit for {:?}",
path
"expected successful exit for {path:?}"
);
assert!(
result.stdout.contains(content),
@@ -285,6 +294,21 @@ impl Expectation {
);
let _ = fs::remove_file(path);
}
Expectation::FileCreatedNoExitCode { target, content } => {
let (path, _) = target.resolve_for_patch(test);
assert_eq!(result.exit_code, None, "expected no exit code for {path:?}");
assert!(
result.stdout.contains(content),
"stdout missing {content:?}: {}",
result.stdout
);
let file_contents = fs::read_to_string(&path)?;
assert!(
file_contents.contains(content),
"file contents missing {content:?}: {file_contents}"
);
let _ = fs::remove_file(path);
}
Expectation::PatchApplied { target, content } => {
let (path, _) = target.resolve_for_patch(test);
match result.exit_code {
@@ -360,6 +384,23 @@ impl Expectation {
result.stdout
);
}
Expectation::NetworkSuccessNoExitCode { body_contains } => {
assert_eq!(
result.exit_code, None,
"expected no exit code for successful network call: {}",
result.stdout
);
assert!(
result.stdout.contains("OK:"),
"stdout missing OK prefix: {}",
result.stdout
);
assert!(
result.stdout.contains(body_contains),
"stdout missing body text {body_contains:?}: {}",
result.stdout
);
}
Expectation::NetworkFailure { expect_tag } => {
assert_ne!(
result.exit_code,
@@ -391,6 +432,18 @@ impl Expectation {
result.stdout
);
}
Expectation::CommandSuccessNoExitCode { stdout_contains } => {
assert_eq!(
result.exit_code, None,
"expected no exit code for trusted command: {}",
result.stdout
);
assert!(
result.stdout.contains(stdout_contains),
"trusted command stdout missing {stdout_contains:?}: {}",
result.stdout
);
}
Expectation::CommandFailure { output_contains } => {
assert_ne!(
result.exit_code,
@@ -588,13 +641,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
content: "danger-on-request",
},
},
ScenarioSpec {
name: "danger_full_access_on_request_allows_outside_write_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::WriteFile {
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
content: "danger-on-request",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
content: "danger-on-request",
},
},
ScenarioSpec {
name: "danger_full_access_on_request_allows_network",
approval_policy: OnRequest,
@@ -605,12 +675,28 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccess {
body_contains: "danger-network-ok",
},
},
ScenarioSpec {
name: "danger_full_access_on_request_allows_network_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::FetchUrl {
endpoint: "/dfa/network",
response_body: "danger-network-ok",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccessNoExitCode {
body_contains: "danger-network-ok",
},
},
ScenarioSpec {
name: "trusted_command_unless_trusted_runs_without_prompt",
approval_policy: UnlessTrusted,
@@ -620,12 +706,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "trusted-unless",
},
},
ScenarioSpec {
name: "trusted_command_unless_trusted_runs_without_prompt_gpt_5_1_no_exit",
approval_policy: UnlessTrusted,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::RunCommand {
command: &["echo", "trusted-unless"],
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccessNoExitCode {
stdout_contains: "trusted-unless",
},
},
ScenarioSpec {
name: "danger_full_access_on_failure_allows_outside_write",
approval_policy: OnFailure,
@@ -636,13 +737,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
content: "danger-on-failure",
},
},
ScenarioSpec {
name: "danger_full_access_on_failure_allows_outside_write_gpt_5_1_no_exit",
approval_policy: OnFailure,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::WriteFile {
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
content: "danger-on-failure",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
content: "danger-on-failure",
},
},
ScenarioSpec {
name: "danger_full_access_unless_trusted_requests_approval",
approval_policy: UnlessTrusted,
@@ -653,7 +771,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -663,6 +781,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "danger-unless-trusted",
},
},
ScenarioSpec {
name: "danger_full_access_unless_trusted_requests_approval_gpt_5_1_no_exit",
approval_policy: UnlessTrusted,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::WriteFile {
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
content: "danger-unless-trusted",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
},
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
content: "danger-unless-trusted",
},
},
ScenarioSpec {
name: "danger_full_access_never_allows_outside_write",
approval_policy: Never,
@@ -673,13 +811,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::OutsideWorkspace("dfa_never.txt"),
content: "danger-never",
},
},
ScenarioSpec {
name: "danger_full_access_never_allows_outside_write_gpt_5_1_no_exit",
approval_policy: Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::WriteFile {
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
content: "danger-never",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
content: "danger-never",
},
},
ScenarioSpec {
name: "read_only_on_request_requires_approval",
approval_policy: OnRequest,
@@ -690,7 +845,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: true,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -700,6 +855,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "read-only-approval",
},
},
ScenarioSpec {
name: "read_only_on_request_requires_approval_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::WriteFile {
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
content: "read-only-approval",
},
with_escalated_permissions: true,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
},
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
content: "read-only-approval",
},
},
ScenarioSpec {
name: "trusted_command_on_request_read_only_runs_without_prompt",
approval_policy: OnRequest,
@@ -709,12 +884,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "trusted-read-only",
},
},
ScenarioSpec {
name: "trusted_command_on_request_read_only_runs_without_prompt_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::RunCommand {
command: &["echo", "trusted-read-only"],
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccessNoExitCode {
stdout_contains: "trusted-read-only",
},
},
ScenarioSpec {
name: "read_only_on_request_blocks_network",
approval_policy: OnRequest,
@@ -760,7 +950,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some("command failed; retry without sandbox?"),
@@ -770,6 +960,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "read-only-on-failure",
},
},
#[cfg(not(target_os = "linux"))]
ScenarioSpec {
name: "read_only_on_failure_escalates_after_sandbox_error_gpt_5_1_no_exit",
approval_policy: OnFailure,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::WriteFile {
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
content: "read-only-on-failure",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some("command failed; retry without sandbox?"),
},
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
content: "read-only-on-failure",
},
},
ScenarioSpec {
name: "read_only_on_request_network_escalates_when_approved",
approval_policy: OnRequest,
@@ -780,7 +991,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: true,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -789,6 +1000,25 @@ fn scenarios() -> Vec<ScenarioSpec> {
body_contains: "read-only-network-ok",
},
},
ScenarioSpec {
name: "read_only_on_request_network_escalates_when_approved_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::FetchUrl {
endpoint: "/ro/network-approved",
response_body: "read-only-network-ok",
},
with_escalated_permissions: true,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
},
expectation: Expectation::NetworkSuccessNoExitCode {
body_contains: "read-only-network-ok",
},
},
ScenarioSpec {
name: "apply_patch_shell_requires_patch_approval",
approval_policy: UnlessTrusted,
@@ -819,7 +1049,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
expectation: Expectation::PatchApplied {
target: TargetPath::Workspace("apply_patch_function.txt"),
@@ -836,7 +1066,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![Feature::ApplyPatchFreeform],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
expectation: Expectation::PatchApplied {
target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
@@ -853,7 +1083,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -873,7 +1103,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Denied,
expected_reason: None,
@@ -913,7 +1143,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -933,7 +1163,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
expectation: Expectation::FileNotCreated {
target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
@@ -952,7 +1182,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -962,6 +1192,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "read-only-unless-trusted",
},
},
ScenarioSpec {
name: "read_only_unless_trusted_requires_approval_gpt_5_1_no_exit",
approval_policy: UnlessTrusted,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::WriteFile {
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
content: "read-only-unless-trusted",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
},
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
content: "read-only-unless-trusted",
},
},
ScenarioSpec {
name: "read_only_never_reports_sandbox_failure",
approval_policy: Never,
@@ -992,7 +1242,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "trusted-never",
@@ -1008,7 +1258,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::Workspace("ww_on_request.txt"),
@@ -1039,7 +1289,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: true,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1059,7 +1309,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccess {
body_contains: "workspace-network-ok",
@@ -1076,7 +1326,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some("command failed; retry without sandbox?"),
@@ -1096,7 +1346,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1137,7 +1387,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![Feature::UnifiedExec],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "hello unified exec",
@@ -1155,7 +1405,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: true,
features: vec![Feature::UnifiedExec],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
@@ -1208,7 +1458,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
let mut builder = test_codex().with_config(move |config| {
config.approval_policy = approval_policy;
config.sandbox_policy = sandbox_policy.clone();
let model = model_override.unwrap_or("gpt-5");
let model = model_override.unwrap_or("gpt-5.1");
config.model = model.to_string();
config.model_family =
find_family_for_model(model).expect("model should map to a known family");