Update defaults to gpt-5.1 (#6652)

## Summary
- update documentation, example configs, and automation defaults to
reference gpt-5.1 / gpt-5.1-codex
- bump the CLI and core configuration defaults, model presets, and error
messaging to the new models while keeping the model-family/tool coverage
for legacy slugs
- refresh tests, fixtures, and TUI snapshots so they expect the upgraded
defaults

## Testing
- `cargo test -p codex-core
config::tests::test_precedence_fixture_with_gpt5_profile`


------
[Codex
Task](https://chatgpt.com/codex/tasks/task_i_6916c5b3c2b08321ace04ee38604fc6b)
This commit is contained in:
Ahmed Ibrahim
2025-11-17 17:40:11 -08:00
committed by GitHub
parent 8465f1f2f4
commit ddcc60a085
43 changed files with 483 additions and 238 deletions

View File

@@ -87,8 +87,8 @@ async fn apply_patch_cli_multiple_operations_integration(
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
})
.await?;
@@ -671,8 +671,8 @@ async fn apply_patch_shell_heredoc_with_cd_updates_relative_workdir() -> Result<
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
})
.await?;
@@ -717,8 +717,8 @@ async fn apply_patch_shell_failure_propagates_error_and_skips_diff() -> Result<(
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
})
.await?;
let test = harness.test();

View File

@@ -240,6 +240,10 @@ enum Expectation {
target: TargetPath,
content: &'static str,
},
FileCreatedNoExitCode {
target: TargetPath,
content: &'static str,
},
PatchApplied {
target: TargetPath,
content: &'static str,
@@ -251,12 +255,18 @@ enum Expectation {
NetworkSuccess {
body_contains: &'static str,
},
NetworkSuccessNoExitCode {
body_contains: &'static str,
},
NetworkFailure {
expect_tag: &'static str,
},
CommandSuccess {
stdout_contains: &'static str,
},
CommandSuccessNoExitCode {
stdout_contains: &'static str,
},
CommandFailure {
output_contains: &'static str,
},
@@ -270,8 +280,7 @@ impl Expectation {
assert_eq!(
result.exit_code,
Some(0),
"expected successful exit for {:?}",
path
"expected successful exit for {path:?}"
);
assert!(
result.stdout.contains(content),
@@ -285,6 +294,21 @@ impl Expectation {
);
let _ = fs::remove_file(path);
}
Expectation::FileCreatedNoExitCode { target, content } => {
let (path, _) = target.resolve_for_patch(test);
assert_eq!(result.exit_code, None, "expected no exit code for {path:?}");
assert!(
result.stdout.contains(content),
"stdout missing {content:?}: {}",
result.stdout
);
let file_contents = fs::read_to_string(&path)?;
assert!(
file_contents.contains(content),
"file contents missing {content:?}: {file_contents}"
);
let _ = fs::remove_file(path);
}
Expectation::PatchApplied { target, content } => {
let (path, _) = target.resolve_for_patch(test);
match result.exit_code {
@@ -360,6 +384,23 @@ impl Expectation {
result.stdout
);
}
Expectation::NetworkSuccessNoExitCode { body_contains } => {
assert_eq!(
result.exit_code, None,
"expected no exit code for successful network call: {}",
result.stdout
);
assert!(
result.stdout.contains("OK:"),
"stdout missing OK prefix: {}",
result.stdout
);
assert!(
result.stdout.contains(body_contains),
"stdout missing body text {body_contains:?}: {}",
result.stdout
);
}
Expectation::NetworkFailure { expect_tag } => {
assert_ne!(
result.exit_code,
@@ -391,6 +432,18 @@ impl Expectation {
result.stdout
);
}
Expectation::CommandSuccessNoExitCode { stdout_contains } => {
assert_eq!(
result.exit_code, None,
"expected no exit code for trusted command: {}",
result.stdout
);
assert!(
result.stdout.contains(stdout_contains),
"trusted command stdout missing {stdout_contains:?}: {}",
result.stdout
);
}
Expectation::CommandFailure { output_contains } => {
assert_ne!(
result.exit_code,
@@ -588,13 +641,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
content: "danger-on-request",
},
},
ScenarioSpec {
name: "danger_full_access_on_request_allows_outside_write_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::WriteFile {
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
content: "danger-on-request",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
content: "danger-on-request",
},
},
ScenarioSpec {
name: "danger_full_access_on_request_allows_network",
approval_policy: OnRequest,
@@ -605,12 +675,28 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccess {
body_contains: "danger-network-ok",
},
},
ScenarioSpec {
name: "danger_full_access_on_request_allows_network_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::FetchUrl {
endpoint: "/dfa/network",
response_body: "danger-network-ok",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccessNoExitCode {
body_contains: "danger-network-ok",
},
},
ScenarioSpec {
name: "trusted_command_unless_trusted_runs_without_prompt",
approval_policy: UnlessTrusted,
@@ -620,12 +706,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "trusted-unless",
},
},
ScenarioSpec {
name: "trusted_command_unless_trusted_runs_without_prompt_gpt_5_1_no_exit",
approval_policy: UnlessTrusted,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::RunCommand {
command: &["echo", "trusted-unless"],
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccessNoExitCode {
stdout_contains: "trusted-unless",
},
},
ScenarioSpec {
name: "danger_full_access_on_failure_allows_outside_write",
approval_policy: OnFailure,
@@ -636,13 +737,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
content: "danger-on-failure",
},
},
ScenarioSpec {
name: "danger_full_access_on_failure_allows_outside_write_gpt_5_1_no_exit",
approval_policy: OnFailure,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::WriteFile {
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
content: "danger-on-failure",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
content: "danger-on-failure",
},
},
ScenarioSpec {
name: "danger_full_access_unless_trusted_requests_approval",
approval_policy: UnlessTrusted,
@@ -653,7 +771,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -663,6 +781,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "danger-unless-trusted",
},
},
ScenarioSpec {
name: "danger_full_access_unless_trusted_requests_approval_gpt_5_1_no_exit",
approval_policy: UnlessTrusted,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::WriteFile {
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
content: "danger-unless-trusted",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
},
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
content: "danger-unless-trusted",
},
},
ScenarioSpec {
name: "danger_full_access_never_allows_outside_write",
approval_policy: Never,
@@ -673,13 +811,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::OutsideWorkspace("dfa_never.txt"),
content: "danger-never",
},
},
ScenarioSpec {
name: "danger_full_access_never_allows_outside_write_gpt_5_1_no_exit",
approval_policy: Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::WriteFile {
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
content: "danger-never",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
content: "danger-never",
},
},
ScenarioSpec {
name: "read_only_on_request_requires_approval",
approval_policy: OnRequest,
@@ -690,7 +845,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: true,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -700,6 +855,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "read-only-approval",
},
},
ScenarioSpec {
name: "read_only_on_request_requires_approval_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::WriteFile {
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
content: "read-only-approval",
},
with_escalated_permissions: true,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
},
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
content: "read-only-approval",
},
},
ScenarioSpec {
name: "trusted_command_on_request_read_only_runs_without_prompt",
approval_policy: OnRequest,
@@ -709,12 +884,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "trusted-read-only",
},
},
ScenarioSpec {
name: "trusted_command_on_request_read_only_runs_without_prompt_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::RunCommand {
command: &["echo", "trusted-read-only"],
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccessNoExitCode {
stdout_contains: "trusted-read-only",
},
},
ScenarioSpec {
name: "read_only_on_request_blocks_network",
approval_policy: OnRequest,
@@ -760,7 +950,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some("command failed; retry without sandbox?"),
@@ -770,6 +960,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "read-only-on-failure",
},
},
#[cfg(not(target_os = "linux"))]
ScenarioSpec {
name: "read_only_on_failure_escalates_after_sandbox_error_gpt_5_1_no_exit",
approval_policy: OnFailure,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::WriteFile {
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
content: "read-only-on-failure",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some("command failed; retry without sandbox?"),
},
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
content: "read-only-on-failure",
},
},
ScenarioSpec {
name: "read_only_on_request_network_escalates_when_approved",
approval_policy: OnRequest,
@@ -780,7 +991,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: true,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -789,6 +1000,25 @@ fn scenarios() -> Vec<ScenarioSpec> {
body_contains: "read-only-network-ok",
},
},
ScenarioSpec {
name: "read_only_on_request_network_escalates_when_approved_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::FetchUrl {
endpoint: "/ro/network-approved",
response_body: "read-only-network-ok",
},
with_escalated_permissions: true,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
},
expectation: Expectation::NetworkSuccessNoExitCode {
body_contains: "read-only-network-ok",
},
},
ScenarioSpec {
name: "apply_patch_shell_requires_patch_approval",
approval_policy: UnlessTrusted,
@@ -819,7 +1049,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
expectation: Expectation::PatchApplied {
target: TargetPath::Workspace("apply_patch_function.txt"),
@@ -836,7 +1066,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![Feature::ApplyPatchFreeform],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
expectation: Expectation::PatchApplied {
target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
@@ -853,7 +1083,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -873,7 +1103,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Denied,
expected_reason: None,
@@ -913,7 +1143,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -933,7 +1163,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
expectation: Expectation::FileNotCreated {
target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
@@ -952,7 +1182,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -962,6 +1192,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "read-only-unless-trusted",
},
},
ScenarioSpec {
name: "read_only_unless_trusted_requires_approval_gpt_5_1_no_exit",
approval_policy: UnlessTrusted,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::WriteFile {
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
content: "read-only-unless-trusted",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
},
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
content: "read-only-unless-trusted",
},
},
ScenarioSpec {
name: "read_only_never_reports_sandbox_failure",
approval_policy: Never,
@@ -992,7 +1242,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "trusted-never",
@@ -1008,7 +1258,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::Workspace("ww_on_request.txt"),
@@ -1039,7 +1289,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: true,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1059,7 +1309,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccess {
body_contains: "workspace-network-ok",
@@ -1076,7 +1326,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some("command failed; retry without sandbox?"),
@@ -1096,7 +1346,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1137,7 +1387,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![Feature::UnifiedExec],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "hello unified exec",
@@ -1155,7 +1405,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: true,
features: vec![Feature::UnifiedExec],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
@@ -1208,7 +1458,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
let mut builder = test_codex().with_config(move |config| {
config.approval_policy = approval_policy;
config.sandbox_policy = sandbox_policy.clone();
let model = model_override.unwrap_or("gpt-5");
let model = model_override.unwrap_or("gpt-5.1");
config.model = model.to_string();
config.model_family =
find_family_for_model(model).expect("model should map to a known family");

View File

@@ -769,7 +769,7 @@ async fn configured_verbosity_not_sent_for_models_without_support() -> anyhow::R
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5-codex")
.with_model("gpt-5.1-codex")
.with_config(|config| {
config.model_verbosity = Some(Verbosity::High);
})
@@ -807,7 +807,7 @@ async fn configured_verbosity_is_sent() -> anyhow::Result<()> {
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5")
.with_model("gpt-5.1")
.with_config(|config| {
config.model_verbosity = Some(Verbosity::High);
})
@@ -1155,7 +1155,7 @@ async fn token_count_includes_rate_limits_snapshot() {
"reasoning_output_tokens": 0,
"total_tokens": 123
},
// Default model is gpt-5-codex in tests → 95% usable context window
// Default model is gpt-5.1-codex in tests → 95% usable context window
"model_context_window": 258400
},
"rate_limits": {
@@ -1304,8 +1304,9 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
let TestCodex { codex, .. } = test_codex()
.with_config(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("known gpt-5 model family");
config.model = "gpt-5.1".to_string();
config.model_family =
find_family_for_model("gpt-5.1").expect("known gpt-5.1 model family");
config.model_context_window = Some(272_000);
})
.build(&server)

View File

@@ -18,7 +18,6 @@ use codex_core::NewConversation;
use codex_core::built_in_model_providers;
use codex_core::compact::SUMMARIZATION_PROMPT;
use codex_core::config::Config;
use codex_core::config::OPENAI_DEFAULT_MODEL;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::WarningEvent;
@@ -111,9 +110,10 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
// 1. Arrange mocked SSE responses for the initial compact/resume/fork flow.
let server = MockServer::start().await;
mount_initial_flow(&server).await;
let expected_model = "gpt-5.1-codex";
// 2. Start a new conversation and drive it through the compact/resume/fork steps.
let (_home, config, manager, base) = start_test_conversation(&server).await;
let (_home, config, manager, base) =
start_test_conversation(&server, Some(expected_model)).await;
user_turn(&base, "hello world").await;
compact_conversation(&base).await;
@@ -189,7 +189,6 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
.as_str()
.unwrap_or_default()
.to_string();
let expected_model = OPENAI_DEFAULT_MODEL;
let summary_after_compact = extract_summary_message(&requests[2], SUMMARY_TEXT);
let summary_after_resume = extract_summary_message(&requests[3], SUMMARY_TEXT);
let summary_after_fork = extract_summary_message(&requests[4], SUMMARY_TEXT);
@@ -558,7 +557,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
mount_second_compact_flow(&server).await;
// 2. Drive the conversation through compact -> resume -> fork -> compact -> resume.
let (_home, config, manager, base) = start_test_conversation(&server).await;
let (_home, config, manager, base) = start_test_conversation(&server, None).await;
user_turn(&base, "hello world").await;
compact_conversation(&base).await;
@@ -808,6 +807,7 @@ async fn mount_second_compact_flow(server: &MockServer) {
async fn start_test_conversation(
server: &MockServer,
model: Option<&str>,
) -> (TempDir, Config, ConversationManager, Arc<CodexConversation>) {
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
@@ -817,7 +817,9 @@ async fn start_test_conversation(
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());
if let Some(model) = model {
config.model = model.to_string();
}
let manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
let NewConversation { conversation, .. } = manager
.new_conversation(config.clone())

View File

@@ -11,7 +11,7 @@ use std::collections::HashSet;
use std::path::Path;
use std::process::Command as StdCommand;
const MODEL_WITH_TOOL: &str = "test-gpt-5-codex";
const MODEL_WITH_TOOL: &str = "test-gpt-5.1-codex";
fn ripgrep_available() -> bool {
StdCommand::new("rg")

View File

@@ -31,12 +31,12 @@ const SCHEMA: &str = r#"
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn codex_returns_json_result_for_gpt5() -> anyhow::Result<()> {
codex_returns_json_result("gpt-5".to_string()).await
codex_returns_json_result("gpt-5.1".to_string()).await
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn codex_returns_json_result_for_gpt5_codex() -> anyhow::Result<()> {
codex_returns_json_result("gpt-5-codex".to_string()).await
codex_returns_json_result("gpt-5.1-codex".to_string()).await
}
async fn codex_returns_json_result(model: String) -> anyhow::Result<()> {

View File

@@ -160,7 +160,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
// with the OpenAI schema, so we just verify the tool presence here
let tools_by_model: HashMap<&'static str, Vec<&'static str>> = HashMap::from([
(
"gpt-5",
"gpt-5.1",
vec![
"shell",
"list_mcp_resources",
@@ -183,7 +183,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
],
),
(
"gpt-5-codex",
"gpt-5.1-codex",
vec![
"shell",
"list_mcp_resources",

View File

@@ -364,7 +364,7 @@ async fn review_uses_custom_review_model_from_config() {
// Choose a review model different from the main model; ensure it is used.
let codex = new_conversation_for_server(&server, &codex_home, |cfg| {
cfg.model = "gpt-4.1".to_string();
cfg.review_model = "gpt-5".to_string();
cfg.review_model = "gpt-5.1".to_string();
})
.await;
@@ -394,7 +394,7 @@ async fn review_uses_custom_review_model_from_config() {
// Assert the request body model equals the configured review model
let request = &server.received_requests().await.unwrap()[0];
let body = request.body_json::<serde_json::Value>().unwrap();
assert_eq!(body["model"].as_str().unwrap(), "gpt-5");
assert_eq!(body["model"].as_str().unwrap(), "gpt-5.1");
server.verify().await;
}

View File

@@ -376,9 +376,9 @@ async fn shell_output_reserializes_truncated_content() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5 is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1 is a model family");
});
let test = builder.build(&server).await?;
@@ -741,9 +741,9 @@ async fn shell_output_is_structured_for_nonzero_exit() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;
@@ -847,9 +847,9 @@ async fn local_shell_call_output_is_structured() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;

View File

@@ -57,9 +57,9 @@ async fn run_turn_and_measure(test: &TestCodex, prompt: &str) -> anyhow::Result<
#[allow(clippy::expect_used)]
async fn build_codex_with_test_tool(server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
let mut builder = test_codex().with_config(|config| {
config.model = "test-gpt-5-codex".to_string();
config.model = "test-gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("test-gpt-5-codex").expect("test-gpt-5-codex model family");
find_family_for_model("test-gpt-5.1-codex").expect("test-gpt-5.1-codex model family");
});
builder.build(server).await
}

View File

@@ -197,9 +197,9 @@ async fn sandbox_denied_shell_returns_original_output() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex model family");
});
let fixture = builder.build(&server).await?;
@@ -425,8 +425,8 @@ async fn shell_timeout_handles_background_grandchild_stdout() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is a valid model");
config.sandbox_policy = SandboxPolicy::DangerFullAccess;
});
let test = builder.build(&server).await?;

View File

@@ -41,9 +41,9 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
// Use the test model that wires function tools like grep_files
config.model = "test-gpt-5-codex".to_string();
config.model = "test-gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("test-gpt-5-codex").expect("model family for test model");
find_family_for_model("test-gpt-5.1-codex").expect("model family for test model");
});
let test = builder.build(&server).await?;
@@ -105,9 +105,9 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
// Use a model that exposes the generic shell tool.
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
});
let fixture = builder.build(&server).await?;
@@ -197,9 +197,9 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
});
let fixture = builder.build(&server).await?;
let call_id = "shell-single-truncation";

View File

@@ -30,8 +30,8 @@ use pretty_assertions::assert_eq;
async fn undo_harness() -> Result<TestCodexHarness> {
TestCodexHarness::with_config(|config: &mut Config| {
config.include_apply_patch_tool = true;
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
config.features.enable(Feature::GhostCommit);
})
.await

View File

@@ -159,7 +159,7 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
config.use_experimental_unified_exec_tool = true;
config.features.enable(Feature::UnifiedExec);
});
@@ -236,7 +236,7 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
config.use_experimental_unified_exec_tool = true;
config.features.enable(Feature::UnifiedExec);
});
@@ -288,28 +288,22 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
})
.await?;
let begin_event = wait_for_event_match(&codex, |msg| match msg {
EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()),
_ => None,
})
.await;
assert_eq!(
begin_event.cwd, workdir,
"exec_command cwd should reflect the requested workdir override"
);
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let outputs = collect_tool_outputs(&bodies)?;
let output = outputs
.get(call_id)
.expect("missing exec_command workdir output");
let output_text = output.output.trim();
let output_canonical = std::fs::canonicalize(output_text)?;
let expected_canonical = std::fs::canonicalize(&workdir)?;
assert_eq!(
output_canonical, expected_canonical,
"pwd should reflect the requested workdir override"
);
Ok(())
}

View File

@@ -270,9 +270,9 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
});
let fixture = builder.build(&server).await?;