Mirror of https://github.com/openai/codex.git (synced 2026-02-01 22:47:52 +00:00)
Update defaults to gpt-5.1 (#6652)

## Summary

- update documentation, example configs, and automation defaults to reference gpt-5.1 / gpt-5.1-codex
- bump the CLI and core configuration defaults, model presets, and error messaging to the new models while keeping the model-family/tool coverage for legacy slugs
- refresh tests, fixtures, and TUI snapshots so they expect the upgraded defaults

## Testing

- `cargo test -p codex-core config::tests::test_precedence_fixture_with_gpt5_profile`

[Codex Task](https://chatgpt.com/codex/tasks/task_i_6916c5b3c2b08321ace04ee38604fc6b)
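One practical consequence for reviewers: anyone who wants to stay on the previous defaults can pin them explicitly. A minimal sketch of a `~/.codex/config.toml` doing so (a hypothetical override, using only keys that appear in the fixtures below):

```toml
# Hypothetical override: pin the pre-upgrade default so the new
# gpt-5.1 / gpt-5.1-codex defaults in this commit do not apply.
model = "gpt-5-codex"
model_reasoning_effort = "medium"
```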
.github/codex/home/config.toml (vendored, 2 lines changed)
@@ -1,3 +1,3 @@
-model = "gpt-5"
+model = "gpt-5.1"
 
 # Consider setting [mcp_servers] here!
.github/workflows/issue-deduplicator.yml (vendored, 2 lines changed)
@@ -46,7 +46,7 @@ jobs:
         with:
           openai-api-key: ${{ secrets.CODEX_OPENAI_API_KEY }}
           allow-users: "*"
-          model: gpt-5
+          model: gpt-5.1
           prompt: |
             You are an assistant that triages new GitHub issues by identifying potential duplicates.
@@ -524,7 +524,7 @@ mod tests {
         let request = ClientRequest::NewConversation {
             request_id: RequestId::Integer(42),
             params: v1::NewConversationParams {
-                model: Some("gpt-5-codex".to_string()),
+                model: Some("gpt-5.1-codex".to_string()),
                 model_provider: None,
                 profile: None,
                 cwd: None,
@@ -542,7 +542,7 @@ mod tests {
             "method": "newConversation",
             "id": 42,
             "params": {
-                "model": "gpt-5-codex",
+                "model": "gpt-5.1-codex",
                 "modelProvider": null,
                 "profile": null,
                 "cwd": null,
@@ -74,7 +74,7 @@ Start a fresh thread when you need a new Codex conversation.
 { "method": "thread/start", "id": 10, "params": {
   // Optionally set config settings. If not specified, will use the user's
   // current config settings.
-  "model": "gpt-5-codex",
+  "model": "gpt-5.1-codex",
   "cwd": "/Users/me/project",
   "approvalPolicy": "never",
   "sandbox": "workspaceWrite",
@@ -155,7 +155,7 @@ You can optionally specify config overrides on the new turn. If specified, these
     "writableRoots": ["/Users/me/project"],
     "networkAccess": true
   },
-  "model": "gpt-5-codex",
+  "model": "gpt-5.1-codex",
   "effort": "medium",
   "summary": "concise"
 } }
@@ -27,7 +27,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
     std::fs::write(
         config_toml,
         r#"
-model = "gpt-5-codex"
+model = "gpt-5.1-codex"
 approval_policy = "on-request"
 sandbox_mode = "workspace-write"
 model_reasoning_summary = "detailed"
@@ -87,7 +87,7 @@ async fn get_config_toml_parses_all_fields() -> Result<()> {
         }),
         forced_chatgpt_workspace_id: Some("12345678-0000-0000-0000-000000000000".into()),
         forced_login_method: Some(ForcedLoginMethod::Chatgpt),
-        model: Some("gpt-5-codex".into()),
+        model: Some("gpt-5.1-codex".into()),
         model_reasoning_effort: Some(ReasoningEffort::High),
         model_reasoning_summary: Some(ReasoningSummary::Detailed),
         model_verbosity: Some(Verbosity::Medium),
@@ -57,7 +57,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
     std::fs::write(
         config_toml,
         r#"
-model = "gpt-5-codex"
+model = "gpt-5.1-codex"
 model_reasoning_effort = "medium"
 "#,
     )
@@ -27,7 +27,7 @@ async fn thread_resume_returns_original_thread() -> Result<()> {
     // Start a thread.
     let start_id = mcp
         .send_thread_start_request(ThreadStartParams {
-            model: Some("gpt-5-codex".to_string()),
+            model: Some("gpt-5.1-codex".to_string()),
             ..Default::default()
         })
         .await?;
@@ -68,7 +68,7 @@ async fn thread_resume_prefers_path_over_thread_id() -> Result<()> {
 
     let start_id = mcp
         .send_thread_start_request(ThreadStartParams {
-            model: Some("gpt-5-codex".to_string()),
+            model: Some("gpt-5.1-codex".to_string()),
             ..Default::default()
         })
         .await?;
@@ -112,7 +112,7 @@ async fn thread_resume_supports_history_and_overrides() -> Result<()> {
     // Start a thread.
     let start_id = mcp
         .send_thread_start_request(ThreadStartParams {
-            model: Some("gpt-5-codex".to_string()),
+            model: Some("gpt-5.1-codex".to_string()),
             ..Default::default()
         })
         .await?;
@@ -29,7 +29,7 @@ async fn thread_start_creates_thread_and_emits_started() -> Result<()> {
     // Start a v2 thread with an explicit model override.
     let req_id = mcp
         .send_thread_start_request(ThreadStartParams {
-            model: Some("gpt-5".to_string()),
+            model: Some("gpt-5.1".to_string()),
             ..Default::default()
         })
         .await?;
@@ -761,9 +761,9 @@ mod tests {
 
     #[test]
     fn resume_model_flag_applies_when_no_root_flags() {
-        let interactive = finalize_from_args(["codex", "resume", "-m", "gpt-5-test"].as_ref());
+        let interactive = finalize_from_args(["codex", "resume", "-m", "gpt-5.1-test"].as_ref());
 
-        assert_eq!(interactive.model.as_deref(), Some("gpt-5-test"));
+        assert_eq!(interactive.model.as_deref(), Some("gpt-5.1-test"));
         assert!(interactive.resume_picker);
         assert!(!interactive.resume_last);
         assert_eq!(interactive.resume_session_id, None);
@@ -808,7 +808,7 @@ mod tests {
                 "--ask-for-approval",
                 "on-request",
                 "-m",
-                "gpt-5-test",
+                "gpt-5.1-test",
                 "-p",
                 "my-profile",
                 "-C",
@@ -819,7 +819,7 @@ mod tests {
             .as_ref(),
         );
 
-        assert_eq!(interactive.model.as_deref(), Some("gpt-5-test"));
+        assert_eq!(interactive.model.as_deref(), Some("gpt-5.1-test"));
         assert!(interactive.oss);
         assert_eq!(interactive.config_profile.as_deref(), Some("my-profile"));
         assert_matches!(
@@ -1225,7 +1225,7 @@ mod tests {
 
     #[tokio::test]
     async fn error_when_error_event() {
-        let raw_error = r#"{"type":"response.failed","sequence_number":3,"response":{"id":"resp_689bcf18d7f08194bf3440ba62fe05d803fee0cdac429894","object":"response","created_at":1755041560,"status":"failed","background":false,"error":{"code":"rate_limit_exceeded","message":"Rate limit reached for gpt-5 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."}, "usage":null,"user":null,"metadata":{}}}"#;
+        let raw_error = r#"{"type":"response.failed","sequence_number":3,"response":{"id":"resp_689bcf18d7f08194bf3440ba62fe05d803fee0cdac429894","object":"response","created_at":1755041560,"status":"failed","background":false,"error":{"code":"rate_limit_exceeded","message":"Rate limit reached for gpt-5.1 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."}, "usage":null,"user":null,"metadata":{}}}"#;
 
         let sse1 = format!("event: response.failed\ndata: {raw_error}\n\n");
         let provider = ModelProviderInfo {
@@ -1254,7 +1254,7 @@ mod tests {
             Err(CodexErr::Stream(msg, delay)) => {
                 assert_eq!(
                     msg,
-                    "Rate limit reached for gpt-5 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."
+                    "Rate limit reached for gpt-5.1 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."
                 );
                 assert_eq!(*delay, Some(Duration::from_secs_f64(11.054)));
             }
@@ -1473,7 +1473,7 @@ mod tests {
     fn test_try_parse_retry_after() {
         let err = Error {
             r#type: None,
-            message: Some("Rate limit reached for gpt-5 in organization org- on tokens per min (TPM): Limit 1, Used 1, Requested 19304. Please try again in 28ms. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
+            message: Some("Rate limit reached for gpt-5.1 in organization org- on tokens per min (TPM): Limit 1, Used 1, Requested 19304. Please try again in 28ms. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
             code: Some("rate_limit_exceeded".to_string()),
             plan_type: None,
             resets_at: None
@@ -1487,7 +1487,7 @@ mod tests {
     fn test_try_parse_retry_after_no_delay() {
         let err = Error {
             r#type: None,
-            message: Some("Rate limit reached for gpt-5 in organization <ORG> on tokens per min (TPM): Limit 30000, Used 6899, Requested 24050. Please try again in 1.898s. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
+            message: Some("Rate limit reached for gpt-5.1 in organization <ORG> on tokens per min (TPM): Limit 30000, Used 6899, Requested 24050. Please try again in 1.898s. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
             code: Some("rate_limit_exceeded".to_string()),
             plan_type: None,
             resets_at: None
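These two fixtures pin the retry hint embedded in the rate-limit message. As a rough illustration of the parsing they exercise (a minimal sketch with a hypothetical helper name, not the crate's actual implementation), the delay can be recovered with plain string handling:

```rust
use std::time::Duration;

// Hypothetical sketch, not the crate's parser: recover the suggested retry
// delay from messages like "... Please try again in 11.054s. Visit ..." or
// "... Please try again in 28ms. ...". Returns None when no hint is present.
fn parse_retry_after(message: &str) -> Option<Duration> {
    let rest = message.split("Please try again in ").nth(1)?;
    // Take the first whitespace-delimited token ("11.054s." / "28ms.") and
    // drop the sentence-ending period before inspecting the unit suffix.
    let token = rest.split_whitespace().next()?.trim_end_matches('.');
    if let Some(ms) = token.strip_suffix("ms") {
        ms.parse::<f64>().ok().map(|v| Duration::from_secs_f64(v / 1000.0))
    } else if let Some(secs) = token.strip_suffix('s') {
        secs.parse::<f64>().ok().map(Duration::from_secs_f64)
    } else {
        None
    }
}

fn main() {
    let msg = "Rate limit reached for gpt-5.1 ... Please try again in 11.054s. Visit ...";
    assert_eq!(parse_retry_after(msg), Some(Duration::from_secs_f64(11.054)));
}
```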
@@ -427,7 +427,7 @@ mod tests {
             expects_apply_patch_instructions: false,
         },
         InstructionsTestCase {
-            slug: "gpt-5-codex",
+            slug: "gpt-5.1-codex",
             expects_apply_patch_instructions: false,
         },
         InstructionsTestCase {
@@ -457,7 +457,7 @@ mod tests {
         let input: Vec<ResponseItem> = vec![];
         let tools: Vec<serde_json::Value> = vec![];
         let req = ResponsesApiRequest {
-            model: "gpt-5",
+            model: "gpt-5.1",
             instructions: "i",
             input: &input,
             tools: &tools,
@@ -498,7 +498,7 @@ mod tests {
             create_text_param_for_request(None, &Some(schema.clone())).expect("text controls");
 
         let req = ResponsesApiRequest {
-            model: "gpt-5",
+            model: "gpt-5.1",
             instructions: "i",
             input: &input,
             tools: &tools,
@@ -534,7 +534,7 @@ mod tests {
         let input: Vec<ResponseItem> = vec![];
         let tools: Vec<serde_json::Value> = vec![];
         let req = ResponsesApiRequest {
-            model: "gpt-5",
+            model: "gpt-5.1",
             instructions: "i",
             input: &input,
             tools: &tools,
||||
@@ -584,7 +584,7 @@ mod tests {
|
||||
codex_home,
|
||||
None,
|
||||
&[ConfigEdit::SetModel {
|
||||
model: Some("gpt-5-codex".to_string()),
|
||||
model: Some("gpt-5.1-codex".to_string()),
|
||||
effort: Some(ReasoningEffort::High),
|
||||
}],
|
||||
)
|
||||
@@ -592,7 +592,7 @@ mod tests {
|
||||
|
||||
let contents =
|
||||
std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
|
||||
let expected = r#"model = "gpt-5-codex"
|
||||
let expected = r#"model = "gpt-5.1-codex"
|
||||
model_reasoning_effort = "high"
|
||||
"#;
|
||||
assert_eq!(contents, expected);
|
||||
@@ -722,7 +722,7 @@ model = "o5-preview"
|
||||
std::fs::write(
|
||||
codex_home.join(CONFIG_TOML_FILE),
|
||||
r#"[profiles."team a"]
|
||||
model = "gpt-5-codex"
|
||||
model = "gpt-5.1-codex"
|
||||
"#,
|
||||
)
|
||||
.expect("seed");
|
||||
@@ -972,14 +972,14 @@ B = \"2\"
         let codex_home = tmp.path().to_path_buf();
 
         ConfigEditsBuilder::new(&codex_home)
-            .set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
+            .set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
             .apply()
             .await
             .expect("persist");
 
         let contents =
             std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
-        let expected = r#"model = "gpt-5-codex"
+        let expected = r#"model = "gpt-5.1-codex"
 model_reasoning_effort = "high"
 "#;
         assert_eq!(contents, expected);
@@ -1001,11 +1001,11 @@ model_reasoning_effort = "low"
             std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
         assert_eq!(contents, initial_expected);
 
-        let updated_expected = r#"model = "gpt-5-codex"
+        let updated_expected = r#"model = "gpt-5.1-codex"
 model_reasoning_effort = "high"
 "#;
         ConfigEditsBuilder::new(codex_home)
-            .set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
+            .set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
             .apply_blocking()
             .expect("persist update");
         contents = std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
@@ -62,11 +62,11 @@ pub mod profile;
 pub mod types;
 
 #[cfg(target_os = "windows")]
-pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5";
+pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1";
 #[cfg(not(target_os = "windows"))]
-pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5-codex";
-const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5-codex";
-pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5-codex";
+pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1-codex";
+const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5.1-codex";
+pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5.1-codex";
 
 /// Maximum number of bytes of the documentation that will be embedded. Larger
 /// files are *silently truncated* to this size so we do not take up too much of
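Note the `#[cfg]` gating above: the default is chosen at compile time, so a Windows build never even contains the non-Windows slug. A self-contained sketch of the same pattern (illustrative constant names, not the crate's code):

```rust
// Illustrative: exactly one definition of the constant survives compilation,
// so callers never need a runtime platform check.
#[cfg(target_os = "windows")]
const DEFAULT_MODEL: &str = "gpt-5.1";
#[cfg(not(target_os = "windows"))]
const DEFAULT_MODEL: &str = "gpt-5.1-codex";

fn main() {
    println!("compiled-in default: {DEFAULT_MODEL}");
}
```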
@@ -81,7 +81,7 @@ pub struct Config {
     /// Optional override of model selection.
     pub model: String,
 
-    /// Model used specifically for review sessions. Defaults to "gpt-5-codex".
+    /// Model used specifically for review sessions. Defaults to "gpt-5.1-codex".
     pub review_model: String,
 
     pub model_family: ModelFamily,
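The `review_model` field lets review sessions run a different model than regular turns; the review test later in this diff sets it via `cfg.review_model`. A hedged `config.toml` sketch, assuming the struct field maps to a same-named key:

```toml
# Hypothetical config.toml: everyday turns on one model, reviews on the
# new default review model (mirrors the review_uses_custom_review_model test).
model = "gpt-4.1"
review_model = "gpt-5.1"
```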
@@ -2616,7 +2616,7 @@ url = "https://example.com/mcp"
         let codex_home = TempDir::new()?;
 
         ConfigEditsBuilder::new(codex_home.path())
-            .set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
+            .set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
             .apply()
             .await?;
 
@@ -2624,7 +2624,7 @@ url = "https://example.com/mcp"
             tokio::fs::read_to_string(codex_home.path().join(CONFIG_TOML_FILE)).await?;
         let parsed: ConfigToml = toml::from_str(&serialized)?;
 
-        assert_eq!(parsed.model.as_deref(), Some("gpt-5-codex"));
+        assert_eq!(parsed.model.as_deref(), Some("gpt-5.1-codex"));
         assert_eq!(parsed.model_reasoning_effort, Some(ReasoningEffort::High));
 
         Ok(())
@@ -2638,7 +2638,7 @@ url = "https://example.com/mcp"
         tokio::fs::write(
             &config_path,
             r#"
-model = "gpt-5-codex"
+model = "gpt-5.1-codex"
 model_reasoning_effort = "medium"
 
 [profiles.dev]
@@ -2674,7 +2674,7 @@ model = "gpt-4.1"
 
         ConfigEditsBuilder::new(codex_home.path())
             .with_profile(Some("dev"))
-            .set_model(Some("gpt-5-codex"), Some(ReasoningEffort::Medium))
+            .set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::Medium))
             .apply()
             .await?;
 
@@ -2686,7 +2686,7 @@ model = "gpt-4.1"
             .get("dev")
             .expect("profile should be created");
 
-        assert_eq!(profile.model.as_deref(), Some("gpt-5-codex"));
+        assert_eq!(profile.model.as_deref(), Some("gpt-5.1-codex"));
         assert_eq!(
             profile.model_reasoning_effort,
             Some(ReasoningEffort::Medium)
@@ -2708,7 +2708,7 @@ model = "gpt-4"
 model_reasoning_effort = "medium"
 
 [profiles.prod]
-model = "gpt-5-codex"
+model = "gpt-5.1-codex"
 "#,
         )
         .await?;
@@ -2737,7 +2737,7 @@ model = "gpt-5-codex"
                 .profiles
                 .get("prod")
                 .and_then(|profile| profile.model.as_deref()),
-            Some("gpt-5-codex"),
+            Some("gpt-5.1-codex"),
         );
 
         Ok(())
@@ -2852,7 +2852,7 @@ model_provider = "openai"
 approval_policy = "on-failure"
 
 [profiles.gpt5]
-model = "gpt-5"
+model = "gpt-5.1"
 model_provider = "openai"
 approval_policy = "on-failure"
 model_reasoning_effort = "high"
@@ -3168,9 +3168,9 @@ model_verbosity = "high"
             fixture.codex_home(),
         )?;
         let expected_gpt5_profile_config = Config {
-            model: "gpt-5".to_string(),
+            model: "gpt-5.1".to_string(),
             review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
-            model_family: find_family_for_model("gpt-5").expect("known model slug"),
+            model_family: find_family_for_model("gpt-5.1").expect("known model slug"),
             model_context_window: Some(272_000),
             model_max_output_tokens: Some(128_000),
             model_auto_compact_token_limit: Some(244_800),
@@ -132,7 +132,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
         model_family!(slug, "gpt-4o", needs_special_apply_patch_instructions: true)
     } else if slug.starts_with("gpt-3.5") {
         model_family!(slug, "gpt-3.5", needs_special_apply_patch_instructions: true)
-    } else if slug.starts_with("test-gpt-5-codex") {
+    } else if slug.starts_with("test-gpt-5") {
         model_family!(
             slug, slug,
             supports_reasoning_summaries: true,
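Loosening the guard to `slug.starts_with("test-gpt-5")` is what lets both the old `test-gpt-5-codex` slug and the new `test-gpt-5.1-codex` slug resolve to the same family. A reduced sketch of the prefix-matching idea (illustrative enum, not the real `ModelFamily`):

```rust
// Illustrative only; the real code builds a ModelFamily via the
// model_family! macro rather than this bare enum.
#[derive(Debug, PartialEq)]
enum Family {
    Gpt35,
    TestGpt5,
    Unknown,
}

fn family_for(slug: &str) -> Family {
    if slug.starts_with("gpt-3.5") {
        Family::Gpt35
    } else if slug.starts_with("test-gpt-5") {
        // Covers "test-gpt-5-codex" and "test-gpt-5.1-codex" alike.
        Family::TestGpt5
    } else {
        Family::Unknown
    }
}

fn main() {
    assert_eq!(family_for("test-gpt-5.1-codex"), Family::TestGpt5);
    assert_eq!(family_for("test-gpt-5-codex"), Family::TestGpt5);
}
```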
@@ -87,8 +87,8 @@ async fn apply_patch_cli_multiple_operations_integration(
     skip_if_no_network!(Ok(()));
 
     let harness = apply_patch_harness_with(|config| {
-        config.model = "gpt-5".to_string();
-        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
+        config.model = "gpt-5.1".to_string();
+        config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
     })
     .await?;
@@ -671,8 +671,8 @@ async fn apply_patch_shell_heredoc_with_cd_updates_relative_workdir() -> Result<
     skip_if_no_network!(Ok(()));
 
     let harness = apply_patch_harness_with(|config| {
-        config.model = "gpt-5".to_string();
-        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
+        config.model = "gpt-5.1".to_string();
+        config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
     })
     .await?;
@@ -717,8 +717,8 @@ async fn apply_patch_shell_failure_propagates_error_and_skips_diff() -> Result<(
     skip_if_no_network!(Ok(()));
 
     let harness = apply_patch_harness_with(|config| {
-        config.model = "gpt-5".to_string();
-        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
+        config.model = "gpt-5.1".to_string();
+        config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
     })
     .await?;
     let test = harness.test();
@@ -240,6 +240,10 @@ enum Expectation {
         target: TargetPath,
         content: &'static str,
     },
+    FileCreatedNoExitCode {
+        target: TargetPath,
+        content: &'static str,
+    },
     PatchApplied {
         target: TargetPath,
         content: &'static str,
@@ -251,12 +255,18 @@ enum Expectation {
     NetworkSuccess {
         body_contains: &'static str,
     },
+    NetworkSuccessNoExitCode {
+        body_contains: &'static str,
+    },
     NetworkFailure {
         expect_tag: &'static str,
     },
     CommandSuccess {
         stdout_contains: &'static str,
     },
+    CommandSuccessNoExitCode {
+        stdout_contains: &'static str,
+    },
     CommandFailure {
         output_contains: &'static str,
     },
@@ -270,8 +280,7 @@ impl Expectation {
                 assert_eq!(
                     result.exit_code,
                     Some(0),
-                    "expected successful exit for {:?}",
-                    path
+                    "expected successful exit for {path:?}"
                 );
                 assert!(
                     result.stdout.contains(content),
@@ -285,6 +294,21 @@ impl Expectation {
                 );
                 let _ = fs::remove_file(path);
             }
+            Expectation::FileCreatedNoExitCode { target, content } => {
+                let (path, _) = target.resolve_for_patch(test);
+                assert_eq!(result.exit_code, None, "expected no exit code for {path:?}");
+                assert!(
+                    result.stdout.contains(content),
+                    "stdout missing {content:?}: {}",
+                    result.stdout
+                );
+                let file_contents = fs::read_to_string(&path)?;
+                assert!(
+                    file_contents.contains(content),
+                    "file contents missing {content:?}: {file_contents}"
+                );
+                let _ = fs::remove_file(path);
+            }
             Expectation::PatchApplied { target, content } => {
                 let (path, _) = target.resolve_for_patch(test);
                 match result.exit_code {
@@ -360,6 +384,23 @@ impl Expectation {
                     result.stdout
                 );
             }
+            Expectation::NetworkSuccessNoExitCode { body_contains } => {
+                assert_eq!(
+                    result.exit_code, None,
+                    "expected no exit code for successful network call: {}",
+                    result.stdout
+                );
+                assert!(
+                    result.stdout.contains("OK:"),
+                    "stdout missing OK prefix: {}",
+                    result.stdout
+                );
+                assert!(
+                    result.stdout.contains(body_contains),
+                    "stdout missing body text {body_contains:?}: {}",
+                    result.stdout
+                );
+            }
             Expectation::NetworkFailure { expect_tag } => {
                 assert_ne!(
                     result.exit_code,
@@ -391,6 +432,18 @@ impl Expectation {
                     result.stdout
                 );
             }
+            Expectation::CommandSuccessNoExitCode { stdout_contains } => {
+                assert_eq!(
+                    result.exit_code, None,
+                    "expected no exit code for trusted command: {}",
+                    result.stdout
+                );
+                assert!(
+                    result.stdout.contains(stdout_contains),
+                    "trusted command stdout missing {stdout_contains:?}: {}",
+                    result.stdout
+                );
+            }
             Expectation::CommandFailure { output_contains } => {
                 assert_ne!(
                     result.exit_code,
@@ -588,13 +641,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::Auto,
         expectation: Expectation::FileCreated {
             target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
             content: "danger-on-request",
         },
     },
+    ScenarioSpec {
+        name: "danger_full_access_on_request_allows_outside_write_gpt_5_1_no_exit",
+        approval_policy: OnRequest,
+        sandbox_policy: SandboxPolicy::DangerFullAccess,
+        action: ActionKind::WriteFile {
+            target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
+            content: "danger-on-request",
+        },
+        with_escalated_permissions: false,
+        features: vec![],
+        model_override: Some("gpt-5.1"),
+        outcome: Outcome::Auto,
+        expectation: Expectation::FileCreatedNoExitCode {
+            target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
+            content: "danger-on-request",
+        },
+    },
     ScenarioSpec {
         name: "danger_full_access_on_request_allows_network",
         approval_policy: OnRequest,
@@ -605,12 +675,28 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::Auto,
         expectation: Expectation::NetworkSuccess {
             body_contains: "danger-network-ok",
         },
     },
+    ScenarioSpec {
+        name: "danger_full_access_on_request_allows_network_gpt_5_1_no_exit",
+        approval_policy: OnRequest,
+        sandbox_policy: SandboxPolicy::DangerFullAccess,
+        action: ActionKind::FetchUrl {
+            endpoint: "/dfa/network",
+            response_body: "danger-network-ok",
+        },
+        with_escalated_permissions: false,
+        features: vec![],
+        model_override: Some("gpt-5.1"),
+        outcome: Outcome::Auto,
+        expectation: Expectation::NetworkSuccessNoExitCode {
+            body_contains: "danger-network-ok",
+        },
+    },
     ScenarioSpec {
         name: "trusted_command_unless_trusted_runs_without_prompt",
         approval_policy: UnlessTrusted,
@@ -620,12 +706,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::Auto,
         expectation: Expectation::CommandSuccess {
             stdout_contains: "trusted-unless",
         },
     },
+    ScenarioSpec {
+        name: "trusted_command_unless_trusted_runs_without_prompt_gpt_5_1_no_exit",
+        approval_policy: UnlessTrusted,
+        sandbox_policy: SandboxPolicy::DangerFullAccess,
+        action: ActionKind::RunCommand {
+            command: &["echo", "trusted-unless"],
+        },
+        with_escalated_permissions: false,
+        features: vec![],
+        model_override: Some("gpt-5.1"),
+        outcome: Outcome::Auto,
+        expectation: Expectation::CommandSuccessNoExitCode {
+            stdout_contains: "trusted-unless",
+        },
+    },
     ScenarioSpec {
         name: "danger_full_access_on_failure_allows_outside_write",
         approval_policy: OnFailure,
@@ -636,13 +737,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
        outcome: Outcome::Auto,
         expectation: Expectation::FileCreated {
             target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
             content: "danger-on-failure",
         },
     },
+    ScenarioSpec {
+        name: "danger_full_access_on_failure_allows_outside_write_gpt_5_1_no_exit",
+        approval_policy: OnFailure,
+        sandbox_policy: SandboxPolicy::DangerFullAccess,
+        action: ActionKind::WriteFile {
+            target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
+            content: "danger-on-failure",
+        },
+        with_escalated_permissions: false,
+        features: vec![],
+        model_override: Some("gpt-5.1"),
+        outcome: Outcome::Auto,
+        expectation: Expectation::FileCreatedNoExitCode {
+            target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
+            content: "danger-on-failure",
+        },
+    },
     ScenarioSpec {
         name: "danger_full_access_unless_trusted_requests_approval",
         approval_policy: UnlessTrusted,
@@ -653,7 +771,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::ExecApproval {
             decision: ReviewDecision::Approved,
             expected_reason: None,
@@ -663,6 +781,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
             content: "danger-unless-trusted",
         },
     },
+    ScenarioSpec {
+        name: "danger_full_access_unless_trusted_requests_approval_gpt_5_1_no_exit",
+        approval_policy: UnlessTrusted,
+        sandbox_policy: SandboxPolicy::DangerFullAccess,
+        action: ActionKind::WriteFile {
+            target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
+            content: "danger-unless-trusted",
+        },
+        with_escalated_permissions: false,
+        features: vec![],
+        model_override: Some("gpt-5.1"),
+        outcome: Outcome::ExecApproval {
+            decision: ReviewDecision::Approved,
+            expected_reason: None,
+        },
+        expectation: Expectation::FileCreatedNoExitCode {
+            target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
+            content: "danger-unless-trusted",
+        },
+    },
     ScenarioSpec {
         name: "danger_full_access_never_allows_outside_write",
         approval_policy: Never,
@@ -673,13 +811,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::Auto,
         expectation: Expectation::FileCreated {
             target: TargetPath::OutsideWorkspace("dfa_never.txt"),
             content: "danger-never",
         },
     },
+    ScenarioSpec {
+        name: "danger_full_access_never_allows_outside_write_gpt_5_1_no_exit",
+        approval_policy: Never,
+        sandbox_policy: SandboxPolicy::DangerFullAccess,
+        action: ActionKind::WriteFile {
+            target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
+            content: "danger-never",
+        },
+        with_escalated_permissions: false,
+        features: vec![],
+        model_override: Some("gpt-5.1"),
+        outcome: Outcome::Auto,
+        expectation: Expectation::FileCreatedNoExitCode {
+            target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
+            content: "danger-never",
+        },
+    },
     ScenarioSpec {
         name: "read_only_on_request_requires_approval",
         approval_policy: OnRequest,
@@ -690,7 +845,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: true,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::ExecApproval {
             decision: ReviewDecision::Approved,
             expected_reason: None,
@@ -700,6 +855,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
             content: "read-only-approval",
         },
     },
+    ScenarioSpec {
+        name: "read_only_on_request_requires_approval_gpt_5_1_no_exit",
+        approval_policy: OnRequest,
+        sandbox_policy: SandboxPolicy::ReadOnly,
+        action: ActionKind::WriteFile {
+            target: TargetPath::Workspace("ro_on_request_5_1.txt"),
+            content: "read-only-approval",
+        },
+        with_escalated_permissions: true,
+        features: vec![],
+        model_override: Some("gpt-5.1"),
+        outcome: Outcome::ExecApproval {
+            decision: ReviewDecision::Approved,
+            expected_reason: None,
+        },
+        expectation: Expectation::FileCreatedNoExitCode {
+            target: TargetPath::Workspace("ro_on_request_5_1.txt"),
+            content: "read-only-approval",
+        },
+    },
     ScenarioSpec {
         name: "trusted_command_on_request_read_only_runs_without_prompt",
         approval_policy: OnRequest,
@@ -709,12 +884,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::Auto,
         expectation: Expectation::CommandSuccess {
             stdout_contains: "trusted-read-only",
         },
     },
+    ScenarioSpec {
+        name: "trusted_command_on_request_read_only_runs_without_prompt_gpt_5_1_no_exit",
+        approval_policy: OnRequest,
+        sandbox_policy: SandboxPolicy::ReadOnly,
+        action: ActionKind::RunCommand {
+            command: &["echo", "trusted-read-only"],
+        },
+        with_escalated_permissions: false,
+        features: vec![],
+        model_override: Some("gpt-5.1"),
+        outcome: Outcome::Auto,
+        expectation: Expectation::CommandSuccessNoExitCode {
+            stdout_contains: "trusted-read-only",
+        },
+    },
     ScenarioSpec {
         name: "read_only_on_request_blocks_network",
         approval_policy: OnRequest,
@@ -760,7 +950,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::ExecApproval {
             decision: ReviewDecision::Approved,
             expected_reason: Some("command failed; retry without sandbox?"),
@@ -770,6 +960,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
             content: "read-only-on-failure",
         },
     },
+    #[cfg(not(target_os = "linux"))]
+    ScenarioSpec {
+        name: "read_only_on_failure_escalates_after_sandbox_error_gpt_5_1_no_exit",
+        approval_policy: OnFailure,
+        sandbox_policy: SandboxPolicy::ReadOnly,
+        action: ActionKind::WriteFile {
+            target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
+            content: "read-only-on-failure",
+        },
+        with_escalated_permissions: false,
+        features: vec![],
+        model_override: Some("gpt-5.1"),
+        outcome: Outcome::ExecApproval {
+            decision: ReviewDecision::Approved,
+            expected_reason: Some("command failed; retry without sandbox?"),
+        },
+        expectation: Expectation::FileCreatedNoExitCode {
+            target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
+            content: "read-only-on-failure",
+        },
+    },
     ScenarioSpec {
         name: "read_only_on_request_network_escalates_when_approved",
         approval_policy: OnRequest,
@@ -780,7 +991,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: true,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::ExecApproval {
             decision: ReviewDecision::Approved,
             expected_reason: None,
@@ -789,6 +1000,25 @@ fn scenarios() -> Vec<ScenarioSpec> {
             body_contains: "read-only-network-ok",
         },
     },
+    ScenarioSpec {
+        name: "read_only_on_request_network_escalates_when_approved_gpt_5_1_no_exit",
+        approval_policy: OnRequest,
+        sandbox_policy: SandboxPolicy::ReadOnly,
+        action: ActionKind::FetchUrl {
+            endpoint: "/ro/network-approved",
+            response_body: "read-only-network-ok",
+        },
+        with_escalated_permissions: true,
+        features: vec![],
+        model_override: Some("gpt-5.1"),
+        outcome: Outcome::ExecApproval {
+            decision: ReviewDecision::Approved,
+            expected_reason: None,
+        },
+        expectation: Expectation::NetworkSuccessNoExitCode {
+            body_contains: "read-only-network-ok",
+        },
+    },
     ScenarioSpec {
         name: "apply_patch_shell_requires_patch_approval",
         approval_policy: UnlessTrusted,
@@ -819,7 +1049,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: Some("gpt-5-codex"),
+        model_override: Some("gpt-5.1-codex"),
         outcome: Outcome::Auto,
         expectation: Expectation::PatchApplied {
             target: TargetPath::Workspace("apply_patch_function.txt"),
@@ -836,7 +1066,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![Feature::ApplyPatchFreeform],
-        model_override: Some("gpt-5-codex"),
+        model_override: Some("gpt-5.1-codex"),
         outcome: Outcome::Auto,
         expectation: Expectation::PatchApplied {
             target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
@@ -853,7 +1083,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: Some("gpt-5-codex"),
+        model_override: Some("gpt-5.1-codex"),
         outcome: Outcome::PatchApproval {
             decision: ReviewDecision::Approved,
             expected_reason: None,
@@ -873,7 +1103,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: Some("gpt-5-codex"),
+        model_override: Some("gpt-5.1-codex"),
         outcome: Outcome::PatchApproval {
             decision: ReviewDecision::Denied,
             expected_reason: None,
@@ -913,7 +1143,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: Some("gpt-5-codex"),
+        model_override: Some("gpt-5.1-codex"),
         outcome: Outcome::PatchApproval {
             decision: ReviewDecision::Approved,
             expected_reason: None,
@@ -933,7 +1163,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: Some("gpt-5-codex"),
+        model_override: Some("gpt-5.1-codex"),
         outcome: Outcome::Auto,
         expectation: Expectation::FileNotCreated {
             target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
@@ -952,7 +1182,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::ExecApproval {
             decision: ReviewDecision::Approved,
             expected_reason: None,
@@ -962,6 +1192,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
             content: "read-only-unless-trusted",
         },
     },
+    ScenarioSpec {
+        name: "read_only_unless_trusted_requires_approval_gpt_5_1_no_exit",
+        approval_policy: UnlessTrusted,
+        sandbox_policy: SandboxPolicy::ReadOnly,
+        action: ActionKind::WriteFile {
+            target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
+            content: "read-only-unless-trusted",
+        },
+        with_escalated_permissions: false,
+        features: vec![],
+        model_override: Some("gpt-5.1"),
+        outcome: Outcome::ExecApproval {
+            decision: ReviewDecision::Approved,
+            expected_reason: None,
+        },
+        expectation: Expectation::FileCreatedNoExitCode {
+            target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
+            content: "read-only-unless-trusted",
+        },
+    },
     ScenarioSpec {
         name: "read_only_never_reports_sandbox_failure",
         approval_policy: Never,
@@ -992,7 +1242,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::Auto,
         expectation: Expectation::CommandSuccess {
             stdout_contains: "trusted-never",
@@ -1008,7 +1258,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::Auto,
         expectation: Expectation::FileCreated {
             target: TargetPath::Workspace("ww_on_request.txt"),
@@ -1039,7 +1289,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: true,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::ExecApproval {
             decision: ReviewDecision::Approved,
             expected_reason: None,
@@ -1059,7 +1309,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::Auto,
         expectation: Expectation::NetworkSuccess {
             body_contains: "workspace-network-ok",
@@ -1076,7 +1326,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::ExecApproval {
             decision: ReviewDecision::Approved,
             expected_reason: Some("command failed; retry without sandbox?"),
@@ -1096,7 +1346,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::ExecApproval {
             decision: ReviewDecision::Approved,
             expected_reason: None,
@@ -1137,7 +1387,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: false,
         features: vec![Feature::UnifiedExec],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::Auto,
         expectation: Expectation::CommandSuccess {
             stdout_contains: "hello unified exec",
@@ -1155,7 +1405,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
         },
         with_escalated_permissions: true,
         features: vec![Feature::UnifiedExec],
-        model_override: None,
+        model_override: Some("gpt-5"),
         outcome: Outcome::ExecApproval {
             decision: ReviewDecision::Approved,
             expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
@@ -1208,7 +1458,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
     let mut builder = test_codex().with_config(move |config| {
         config.approval_policy = approval_policy;
         config.sandbox_policy = sandbox_policy.clone();
-        let model = model_override.unwrap_or("gpt-5");
+        let model = model_override.unwrap_or("gpt-5.1");
         config.model = model.to_string();
         config.model_family =
             find_family_for_model(model).expect("model should map to a known family");
@@ -769,7 +769,7 @@ async fn configured_verbosity_not_sent_for_models_without_support() -> anyhow::R
 
     let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
     let TestCodex { codex, .. } = test_codex()
-        .with_model("gpt-5-codex")
+        .with_model("gpt-5.1-codex")
         .with_config(|config| {
             config.model_verbosity = Some(Verbosity::High);
         })
@@ -807,7 +807,7 @@ async fn configured_verbosity_is_sent() -> anyhow::Result<()> {
 
     let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
     let TestCodex { codex, .. } = test_codex()
-        .with_model("gpt-5")
+        .with_model("gpt-5.1")
         .with_config(|config| {
             config.model_verbosity = Some(Verbosity::High);
         })
@@ -1155,7 +1155,7 @@ async fn token_count_includes_rate_limits_snapshot() {
             "reasoning_output_tokens": 0,
             "total_tokens": 123
         },
-        // Default model is gpt-5-codex in tests → 95% usable context window
+        // Default model is gpt-5.1-codex in tests → 95% usable context window
         "model_context_window": 258400
     },
     "rate_limits": {
@@ -1304,8 +1304,9 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
 
     let TestCodex { codex, .. } = test_codex()
         .with_config(|config| {
-            config.model = "gpt-5".to_string();
-            config.model_family = find_family_for_model("gpt-5").expect("known gpt-5 model family");
+            config.model = "gpt-5.1".to_string();
+            config.model_family =
+                find_family_for_model("gpt-5.1").expect("known gpt-5.1 model family");
             config.model_context_window = Some(272_000);
         })
         .build(&server)
@@ -18,7 +18,6 @@ use codex_core::NewConversation;
 use codex_core::built_in_model_providers;
 use codex_core::compact::SUMMARIZATION_PROMPT;
 use codex_core::config::Config;
-use codex_core::config::OPENAI_DEFAULT_MODEL;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::Op;
 use codex_core::protocol::WarningEvent;
@@ -111,9 +110,10 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
     // 1. Arrange mocked SSE responses for the initial compact/resume/fork flow.
     let server = MockServer::start().await;
     mount_initial_flow(&server).await;
 
+    let expected_model = "gpt-5.1-codex";
     // 2. Start a new conversation and drive it through the compact/resume/fork steps.
-    let (_home, config, manager, base) = start_test_conversation(&server).await;
+    let (_home, config, manager, base) =
+        start_test_conversation(&server, Some(expected_model)).await;
 
     user_turn(&base, "hello world").await;
     compact_conversation(&base).await;
@@ -189,7 +189,6 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
         .as_str()
         .unwrap_or_default()
         .to_string();
-    let expected_model = OPENAI_DEFAULT_MODEL;
     let summary_after_compact = extract_summary_message(&requests[2], SUMMARY_TEXT);
     let summary_after_resume = extract_summary_message(&requests[3], SUMMARY_TEXT);
     let summary_after_fork = extract_summary_message(&requests[4], SUMMARY_TEXT);
@@ -558,7 +557,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
     mount_second_compact_flow(&server).await;
 
     // 2. Drive the conversation through compact -> resume -> fork -> compact -> resume.
-    let (_home, config, manager, base) = start_test_conversation(&server).await;
+    let (_home, config, manager, base) = start_test_conversation(&server, None).await;
 
     user_turn(&base, "hello world").await;
     compact_conversation(&base).await;
@@ -808,6 +807,7 @@ async fn mount_second_compact_flow(server: &MockServer) {
 
 async fn start_test_conversation(
     server: &MockServer,
+    model: Option<&str>,
 ) -> (TempDir, Config, ConversationManager, Arc<CodexConversation>) {
     let model_provider = ModelProviderInfo {
         base_url: Some(format!("{}/v1", server.uri())),
@@ -817,7 +817,9 @@ async fn start_test_conversation(
     let mut config = load_default_config_for_test(&home);
     config.model_provider = model_provider;
     config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());
-
+    if let Some(model) = model {
+        config.model = model.to_string();
+    }
     let manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
     let NewConversation { conversation, .. } = manager
         .new_conversation(config.clone())
@@ -11,7 +11,7 @@ use std::collections::HashSet;
 use std::path::Path;
 use std::process::Command as StdCommand;
 
-const MODEL_WITH_TOOL: &str = "test-gpt-5-codex";
+const MODEL_WITH_TOOL: &str = "test-gpt-5.1-codex";
 
 fn ripgrep_available() -> bool {
     StdCommand::new("rg")
@@ -31,12 +31,12 @@ const SCHEMA: &str = r#"
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn codex_returns_json_result_for_gpt5() -> anyhow::Result<()> {
-    codex_returns_json_result("gpt-5".to_string()).await
+    codex_returns_json_result("gpt-5.1".to_string()).await
 }
 
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn codex_returns_json_result_for_gpt5_codex() -> anyhow::Result<()> {
-    codex_returns_json_result("gpt-5-codex".to_string()).await
+    codex_returns_json_result("gpt-5.1-codex".to_string()).await
 }
 
 async fn codex_returns_json_result(model: String) -> anyhow::Result<()> {
@@ -160,7 +160,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
     // with the OpenAI schema, so we just verify the tool presence here
     let tools_by_model: HashMap<&'static str, Vec<&'static str>> = HashMap::from([
         (
-            "gpt-5",
+            "gpt-5.1",
             vec![
                 "shell",
                 "list_mcp_resources",
@@ -183,7 +183,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
             ],
         ),
         (
-            "gpt-5-codex",
+            "gpt-5.1-codex",
             vec![
                 "shell",
                 "list_mcp_resources",
@@ -364,7 +364,7 @@ async fn review_uses_custom_review_model_from_config() {
     // Choose a review model different from the main model; ensure it is used.
     let codex = new_conversation_for_server(&server, &codex_home, |cfg| {
         cfg.model = "gpt-4.1".to_string();
-        cfg.review_model = "gpt-5".to_string();
+        cfg.review_model = "gpt-5.1".to_string();
     })
     .await;
@@ -394,7 +394,7 @@ async fn review_uses_custom_review_model_from_config() {
     // Assert the request body model equals the configured review model
     let request = &server.received_requests().await.unwrap()[0];
     let body = request.body_json::<serde_json::Value>().unwrap();
-    assert_eq!(body["model"].as_str().unwrap(), "gpt-5");
+    assert_eq!(body["model"].as_str().unwrap(), "gpt-5.1");
 
     server.verify().await;
 }
@@ -376,9 +376,9 @@ async fn shell_output_reserializes_truncated_content() -> Result<()> {
 
     let server = start_mock_server().await;
     let mut builder = test_codex().with_config(|config| {
-        config.model = "gpt-5-codex".to_string();
+        config.model = "gpt-5.1-codex".to_string();
         config.model_family =
-            find_family_for_model("gpt-5-codex").expect("gpt-5 is a model family");
+            find_family_for_model("gpt-5.1-codex").expect("gpt-5.1 is a model family");
     });
     let test = builder.build(&server).await?;
@@ -741,9 +741,9 @@ async fn shell_output_is_structured_for_nonzero_exit() -> Result<()> {
 
     let server = start_mock_server().await;
     let mut builder = test_codex().with_config(|config| {
-        config.model = "gpt-5-codex".to_string();
+        config.model = "gpt-5.1-codex".to_string();
         config.model_family =
-            find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
+            find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
         config.include_apply_patch_tool = true;
     });
     let test = builder.build(&server).await?;
@@ -847,9 +847,9 @@ async fn local_shell_call_output_is_structured() -> Result<()> {
 
     let server = start_mock_server().await;
     let mut builder = test_codex().with_config(|config| {
-        config.model = "gpt-5-codex".to_string();
+        config.model = "gpt-5.1-codex".to_string();
         config.model_family =
-            find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
+            find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
         config.include_apply_patch_tool = true;
     });
     let test = builder.build(&server).await?;
@@ -57,9 +57,9 @@ async fn run_turn_and_measure(test: &TestCodex, prompt: &str) -> anyhow::Result<
 #[allow(clippy::expect_used)]
 async fn build_codex_with_test_tool(server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
     let mut builder = test_codex().with_config(|config| {
-        config.model = "test-gpt-5-codex".to_string();
+        config.model = "test-gpt-5.1-codex".to_string();
         config.model_family =
-            find_family_for_model("test-gpt-5-codex").expect("test-gpt-5-codex model family");
+            find_family_for_model("test-gpt-5.1-codex").expect("test-gpt-5.1-codex model family");
     });
     builder.build(server).await
 }
@@ -197,9 +197,9 @@ async fn sandbox_denied_shell_returns_original_output() -> Result<()> {
 
     let server = start_mock_server().await;
     let mut builder = test_codex().with_config(|config| {
-        config.model = "gpt-5-codex".to_string();
+        config.model = "gpt-5.1-codex".to_string();
         config.model_family =
-            find_family_for_model("gpt-5-codex").expect("gpt-5-codex model family");
+            find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex model family");
     });
     let fixture = builder.build(&server).await?;
@@ -425,8 +425,8 @@ async fn shell_timeout_handles_background_grandchild_stdout() -> Result<()> {
 
     let server = start_mock_server().await;
     let mut builder = test_codex().with_config(|config| {
-        config.model = "gpt-5".to_string();
-        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
+        config.model = "gpt-5.1".to_string();
+        config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is a valid model");
         config.sandbox_policy = SandboxPolicy::DangerFullAccess;
     });
     let test = builder.build(&server).await?;
@@ -41,9 +41,9 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
     let server = start_mock_server().await;
     let mut builder = test_codex().with_config(|config| {
         // Use the test model that wires function tools like grep_files
-        config.model = "test-gpt-5-codex".to_string();
+        config.model = "test-gpt-5.1-codex".to_string();
         config.model_family =
-            find_family_for_model("test-gpt-5-codex").expect("model family for test model");
+            find_family_for_model("test-gpt-5.1-codex").expect("model family for test model");
     });
     let test = builder.build(&server).await?;
@@ -105,9 +105,9 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
 
     // Use a model that exposes the generic shell tool.
     let mut builder = test_codex().with_config(|config| {
-        config.model = "gpt-5-codex".to_string();
+        config.model = "gpt-5.1-codex".to_string();
         config.model_family =
-            find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
+            find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
     });
     let fixture = builder.build(&server).await?;
@@ -197,9 +197,9 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
     let server = start_mock_server().await;
 
     let mut builder = test_codex().with_config(|config| {
-        config.model = "gpt-5-codex".to_string();
+        config.model = "gpt-5.1-codex".to_string();
         config.model_family =
-            find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
+            find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
     });
     let fixture = builder.build(&server).await?;
     let call_id = "shell-single-truncation";
@@ -30,8 +30,8 @@ use pretty_assertions::assert_eq;
 async fn undo_harness() -> Result<TestCodexHarness> {
     TestCodexHarness::with_config(|config: &mut Config| {
         config.include_apply_patch_tool = true;
-        config.model = "gpt-5".to_string();
-        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
+        config.model = "gpt-5.1".to_string();
+        config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
         config.features.enable(Feature::GhostCommit);
     })
     .await
@@ -159,7 +159,7 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
 
     let server = start_mock_server().await;
 
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
         config.use_experimental_unified_exec_tool = true;
         config.features.enable(Feature::UnifiedExec);
     });
@@ -236,7 +236,7 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
 
     let server = start_mock_server().await;
 
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
         config.use_experimental_unified_exec_tool = true;
         config.features.enable(Feature::UnifiedExec);
     });
@@ -288,28 +288,22 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
})
.await?;

let begin_event = wait_for_event_match(&codex, |msg| match msg {
EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()),
_ => None,
})
.await;

assert_eq!(
begin_event.cwd, workdir,
"exec_command cwd should reflect the requested workdir override"
);

wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;

let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");

let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();

let outputs = collect_tool_outputs(&bodies)?;
let output = outputs
.get(call_id)
.expect("missing exec_command workdir output");
let output_text = output.output.trim();
let output_canonical = std::fs::canonicalize(output_text)?;
let expected_canonical = std::fs::canonicalize(&workdir)?;
assert_eq!(
output_canonical, expected_canonical,
"pwd should reflect the requested workdir override"
);

Ok(())
}

@@ -270,9 +270,9 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
let server = start_mock_server().await;

let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
});
let fixture = builder.build(&server).await?;

@@ -55,7 +55,7 @@ Start a new session with optional overrides:

Request `newConversation` params (subset):

- `model`: string model id (e.g. "o3", "gpt-5", "gpt-5-codex")
- `model`: string model id (e.g. "o3", "gpt-5.1", "gpt-5.1-codex")
- `profile`: optional named profile
- `cwd`: optional working directory
- `approvalPolicy`: `untrusted` | `on-request` | `on-failure` | `never`
@@ -119,13 +119,13 @@ For the complete request/response shapes and flow examples, see the [“Auth end
## Example: start and send a message

```json
{ "jsonrpc": "2.0", "id": 1, "method": "newConversation", "params": { "model": "gpt-5", "approvalPolicy": "on-request" } }
{ "jsonrpc": "2.0", "id": 1, "method": "newConversation", "params": { "model": "gpt-5.1", "approvalPolicy": "on-request" } }
```

Server responds:

```json
{ "jsonrpc": "2.0", "id": 1, "result": { "conversationId": "c7b0…", "model": "gpt-5", "rolloutPath": "/path/to/rollout.jsonl" } }
{ "jsonrpc": "2.0", "id": 1, "result": { "conversationId": "c7b0…", "model": "gpt-5.1", "rolloutPath": "/path/to/rollout.jsonl" } }
```

Then send input:

@@ -37,7 +37,7 @@ async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
.arg("--output-schema")
.arg(&schema_path)
.arg("-m")
.arg("gpt-5")
.arg("gpt-5.1")
.arg("tell me a joke")
.assert()
.success();

@@ -196,7 +196,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
.arg("--sandbox")
.arg("workspace-write")
.arg("--model")
.arg("gpt-5")
.arg("gpt-5.1")
.arg("-C")
.arg(env!("CARGO_MANIFEST_DIR"))
.arg(&prompt)
@@ -218,7 +218,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
.arg("--sandbox")
.arg("workspace-write")
.arg("--model")
.arg("gpt-5-high")
.arg("gpt-5.1-high")
.arg("-C")
.arg(env!("CARGO_MANIFEST_DIR"))
.arg(&prompt2)
@@ -231,7 +231,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {

let stderr = String::from_utf8(output.stderr)?;
assert!(
stderr.contains("model: gpt-5-high"),
stderr.contains("model: gpt-5.1-high"),
"stderr missing model override: {stderr}"
);
if cfg!(target_os = "windows") {

@@ -10,7 +10,7 @@ expression: sanitized
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
│ information on rate limits and credits │
│ │
│ Model: gpt-5-codex (reasoning none, summaries auto) │
│ Model: gpt-5.1-codex (reasoning none, summaries auto) │
│ Directory: [[workspace]] │
│ Approval: on-request │
│ Sandbox: read-only │

@@ -4,20 +4,20 @@ expression: sanitized
---
/status

╭─────────────────────────────────────────────────────────────────────╮
│ >_ OpenAI Codex (v0.0.0) │
│ │
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
│ information on rate limits and credits │
│ │
│ Model: gpt-5-codex (reasoning high, summaries detailed) │
│ Directory: [[workspace]] │
│ Approval: on-request │
│ Sandbox: workspace-write │
│ Agents.md: <none> │
│ │
│ Token usage: 1.9K total (1K input + 900 output) │
│ Context window: 100% left (2.25K used / 272K) │
│ 5h limit: [██████░░░░░░░░░░░░░░] 28% left (resets 03:14) │
│ Weekly limit: [███████████░░░░░░░░░] 55% left (resets 03:24) │
╰─────────────────────────────────────────────────────────────────────╯
╭───────────────────────────────────────────────────────────────────────╮
│ >_ OpenAI Codex (v0.0.0) │
│ │
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
│ information on rate limits and credits │
│ │
│ Model: gpt-5.1-codex (reasoning high, summaries detailed) │
│ Directory: [[workspace]] │
│ Approval: on-request │
│ Sandbox: workspace-write │
│ Agents.md: <none> │
│ │
│ Token usage: 1.9K total (1K input + 900 output) │
│ Context window: 100% left (2.25K used / 272K) │
│ 5h limit: [██████░░░░░░░░░░░░░░] 28% left (resets 03:14) │
│ Weekly limit: [███████████░░░░░░░░░] 55% left (resets 03:24) │
╰───────────────────────────────────────────────────────────────────────╯

@@ -4,19 +4,19 @@ expression: sanitized
---
/status

╭─────────────────────────────────────────────────────────────────╮
│ >_ OpenAI Codex (v0.0.0) │
│ │
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
│ information on rate limits and credits │
│ │
│ Model: gpt-5-codex (reasoning none, summaries auto) │
│ Directory: [[workspace]] │
│ Approval: on-request │
│ Sandbox: read-only │
│ Agents.md: <none> │
│ │
│ Token usage: 750 total (500 input + 250 output) │
│ Context window: 100% left (750 used / 272K) │
│ Limits: data not available yet │
╰─────────────────────────────────────────────────────────────────╯
╭───────────────────────────────────────────────────────────────────╮
│ >_ OpenAI Codex (v0.0.0) │
│ │
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
│ information on rate limits and credits │
│ │
│ Model: gpt-5.1-codex (reasoning none, summaries auto) │
│ Directory: [[workspace]] │
│ Approval: on-request │
│ Sandbox: read-only │
│ Agents.md: <none> │
│ │
│ Token usage: 750 total (500 input + 250 output) │
│ Context window: 100% left (750 used / 272K) │
│ Limits: data not available yet │
╰───────────────────────────────────────────────────────────────────╯

@@ -4,19 +4,19 @@ expression: sanitized
---
/status

╭─────────────────────────────────────────────────────────────────╮
│ >_ OpenAI Codex (v0.0.0) │
│ │
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
│ information on rate limits and credits │
│ │
│ Model: gpt-5-codex (reasoning none, summaries auto) │
│ Directory: [[workspace]] │
│ Approval: on-request │
│ Sandbox: read-only │
│ Agents.md: <none> │
│ │
│ Token usage: 750 total (500 input + 250 output) │
│ Context window: 100% left (750 used / 272K) │
│ Limits: data not available yet │
╰─────────────────────────────────────────────────────────────────╯
╭───────────────────────────────────────────────────────────────────╮
│ >_ OpenAI Codex (v0.0.0) │
│ │
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
│ information on rate limits and credits │
│ │
│ Model: gpt-5.1-codex (reasoning none, summaries auto) │
│ Directory: [[workspace]] │
│ Approval: on-request │
│ Sandbox: read-only │
│ Agents.md: <none> │
│ │
│ Token usage: 750 total (500 input + 250 output) │
│ Context window: 100% left (750 used / 272K) │
│ Limits: data not available yet │
╰───────────────────────────────────────────────────────────────────╯

@@ -10,7 +10,7 @@ expression: sanitized
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
│ information on rate limits and credits │
│ │
│ Model: gpt-5-codex (reasoning none, summaries auto) │
│ Model: gpt-5.1-codex (reasoning none, summaries auto) │
│ Directory: [[workspace]] │
│ Approval: on-request │
│ Sandbox: read-only │

@@ -4,21 +4,19 @@ expression: sanitized
---
/status

╭────────────────────────────────────────────╮
│ >_ OpenAI Codex (v0.0.0) │
│ │
│ Visit https://chatgpt.com/codex/settings/ │
│ usage for up-to-date │
│ information on rate limits and credits │
│ │
│ Model: gpt-5-codex (reasoning │
│ Directory: [[workspace]] │
│ Approval: on-request │
│ Sandbox: read-only │
│ Agents.md: <none> │
│ │
│ Token usage: 1.9K total (1K input + │
│ Context window: 100% left (2.25K used / │
│ 5h limit: [██████░░░░░░░░░░░░░░] │
│ (resets 03:14) │
╰────────────────────────────────────────────╯
╭────────────────────────────────────────────────────────────────────╮
│ >_ OpenAI Codex (v0.0.0) │
│ │
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
│ information on rate limits and credits │
│ │
│ Model: gpt-5.1-codex (reasoning high, summaries detail │
│ Directory: [[workspace]] │
│ Approval: on-request │
│ Sandbox: read-only │
│ Agents.md: <none> │
│ │
│ Token usage: 1.9K total (1K input + 900 output) │
│ Context window: 100% left (2.25K used / 272K) │
│ 5h limit: [██████░░░░░░░░░░░░░░] 28% left (resets 03:14) │
╰────────────────────────────────────────────────────────────────────╯

@@ -81,7 +81,7 @@ fn reset_at_from(captured_at: &chrono::DateTime<chrono::Local>, seconds: i64) ->
fn status_snapshot_includes_reasoning_details() {
let temp_home = TempDir::new().expect("temp home");
let mut config = test_config(&temp_home);
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_provider_id = "openai".to_string();
config.model_reasoning_effort = Some(ReasoningEffort::High);
config.model_reasoning_summary = ReasoningSummary::Detailed;
@@ -144,7 +144,7 @@ fn status_snapshot_includes_reasoning_details() {
fn status_snapshot_includes_monthly_limit() {
let temp_home = TempDir::new().expect("temp home");
let mut config = test_config(&temp_home);
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_provider_id = "openai".to_string();
config.cwd = PathBuf::from("/workspace/tests");

@@ -194,7 +194,7 @@ fn status_snapshot_includes_monthly_limit() {
fn status_card_token_usage_excludes_cached_tokens() {
let temp_home = TempDir::new().expect("temp home");
let mut config = test_config(&temp_home);
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.cwd = PathBuf::from("/workspace/tests");

let auth_manager = test_auth_manager(&config);
@@ -232,7 +232,7 @@ fn status_card_token_usage_excludes_cached_tokens() {
fn status_snapshot_truncates_in_narrow_terminal() {
let temp_home = TempDir::new().expect("temp home");
let mut config = test_config(&temp_home);
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_provider_id = "openai".to_string();
config.model_reasoning_effort = Some(ReasoningEffort::High);
config.model_reasoning_summary = ReasoningSummary::Detailed;
@@ -270,7 +270,7 @@ fn status_snapshot_truncates_in_narrow_terminal() {
Some(&rate_display),
captured_at,
);
let mut rendered_lines = render_lines(&composite.display_lines(46));
let mut rendered_lines = render_lines(&composite.display_lines(70));
if cfg!(windows) {
for line in &mut rendered_lines {
*line = line.replace('\\', "/");
@@ -285,7 +285,7 @@ fn status_snapshot_truncates_in_narrow_terminal() {
fn status_snapshot_shows_missing_limits_message() {
let temp_home = TempDir::new().expect("temp home");
let mut config = test_config(&temp_home);
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.cwd = PathBuf::from("/workspace/tests");

let auth_manager = test_auth_manager(&config);
@@ -325,7 +325,7 @@ fn status_snapshot_shows_missing_limits_message() {
fn status_snapshot_shows_empty_limits_message() {
let temp_home = TempDir::new().expect("temp home");
let mut config = test_config(&temp_home);
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.cwd = PathBuf::from("/workspace/tests");

let auth_manager = test_auth_manager(&config);
@@ -370,7 +370,7 @@ fn status_snapshot_shows_empty_limits_message() {
fn status_snapshot_shows_stale_limits_message() {
let temp_home = TempDir::new().expect("temp home");
let mut config = test_config(&temp_home);
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.cwd = PathBuf::from("/workspace/tests");

let auth_manager = test_auth_manager(&config);

@@ -158,7 +158,7 @@ mod tests {
#[test]
fn model_mapping_builds_tokenizer() -> Result<(), TokenizerError> {
// Choose a long-standing model alias that maps to cl100k_base.
let tok = Tokenizer::for_model("gpt-5")?;
let tok = Tokenizer::for_model("gpt-5.1")?;
let ids = tok.encode("ok", false);
let back = tok.decode(&ids)?;
assert_eq!(back, "ok");

@@ -64,7 +64,7 @@ Notes:
The model that Codex should use.

```toml
model = "gpt-5" # overrides the default ("gpt-5-codex" on macOS/Linux, "gpt-5" on Windows)
model = "gpt-5.1" # overrides the default ("gpt-5.1-codex" on macOS/Linux, "gpt-5.1" on Windows)
```

### model_providers
@@ -191,7 +191,7 @@ model = "mistral"

### model_reasoning_effort

If the selected model is known to support reasoning (for example: `o3`, `o4-mini`, `codex-*`, `gpt-5`, `gpt-5-codex`), reasoning is enabled by default when using the Responses API. As explained in the [OpenAI Platform documentation](https://platform.openai.com/docs/guides/reasoning?api-mode=responses#get-started-with-reasoning), this can be set to:
If the selected model is known to support reasoning (for example: `o3`, `o4-mini`, `codex-*`, `gpt-5.1`, `gpt-5.1-codex`), reasoning is enabled by default when using the Responses API. As explained in the [OpenAI Platform documentation](https://platform.openai.com/docs/guides/reasoning?api-mode=responses#get-started-with-reasoning), this can be set to:

- `"minimal"`
- `"low"`
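
To make the effort setting concrete, here is a minimal `config.toml` sketch; the `"high"` value is assumed from the list of accepted efforts, which this diff excerpt truncates:

```toml
# Minimal sketch: pair the new default model with a higher reasoning effort.
# Assumes "high" is one of the accepted values listed above.
model = "gpt-5.1-codex"
model_reasoning_effort = "high"
```
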
@@ -227,7 +227,7 @@ When set, Codex includes a `text` object in the request payload with the configu
Example:

```toml
model = "gpt-5"
model = "gpt-5.1"
model_verbosity = "low"
```

@@ -818,7 +818,7 @@ Users can specify config values at multiple levels. Order of precedence is as fo
1. custom command-line argument, e.g., `--model o3`
2. as part of a profile, where the `--profile` is specified via a CLI (or in the config file itself)
3. as an entry in `config.toml`, e.g., `model = "o3"`
4. the default value that comes with Codex CLI (i.e., Codex CLI defaults to `gpt-5-codex`)
4. the default value that comes with Codex CLI (i.e., Codex CLI defaults to `gpt-5.1-codex`)

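As a sketch of how these precedence layers interact (the profile name `work` is illustrative): with the `config.toml` below, `codex --profile work` resolves the model to `gpt-5.1`, a plain `codex` invocation falls back to the top-level `gpt-5.1-codex`, and passing `--model o3` on the command line overrides both.

```toml
# Layer 3: top-level config.toml entry, used when nothing higher-precedence applies.
model = "gpt-5.1-codex"

# Layer 2: profile entry; wins over the top-level value when --profile work is passed.
[profiles.work]
model = "gpt-5.1"
```
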
### history

@@ -921,7 +921,7 @@ Valid values:

| Key | Type / Values | Notes |
| ------------------------------------------------ | ----------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------- |
| `model` | string | Model to use (e.g., `gpt-5-codex`). |
| `model` | string | Model to use (e.g., `gpt-5.1-codex`). |
| `model_provider` | string | Provider id from `model_providers` (default: `openai`). |
| `model_context_window` | number | Context window tokens. |
| `model_max_output_tokens` | number | Max output tokens. |

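A brief sketch of a named profile combining several of the keys above; the profile name and the numeric values are illustrative, not shipped defaults:

```toml
[profiles.deep-review]
model = "gpt-5.1-codex"
model_provider = "openai"        # provider id from [model_providers]
model_context_window = 272000    # context window tokens (illustrative)
model_max_output_tokens = 32768  # max output tokens (illustrative)
```
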
@@ -19,11 +19,11 @@ Use this example configuration as a starting point. For an explanation of each f
################################################################################

# Primary model used by Codex. Default differs by OS; non-Windows defaults here.
# Linux/macOS default: "gpt-5-codex"; Windows default: "gpt-5".
model = "gpt-5-codex"
# Linux/macOS default: "gpt-5.1-codex"; Windows default: "gpt-5.1".
model = "gpt-5.1-codex"

# Model used by the /review feature (code reviews). Default: "gpt-5-codex".
review_model = "gpt-5-codex"
# Model used by the /review feature (code reviews). Default: "gpt-5.1-codex".
review_model = "gpt-5.1-codex"

# Provider id selected from [model_providers]. Default: "openai".
model_provider = "openai"
@@ -315,7 +315,7 @@ mcp_oauth_credentials_store = "auto"
[profiles]

# [profiles.default]
# model = "gpt-5-codex"
# model = "gpt-5.1-codex"
# model_provider = "openai"
# approval_policy = "on-request"
# sandbox_mode = "read-only"

@@ -99,8 +99,8 @@ codex exec resume --last "Fix use-after-free issues"
Only the conversation context is preserved; you must still provide flags to customize Codex behavior.

```shell
codex exec --model gpt-5-codex --json "Review the change, look for use-after-free issues"
codex exec --model gpt-5 --json resume --last "Fix use-after-free issues"
codex exec --model gpt-5.1-codex --json "Review the change, look for use-after-free issues"
codex exec --model gpt-5.1 --json resume --last "Fix use-after-free issues"
```

## Authentication
