mirror of
https://github.com/openai/codex.git
synced 2026-04-29 17:06:51 +00:00
## Why

`argument-comment-lint` was green in CI even though the repo still had many uncommented literal arguments. The main gap was target coverage: the repo wrapper did not force Cargo to inspect test-only call sites, so examples like the `latest_session_lookup_params(true, ...)` tests in `codex-rs/tui_app_server/src/lib.rs` never entered the blocking CI path. This change cleans up the existing backlog, makes the default repo lint path cover all Cargo targets, and starts rolling that stricter CI enforcement out on the platform where it is currently validated.

## What changed

- Mechanically fixed existing `argument-comment-lint` violations across the `codex-rs` workspace, including tests, examples, and benches.
- Updated `tools/argument-comment-lint/run-prebuilt-linter.sh` and `tools/argument-comment-lint/run.sh` so non-`--fix` runs default to `--all-targets` unless the caller explicitly narrows the target set.
- Fixed both wrappers so forwarded cargo arguments after `--` are preserved with a single separator.
- Documented the new default behavior in `tools/argument-comment-lint/README.md`.
- Updated `rust-ci` so the macOS lint lane keeps the plain wrapper invocation and therefore enforces `--all-targets`, while Linux and Windows temporarily pass `-- --lib --bins`.

That temporary CI split keeps the stricter all-targets check where it is already cleaned up, while leaving room to finish the remaining Linux- and Windows-specific target-gated cleanup before enabling `--all-targets` on those runners. The Linux and Windows failures on the intermediate revision were caused by the wrapper forwarding bug, not by additional lint findings in those lanes.
## Validation

- `bash -n tools/argument-comment-lint/run.sh`
- `bash -n tools/argument-comment-lint/run-prebuilt-linter.sh`
- Shell-level wrapper forwarding check for `-- --lib --bins`
- Shell-level wrapper forwarding check for `-- --tests`
- `just argument-comment-lint`
- `cargo test` in `tools/argument-comment-lint`
- `cargo test -p codex-terminal-detection`

## Follow-up

- Clean up remaining Linux-only target-gated callsites, then switch the Linux lint lane back to the plain wrapper invocation.
- Clean up remaining Windows-only target-gated callsites, then switch the Windows lint lane back to the plain wrapper invocation.
1094 lines
38 KiB
Rust
1094 lines
38 KiB
Rust
use anyhow::Result;
|
|
use codex_core::CodexAuth;
|
|
use codex_core::config::types::Personality;
|
|
use codex_core::models_manager::manager::RefreshStrategy;
|
|
use codex_features::Feature;
|
|
use codex_protocol::config_types::ReasoningSummary;
|
|
use codex_protocol::config_types::ServiceTier;
|
|
use codex_protocol::openai_models::ConfigShellToolType;
|
|
use codex_protocol::openai_models::InputModality;
|
|
use codex_protocol::openai_models::ModelInfo;
|
|
use codex_protocol::openai_models::ModelVisibility;
|
|
use codex_protocol::openai_models::ModelsResponse;
|
|
use codex_protocol::openai_models::ReasoningEffort;
|
|
use codex_protocol::openai_models::ReasoningEffortPreset;
|
|
use codex_protocol::openai_models::TruncationPolicyConfig;
|
|
use codex_protocol::openai_models::default_input_modalities;
|
|
use codex_protocol::protocol::AskForApproval;
|
|
use codex_protocol::protocol::EventMsg;
|
|
use codex_protocol::protocol::Op;
|
|
use codex_protocol::protocol::SandboxPolicy;
|
|
use codex_protocol::user_input::UserInput;
|
|
use core_test_support::responses::ev_completed_with_tokens;
|
|
use core_test_support::responses::ev_image_generation_call;
|
|
use core_test_support::responses::ev_response_created;
|
|
use core_test_support::responses::mount_models_once;
|
|
use core_test_support::responses::mount_sse_once;
|
|
use core_test_support::responses::mount_sse_sequence;
|
|
use core_test_support::responses::sse;
|
|
use core_test_support::responses::sse_completed;
|
|
use core_test_support::responses::start_mock_server;
|
|
use core_test_support::skip_if_no_network;
|
|
use core_test_support::test_codex::test_codex;
|
|
use core_test_support::wait_for_event;
|
|
use pretty_assertions::assert_eq;
|
|
use std::path::Path;
|
|
use std::path::PathBuf;
|
|
use wiremock::MockServer;
|
|
|
|
/// Returns the on-disk location of a generated-image artifact:
/// `<codex_home>/generated_images/<session_id>/<call_id>.png`, with both
/// variable path components sanitized to filesystem-safe characters.
fn image_generation_artifact_path(codex_home: &Path, session_id: &str, call_id: &str) -> PathBuf {
    // Maps anything outside `[A-Za-z0-9_-]` to '_'; an all-replaced or empty
    // input falls back to a fixed placeholder so the path component is never
    // empty.
    fn sanitize(value: &str) -> String {
        let cleaned: String = value
            .chars()
            .map(|ch| match ch {
                c if c.is_ascii_alphanumeric() => c,
                '-' | '_' => ch,
                _ => '_',
            })
            .collect();
        if cleaned.is_empty() {
            "generated_image".to_string()
        } else {
            cleaned
        }
    }

    let file_name = format!("{}.png", sanitize(call_id));
    codex_home
        .join("generated_images")
        .join(sanitize(session_id))
        .join(file_name)
}
|
|
|
|
fn test_model_info(
|
|
slug: &str,
|
|
display_name: &str,
|
|
description: &str,
|
|
input_modalities: Vec<InputModality>,
|
|
) -> ModelInfo {
|
|
ModelInfo {
|
|
slug: slug.to_string(),
|
|
display_name: display_name.to_string(),
|
|
description: Some(description.to_string()),
|
|
default_reasoning_level: Some(ReasoningEffort::Medium),
|
|
supported_reasoning_levels: vec![ReasoningEffortPreset {
|
|
effort: ReasoningEffort::Medium,
|
|
description: ReasoningEffort::Medium.to_string(),
|
|
}],
|
|
shell_type: ConfigShellToolType::ShellCommand,
|
|
visibility: ModelVisibility::List,
|
|
supported_in_api: true,
|
|
input_modalities,
|
|
used_fallback_model_metadata: false,
|
|
supports_search_tool: false,
|
|
priority: 1,
|
|
upgrade: None,
|
|
base_instructions: "base instructions".to_string(),
|
|
model_messages: None,
|
|
supports_reasoning_summaries: false,
|
|
default_reasoning_summary: ReasoningSummary::Auto,
|
|
support_verbosity: false,
|
|
default_verbosity: None,
|
|
availability_nux: None,
|
|
apply_patch_tool_type: None,
|
|
web_search_tool_type: Default::default(),
|
|
truncation_policy: TruncationPolicyConfig::bytes(/*limit*/ 10_000),
|
|
supports_parallel_tool_calls: false,
|
|
supports_image_detail_original: false,
|
|
context_window: Some(272_000),
|
|
auto_compact_token_limit: None,
|
|
effective_context_window_percent: 95,
|
|
experimental_supported_tools: Vec::new(),
|
|
}
|
|
}
|
|
|
|
/// Switching models via `Op::OverrideTurnContext` should cause the next user
/// turn's request to carry a `<model_switch>` developer message stating that
/// the user was previously using a different model.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn model_change_appends_model_instructions_developer_message() -> Result<()> {
    skip_if_no_network!(Ok(()));

    // Two queued SSE completions: one per user turn submitted below.
    let server = MockServer::start().await;
    let resp_mock = mount_sse_sequence(
        &server,
        vec![sse_completed("resp-1"), sse_completed("resp-2")],
    )
    .await;

    let mut builder = test_codex().with_model("gpt-5.2-codex");
    let test = builder.build(&server).await?;
    let next_model = "gpt-5.1-codex-max";

    // First turn on the originally configured model.
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "hello".into(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: test.session_configured.model.clone(),
            effort: test.config.model_reasoning_effort,
            summary: None,
            service_tier: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    // Switch the session to a different model without starting a turn; only
    // the `model` override is set.
    test.codex
        .submit(Op::OverrideTurnContext {
            cwd: None,
            approval_policy: None,
            approvals_reviewer: None,
            sandbox_policy: None,
            windows_sandbox_level: None,
            model: Some(next_model.to_string()),
            effort: None,
            summary: None,
            service_tier: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;

    // Second turn, now explicitly on the new model.
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "switch models".into(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: next_model.to_string(),
            effort: test.config.model_reasoning_effort,
            summary: None,
            service_tier: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    let requests = resp_mock.requests();
    assert_eq!(requests.len(), 2, "expected two model requests");

    // The request for the post-switch turn must contain the `<model_switch>`
    // developer message with the expected preamble text.
    let second_request = requests.last().expect("expected second request");
    let developer_texts = second_request.message_input_texts("developer");
    let model_switch_text = developer_texts
        .iter()
        .find(|text| text.contains("<model_switch>"))
        .expect("expected model switch message in developer input");
    assert!(
        model_switch_text.contains("The user was previously using a different model."),
        "expected model switch preamble, got: {model_switch_text:?}"
    );

    Ok(())
}
|
|
|
|
/// When a single `OverrideTurnContext` changes both the model and the
/// personality, the next turn should carry only the `<model_switch>` developer
/// message — no separate `<personality_spec>` update message.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn model_and_personality_change_only_appends_model_instructions() -> Result<()> {
    skip_if_no_network!(Ok(()));

    // Two queued SSE completions: one per user turn submitted below.
    let server = start_mock_server().await;
    let resp_mock = mount_sse_sequence(
        &server,
        vec![sse_completed("resp-1"), sse_completed("resp-2")],
    )
    .await;

    // The personality feature must be enabled for the override to matter.
    let mut builder = test_codex()
        .with_model("gpt-5.2-codex")
        .with_config(|config| {
            config
                .features
                .enable(Feature::Personality)
                .expect("test config should allow feature update");
        });
    let test = builder.build(&server).await?;
    let next_model = "exp-codex-personality";

    // First turn on the originally configured model.
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "hello".into(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: test.session_configured.model.clone(),
            effort: test.config.model_reasoning_effort,
            summary: None,
            service_tier: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    // Change model AND personality in the same override.
    test.codex
        .submit(Op::OverrideTurnContext {
            cwd: None,
            approval_policy: None,
            approvals_reviewer: None,
            sandbox_policy: None,
            windows_sandbox_level: None,
            model: Some(next_model.to_string()),
            effort: None,
            summary: None,
            service_tier: None,
            collaboration_mode: None,
            personality: Some(Personality::Pragmatic),
        })
        .await?;

    // Second turn on the new model.
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "switch model and personality".into(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: next_model.to_string(),
            effort: test.config.model_reasoning_effort,
            summary: None,
            service_tier: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    let requests = resp_mock.requests();
    assert_eq!(requests.len(), 2, "expected two model requests");

    // Expect the model-switch message but NOT a personality-spec message in
    // the second request's developer input.
    let second_request = requests.last().expect("expected second request");
    let developer_texts = second_request.message_input_texts("developer");
    assert!(
        developer_texts
            .iter()
            .any(|text| text.contains("<model_switch>")),
        "expected model switch message when model changes"
    );
    assert!(
        !developer_texts
            .iter()
            .any(|text| text.contains("<personality_spec>")),
        "did not expect personality update message when model changed in same turn"
    );

    Ok(())
}
|
|
|
|
/// A per-turn service tier should only affect the HTTP request for that turn:
/// the first turn (Fast) is expected to serialize `service_tier` as
/// `"priority"`, and the second turn (no tier) must omit the field entirely.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn service_tier_change_is_applied_on_next_http_turn() -> Result<()> {
    skip_if_no_network!(Ok(()));

    // Two queued SSE completions: one per submitted turn.
    let server = start_mock_server().await;
    let resp_mock = mount_sse_sequence(
        &server,
        vec![sse_completed("resp-1"), sse_completed("resp-2")],
    )
    .await;

    let test = test_codex().build(&server).await?;

    // Turn 1 with an explicit Fast tier, turn 2 with no tier.
    test.submit_turn_with_service_tier("fast turn", Some(ServiceTier::Fast))
        .await?;
    test.submit_turn_with_service_tier("standard turn", /*service_tier*/ None)
        .await?;

    let requests = resp_mock.requests();
    assert_eq!(requests.len(), 2, "expected two model requests");

    let first_body = requests[0].body_json();
    let second_body = requests[1].body_json();

    // Fast is expected on the wire as "priority"; the tier-less turn must not
    // carry a `service_tier` key at all.
    assert_eq!(first_body["service_tier"].as_str(), Some("priority"));
    assert_eq!(second_body.get("service_tier"), None);

    Ok(())
}
|
|
|
|
/// A turn submitted with `ServiceTier::Flex` should produce an HTTP request
/// body whose `service_tier` field is the string `"flex"`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn flex_service_tier_is_applied_to_http_turn() -> Result<()> {
    skip_if_no_network!(Ok(()));

    // Single queued SSE completion for the single turn below.
    let server = start_mock_server().await;
    let resp_mock = mount_sse_once(&server, sse_completed("resp-1")).await;

    let test = test_codex().build(&server).await?;

    test.submit_turn_with_service_tier("flex turn", Some(ServiceTier::Flex))
        .await?;

    // Exactly one request is expected; inspect its JSON body.
    let request = resp_mock.single_request();
    let body = request.body_json();
    assert_eq!(body["service_tier"].as_str(), Some("flex"));

    Ok(())
}
|
|
|
|
/// Switching from an image-capable model to a text-only model should strip
/// previously uploaded image content from replayed history, replacing it with
/// a placeholder text while preserving the surrounding image open/close tags.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn model_change_from_image_to_text_strips_prior_image_content() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = MockServer::start().await;
    let image_model_slug = "test-image-model";
    let text_model_slug = "test-text-only-model";
    // Two fixture models: one with the default (image-capable) modalities and
    // one restricted to text input.
    let image_model = test_model_info(
        image_model_slug,
        "Test Image Model",
        "supports image input",
        default_input_modalities(),
    );
    let text_model = test_model_info(
        text_model_slug,
        "Test Text Model",
        "text only",
        vec![InputModality::Text],
    );
    mount_models_once(
        &server,
        ModelsResponse {
            models: vec![image_model, text_model],
        },
    )
    .await;

    // Two queued SSE completions: one per user turn submitted below.
    let responses = mount_sse_sequence(
        &server,
        vec![sse_completed("resp-1"), sse_completed("resp-2")],
    )
    .await;

    let mut builder = test_codex()
        .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
        .with_config(move |config| {
            config.model = Some(image_model_slug.to_string());
        });
    let test = builder.build(&server).await?;
    // Warm the models manager so the fixture models are known to the session.
    let models_manager = test.thread_manager.get_models_manager();
    let _ = models_manager
        .list_models(RefreshStrategy::OnlineIfUncached)
        .await;
    // Tiny 1x1 PNG as a data URI.
    let image_url = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGNgYAAAAAMAASsJTYQAAAAASUVORK5CYII="
        .to_string();

    // Turn 1: upload an image plus text while on the image-capable model.
    test.codex
        .submit(Op::UserTurn {
            items: vec![
                UserInput::Image {
                    image_url: image_url.clone(),
                },
                UserInput::Text {
                    text: "first turn".to_string(),
                    text_elements: Vec::new(),
                },
            ],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: image_model_slug.to_string(),
            effort: test.config.model_reasoning_effort,
            summary: None,
            service_tier: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    // Turn 2: text only, on the text-only model.
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "second turn".to_string(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: text_model_slug.to_string(),
            effort: test.config.model_reasoning_effort,
            summary: None,
            service_tier: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    let requests = responses.requests();
    assert_eq!(requests.len(), 2, "expected two model requests");

    // Request 1 (image model): the uploaded image must be present.
    let first_request = requests.first().expect("expected first request");
    assert!(
        !first_request.message_input_image_urls("user").is_empty(),
        "first request should include the uploaded image"
    );

    // Request 2 (text-only model): the image is stripped, replaced by the
    // placeholder text, and the image open/close tag texts are preserved.
    let second_request = requests.last().expect("expected second request");
    assert!(
        second_request.message_input_image_urls("user").is_empty(),
        "second request should strip unsupported image content"
    );
    let second_user_texts = second_request.message_input_texts("user");
    assert!(
        second_user_texts
            .iter()
            .any(|text| text == "image content omitted because you do not support image input"),
        "second request should include the image-omitted placeholder text"
    );
    assert!(
        second_user_texts
            .iter()
            .any(|text| text == &codex_protocol::models::image_open_tag_text()),
        "second request should preserve the image open tag text"
    );
    assert!(
        second_user_texts
            .iter()
            .any(|text| text == &codex_protocol::models::image_close_tag_text()),
        "second request should preserve the image close tag text"
    );

    Ok(())
}
|
|
|
|
/// A generated image from a prior turn should be replayed verbatim to an
/// image-capable model as an `image_generation_call` input (same id and
/// payload), along with a developer note about where the image was saved.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn generated_image_is_replayed_for_image_capable_models() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = MockServer::start().await;
    let image_model_slug = "test-image-model";
    let image_model = test_model_info(
        image_model_slug,
        "Test Image Model",
        "supports image input",
        default_input_modalities(),
    );
    mount_models_once(
        &server,
        ModelsResponse {
            models: vec![image_model],
        },
    )
    .await;

    // Turn 1's SSE stream contains a completed image-generation call
    // ("ig_123" with payload "Zm9v"); turn 2 is a plain completion.
    let responses = mount_sse_sequence(
        &server,
        vec![
            sse(vec![
                ev_response_created("resp-1"),
                ev_image_generation_call("ig_123", "completed", "lobster", "Zm9v"),
                ev_completed_with_tokens("resp-1", /*total_tokens*/ 10),
            ]),
            sse_completed("resp-2"),
        ],
    )
    .await;

    let mut builder = test_codex()
        .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
        .with_config(move |config| {
            config.model = Some(image_model_slug.to_string());
        });
    let test = builder.build(&server).await?;
    // Ensure no stale artifact exists before the run; best-effort removal.
    let saved_path = image_generation_artifact_path(
        test.codex_home_path(),
        &test.session_configured.session_id.to_string(),
        "ig_123",
    );
    let _ = std::fs::remove_file(&saved_path);
    // Warm the models manager so the fixture model is known to the session.
    let models_manager = test.thread_manager.get_models_manager();
    let _ = models_manager
        .list_models(RefreshStrategy::OnlineIfUncached)
        .await;

    // Turn 1: triggers the mocked image generation.
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "generate a lobster".to_string(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: image_model_slug.to_string(),
            effort: test.config.model_reasoning_effort,
            service_tier: None,
            summary: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    // Turn 2: the prior generated image should be replayed in history.
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "describe the generated image".to_string(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: image_model_slug.to_string(),
            effort: test.config.model_reasoning_effort,
            service_tier: None,
            summary: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    let requests = responses.requests();
    assert_eq!(requests.len(), 2, "expected two model requests");

    // The second request must replay the image-generation call with the
    // original id and base64 payload intact.
    let second_request = requests.last().expect("expected second request");
    let image_generation_calls = second_request.inputs_of_type("image_generation_call");
    assert_eq!(
        image_generation_calls.len(),
        1,
        "expected generated image history to be replayed as an image_generation_call"
    );
    assert_eq!(
        image_generation_calls[0]["id"].as_str(),
        Some("ig_123"),
        "expected the original image generation call id to be preserved"
    );
    assert_eq!(
        image_generation_calls[0]["result"].as_str(),
        Some("Zm9v"),
        "expected the original generated image payload to be preserved"
    );
    // The saved-path note must appear in the developer-visible history.
    assert!(
        second_request
            .message_input_texts("developer")
            .iter()
            .any(|text| text.contains("Generated images are saved to")),
        "second request should include the saved-path note in model-visible history"
    );
    // Clean up the artifact written during the run; best-effort removal.
    let _ = std::fs::remove_file(&saved_path);

    Ok(())
}
|
|
|
|
/// Switching to a text-only model after a generated-image turn should keep the
/// `image_generation_call` entry in replayed history (same id) but with its
/// image bytes stripped to an empty result — NOT rewritten into a message
/// input image and NOT replaced with the image-omitted placeholder text.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn model_change_from_generated_image_to_text_preserves_prior_generated_image_call()
-> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = MockServer::start().await;
    let image_model_slug = "test-image-model";
    let text_model_slug = "test-text-only-model";
    // Two fixture models: image-capable and text-only.
    let image_model = test_model_info(
        image_model_slug,
        "Test Image Model",
        "supports image input",
        default_input_modalities(),
    );
    let text_model = test_model_info(
        text_model_slug,
        "Test Text Model",
        "text only",
        vec![InputModality::Text],
    );
    mount_models_once(
        &server,
        ModelsResponse {
            models: vec![image_model, text_model],
        },
    )
    .await;

    // Turn 1's SSE stream contains a completed image-generation call
    // ("ig_123" with payload "Zm9v"); turn 2 is a plain completion.
    let responses = mount_sse_sequence(
        &server,
        vec![
            sse(vec![
                ev_response_created("resp-1"),
                ev_image_generation_call("ig_123", "completed", "lobster", "Zm9v"),
                ev_completed_with_tokens("resp-1", /*total_tokens*/ 10),
            ]),
            sse_completed("resp-2"),
        ],
    )
    .await;

    let mut builder = test_codex()
        .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
        .with_config(move |config| {
            config.model = Some(image_model_slug.to_string());
        });
    let test = builder.build(&server).await?;
    // Ensure no stale artifact exists before the run; best-effort removal.
    let saved_path = image_generation_artifact_path(
        test.codex_home_path(),
        &test.session_configured.session_id.to_string(),
        "ig_123",
    );
    let _ = std::fs::remove_file(&saved_path);
    // Warm the models manager so the fixture models are known to the session.
    let models_manager = test.thread_manager.get_models_manager();
    let _ = models_manager
        .list_models(RefreshStrategy::OnlineIfUncached)
        .await;

    // Turn 1: triggers the mocked image generation on the image model.
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "generate a lobster".to_string(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: image_model_slug.to_string(),
            effort: test.config.model_reasoning_effort,
            service_tier: None,
            summary: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    // Turn 2: switch to the text-only model.
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "describe the generated image".to_string(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: text_model_slug.to_string(),
            effort: test.config.model_reasoning_effort,
            service_tier: None,
            summary: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    let requests = responses.requests();
    assert_eq!(requests.len(), 2, "expected two model requests");

    let second_request = requests.last().expect("expected second request");
    let image_generation_calls = second_request.inputs_of_type("image_generation_call");
    // The generated image must not be converted into a user image input.
    assert!(
        second_request.message_input_image_urls("user").is_empty(),
        "second request should not rewrite generated images into message input images"
    );
    // The call entry itself is preserved, keeping its original id…
    assert!(
        image_generation_calls.len() == 1,
        "second request should preserve the generated image call for text-only models"
    );
    assert_eq!(
        image_generation_calls[0]["id"].as_str(),
        Some("ig_123"),
        "second request should preserve the original generated image call id"
    );
    // …but its image bytes are stripped to an empty result string.
    assert_eq!(
        image_generation_calls[0]["result"].as_str(),
        Some(""),
        "second request should strip generated image bytes for text-only models"
    );
    // The omitted-image placeholder is only for *uploaded* images; it must not
    // appear for generated-image history.
    assert!(
        second_request
            .message_input_texts("user")
            .iter()
            .all(|text| text != "image content omitted because you do not support image input"),
        "second request should not inject the image-omitted placeholder text"
    );
    assert!(
        second_request
            .message_input_texts("developer")
            .iter()
            .any(|text| text.contains("Generated images are saved to")),
        "second request should include the saved-path note in model-visible history"
    );
    // Clean up the artifact written during the run; best-effort removal.
    let _ = std::fs::remove_file(&saved_path);

    Ok(())
}
|
|
|
|
/// Rolling back one turn after an image-generation turn should remove that
/// turn entirely from replayed history: the user text, the generated-image
/// save note, and the `image_generation_call` entry must all be gone.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn thread_rollback_after_generated_image_drops_entire_image_turn_history() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = MockServer::start().await;
    let image_model_slug = "test-image-model";
    let image_model = test_model_info(
        image_model_slug,
        "Test Image Model",
        "supports image input",
        default_input_modalities(),
    );
    mount_models_once(
        &server,
        ModelsResponse {
            models: vec![image_model],
        },
    )
    .await;

    // Turn 1's SSE stream contains a completed image-generation call
    // ("ig_rollback"); the post-rollback turn is a plain completion.
    let responses = mount_sse_sequence(
        &server,
        vec![
            sse(vec![
                ev_response_created("resp-1"),
                ev_image_generation_call("ig_rollback", "completed", "lobster", "Zm9v"),
                ev_completed_with_tokens("resp-1", /*total_tokens*/ 10),
            ]),
            sse_completed("resp-2"),
        ],
    )
    .await;

    let mut builder = test_codex()
        .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
        .with_config(move |config| {
            config.model = Some(image_model_slug.to_string());
        });
    let test = builder.build(&server).await?;
    // Ensure no stale artifact exists before the run; best-effort removal.
    let saved_path = image_generation_artifact_path(
        test.codex_home_path(),
        &test.session_configured.session_id.to_string(),
        "ig_rollback",
    );
    let _ = std::fs::remove_file(&saved_path);
    // Warm the models manager so the fixture model is known to the session.
    let models_manager = test.thread_manager.get_models_manager();
    let _ = models_manager
        .list_models(RefreshStrategy::OnlineIfUncached)
        .await;

    // Turn 1: triggers the mocked image generation.
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "generate a lobster".to_string(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: image_model_slug.to_string(),
            effort: test.config.model_reasoning_effort,
            service_tier: None,
            summary: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    // Roll back the image-generation turn and wait for confirmation.
    test.codex
        .submit(Op::ThreadRollback { num_turns: 1 })
        .await?;
    wait_for_event(&test.codex, |ev| {
        matches!(ev, EventMsg::ThreadRolledBack(_))
    })
    .await;

    // Fresh turn after the rollback.
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "after rollback".to_string(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: image_model_slug.to_string(),
            effort: test.config.model_reasoning_effort,
            service_tier: None,
            summary: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    let requests = responses.requests();
    assert_eq!(requests.len(), 2, "expected two model requests");

    // The post-rollback request must contain no trace of the rolled-back
    // image-generation turn: no user prompt text, no save note, no call entry.
    let second_request = requests.last().expect("expected second request");
    assert!(
        !second_request
            .message_input_texts("user")
            .iter()
            .any(|text| text == "generate a lobster"),
        "rollback should remove the rolled-back image-generation user turn"
    );
    assert!(
        !second_request
            .message_input_texts("developer")
            .iter()
            .any(|text| text.contains("Generated images are saved to")),
        "rollback should remove the generated-image save note with the rolled-back turn"
    );
    assert!(
        second_request
            .inputs_of_type("image_generation_call")
            .is_empty(),
        "rollback should remove the generated image call with the rolled-back turn"
    );
    // Clean up any artifact written during the run; best-effort removal.
    let _ = std::fs::remove_file(&saved_path);

    Ok(())
}
|
|
|
|
/// Verifies that switching the session from a larger-context model to a
/// smaller-context model updates the token context window reported in
/// `TokenCount` and `TurnStarted` events.
///
/// Flow:
/// 1. Serve a remote model list containing two models that differ only in
///    slug/name/description and `context_window` (272k vs 128k).
/// 2. Run one turn on the large model and assert the reported window equals
///    the large model's *effective* window (95% of the raw context window).
/// 3. Switch models via `Op::OverrideTurnContext`, run a second turn, and
///    assert the reported window now equals the smaller model's effective
///    window.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn model_switch_to_smaller_model_updates_token_context_window() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;

    let large_model_slug = "test-image-model";
    let smaller_model_slug = "test-text-only-model";
    let large_context_window = 272_000;
    let smaller_context_window = 128_000;
    // The session reports an "effective" window: this percentage of the raw
    // model context window. Both expected values below are derived from it.
    let effective_context_window_percent = 95;
    let large_effective_window = (large_context_window * effective_context_window_percent) / 100;
    let smaller_effective_window =
        (smaller_context_window * effective_context_window_percent) / 100;

    // Base metadata shared by both models; the smaller model is a clone with
    // only slug, display name, description, and context window changed, so the
    // test isolates context-window size as the differing variable.
    let base_model = ModelInfo {
        slug: large_model_slug.to_string(),
        display_name: "Larger Model".to_string(),
        description: Some("larger context window model".to_string()),
        default_reasoning_level: Some(ReasoningEffort::Medium),
        supported_reasoning_levels: vec![ReasoningEffortPreset {
            effort: ReasoningEffort::Medium,
            description: ReasoningEffort::Medium.to_string(),
        }],
        shell_type: ConfigShellToolType::ShellCommand,
        visibility: ModelVisibility::List,
        supported_in_api: true,
        input_modalities: default_input_modalities(),
        used_fallback_model_metadata: false,
        supports_search_tool: false,
        priority: 1,
        upgrade: None,
        base_instructions: "base instructions".to_string(),
        model_messages: None,
        supports_reasoning_summaries: false,
        default_reasoning_summary: ReasoningSummary::Auto,
        support_verbosity: false,
        default_verbosity: None,
        availability_nux: None,
        apply_patch_tool_type: None,
        web_search_tool_type: Default::default(),
        truncation_policy: TruncationPolicyConfig::bytes(/*limit*/ 10_000),
        supports_parallel_tool_calls: false,
        supports_image_detail_original: false,
        context_window: Some(large_context_window),
        auto_compact_token_limit: None,
        effective_context_window_percent,
        experimental_supported_tools: Vec::new(),
    };
    let mut smaller_model = base_model.clone();
    smaller_model.slug = smaller_model_slug.to_string();
    smaller_model.display_name = "Smaller Model".to_string();
    smaller_model.description = Some("smaller context window model".to_string());
    smaller_model.context_window = Some(smaller_context_window);

    // Serve both models from the mock /models endpoint (single response).
    mount_models_once(
        &server,
        ModelsResponse {
            models: vec![base_model, smaller_model],
        },
    )
    .await;

    // Two SSE completions, consumed in order: one per user turn. The distinct
    // token totals (100 vs 120) let the event waits below identify which
    // turn's TokenCount event they matched.
    mount_sse_sequence(
        &server,
        vec![
            sse(vec![
                ev_response_created("resp-1"),
                ev_completed_with_tokens("resp-1", /*total_tokens*/ 100),
            ]),
            sse(vec![
                ev_response_created("resp-2"),
                ev_completed_with_tokens("resp-2", /*total_tokens*/ 120),
            ]),
        ],
    )
    .await;

    // Start the session configured with the larger model.
    let mut builder = test_codex()
        .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
        .with_config(|config| {
            config.model = Some(large_model_slug.to_string());
        });
    let test = builder.build(&server).await?;

    // Sanity-check the mocked model list: the target model must be listed and
    // both models must report their raw (not yet percent-scaled) windows.
    let models_manager = test.thread_manager.get_models_manager();
    let available_models = models_manager.list_models(RefreshStrategy::Online).await;
    assert!(
        available_models
            .iter()
            .any(|model| model.model == smaller_model_slug),
        "expected {smaller_model_slug} to be available in remote model list"
    );
    let large_model_info = models_manager
        .get_model_info(large_model_slug, &test.config)
        .await;
    assert_eq!(large_model_info.context_window, Some(large_context_window));
    let smaller_model_info = models_manager
        .get_model_info(smaller_model_slug, &test.config)
        .await;
    assert_eq!(
        smaller_model_info.context_window,
        Some(smaller_context_window)
    );

    // Turn 1: run on the larger model.
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "use larger model".into(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: large_model_slug.to_string(),
            effort: test.config.model_reasoning_effort,
            summary: None,
            service_tier: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;

    // Wait for turn 1's TokenCount (matched via its unique total of 100) and
    // assert it reports the large model's effective window.
    let large_window_event = wait_for_event(&test.codex, |event| {
        matches!(
            event,
            EventMsg::TokenCount(token_count)
                if token_count
                    .info
                    .as_ref()
                    .is_some_and(|info| info.last_token_usage.total_tokens == 100)
        )
    })
    .await;
    let EventMsg::TokenCount(large_token_count) = large_window_event else {
        unreachable!("wait_for_event returned unexpected event");
    };
    assert_eq!(
        large_token_count
            .info
            .as_ref()
            .and_then(|info| info.model_context_window),
        Some(large_effective_window)
    );
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    // Switch the session's model to the smaller one; all other turn-context
    // fields are left unchanged (None).
    test.codex
        .submit(Op::OverrideTurnContext {
            cwd: None,
            approval_policy: None,
            approvals_reviewer: None,
            sandbox_policy: None,
            windows_sandbox_level: None,
            model: Some(smaller_model_slug.to_string()),
            effort: None,
            summary: None,
            service_tier: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;

    // Turn 2: run on the smaller model.
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "switch to smaller model".into(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            approvals_reviewer: None,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            model: smaller_model_slug.to_string(),
            effort: test.config.model_reasoning_effort,
            summary: None,
            service_tier: None,
            collaboration_mode: None,
            personality: None,
        })
        .await?;

    // The TurnStarted event for turn 2 should already carry the smaller
    // model's effective window.
    let smaller_turn_started_event = wait_for_event(&test.codex, |event| {
        matches!(
            event,
            EventMsg::TurnStarted(started)
                if started.model_context_window == Some(smaller_effective_window)
        )
    })
    .await;
    let EventMsg::TurnStarted(smaller_turn_started) = smaller_turn_started_event else {
        unreachable!("wait_for_event returned unexpected event");
    };
    assert_eq!(
        smaller_turn_started.model_context_window,
        Some(smaller_effective_window)
    );

    // Wait for turn 2's TokenCount (matched via its unique total of 120) and
    // confirm the reported window switched to the smaller effective window.
    let smaller_window_event = wait_for_event(&test.codex, |event| {
        matches!(
            event,
            EventMsg::TokenCount(token_count)
                if token_count
                    .info
                    .as_ref()
                    .is_some_and(|info| info.last_token_usage.total_tokens == 120)
        )
    })
    .await;
    let EventMsg::TokenCount(smaller_token_count) = smaller_window_event else {
        unreachable!("wait_for_event returned unexpected event");
    };
    let smaller_window = smaller_token_count
        .info
        .as_ref()
        .and_then(|info| info.model_context_window);
    assert_eq!(smaller_window, Some(smaller_effective_window));
    assert_ne!(smaller_window, Some(large_effective_window));
    wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    Ok(())
}
|