From d5bd752c9a679c28cf7b8c21eba9ed4e66aaa15c Mon Sep 17 00:00:00 2001 From: won Date: Wed, 20 May 2026 15:04:07 -0700 Subject: [PATCH] draft --- .../app-server/tests/common/models_cache.rs | 1 + .../codex-api/tests/models_integration.rs | 1 + codex-rs/core/src/guardian/review.rs | 10 ++- codex-rs/core/src/guardian/tests.rs | 88 +++++++++++++++++++ codex-rs/core/tests/suite/model_switching.rs | 2 + codex-rs/core/tests/suite/models_cache_ttl.rs | 1 + codex-rs/core/tests/suite/personality.rs | 2 + codex-rs/core/tests/suite/remote_models.rs | 3 + codex-rs/core/tests/suite/rmcp_client.rs | 1 + .../tests/suite/spawn_agent_description.rs | 1 + codex-rs/core/tests/suite/view_image.rs | 1 + .../src/amazon_bedrock/catalog.rs | 2 + codex-rs/models-manager/src/model_info.rs | 1 + codex-rs/protocol/src/openai_models.rs | 4 + codex-rs/tools/src/tool_config_tests.rs | 1 + 15 files changed, 116 insertions(+), 3 deletions(-) diff --git a/codex-rs/app-server/tests/common/models_cache.rs b/codex-rs/app-server/tests/common/models_cache.rs index be7d5d047f..fed717b2be 100644 --- a/codex-rs/app-server/tests/common/models_cache.rs +++ b/codex-rs/app-server/tests/common/models_cache.rs @@ -51,6 +51,7 @@ fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo { input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, } } diff --git a/codex-rs/codex-api/tests/models_integration.rs b/codex-rs/codex-api/tests/models_integration.rs index d2b31180b9..5a8d179536 100644 --- a/codex-rs/codex-api/tests/models_integration.rs +++ b/codex-rs/codex-api/tests/models_integration.rs @@ -97,6 +97,7 @@ async fn models_client_hits_models_endpoint() { input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, }], }; diff --git a/codex-rs/core/src/guardian/review.rs b/codex-rs/core/src/guardian/review.rs index 2908492ddd..749bf6f3db 100644 --- a/codex-rs/core/src/guardian/review.rs +++ b/codex-rs/core/src/guardian/review.rs @@ -660,9 +660,13 @@ pub(super) async fn run_guardian_review_session( } }; let preferred_model_id = turn.provider.approval_review_preferred_model(); - let preferred_model = available_models - .iter() - .find(|preset| preset.model == preferred_model_id); + let preferred_model = if turn.model_info.auto_review_model_override == Some(true) { + None + } else { + available_models + .iter() + .find(|preset| preset.model == preferred_model_id) + }; let (guardian_model, guardian_reasoning_effort) = if let Some(preset) = preferred_model { let reasoning_effort = preferred_reasoning_effort( preset diff --git a/codex-rs/core/src/guardian/tests.rs b/codex-rs/core/src/guardian/tests.rs index 0544b27770..1fdace5ddc 100644 --- a/codex-rs/core/src/guardian/tests.rs +++ b/codex-rs/core/src/guardian/tests.rs @@ -1225,6 +1225,94 @@ fn guardian_output_schema_requires_only_outcome_and_allows_optional_details() { ); } +async fn guardian_request_model_for_auto_review_override( + auto_review_model_override: Option, +) -> anyhow::Result<(String, String, String)> { + let server = start_mock_server().await; + let guardian_assessment = serde_json::json!({ + "outcome": "allow", + }) + .to_string(); + let request_log = mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-guardian"), + ev_assistant_message("msg-guardian", &guardian_assessment), + ev_completed("resp-guardian"), + ]), + ) + .await; + + let (session, mut turn) = guardian_test_session_and_turn(&server).await; + Arc::get_mut(&mut turn) + .expect("turn should be unique") + .model_info + .auto_review_model_override = auto_review_model_override; + let parent_model = turn.model_info.slug.clone(); + let preferred_model = turn.provider.approval_review_preferred_model().to_string(); + seed_guardian_parent_history(&session, &turn).await; + + let outcome = run_guardian_review_session_for_test( + Arc::clone(&session), + turn, + GuardianApprovalRequest::Shell { + id: "shell-1".to_string(), + command: vec!["git".to_string(), "push".to_string()], + cwd: test_path_buf("/repo/codex-rs/core").abs(), + sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault, + additional_permissions: None, + justification: None, + }, + Some("Sandbox denied outbound git push to github.com.".to_string()), + guardian_output_schema(), + /*external_cancel*/ None, + ) + .await; + let (GuardianReviewOutcome::Completed(_), _) = outcome else { + panic!("expected guardian assessment"); + }; + + let request_model = request_log + .single_request() + .body_json() + .get("model") + .and_then(|value| value.as_str()) + .expect("guardian request should include a model") + .to_string(); + + Ok((request_model, parent_model, preferred_model)) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn guardian_review_uses_parent_model_when_model_catalog_override_is_true() +-> anyhow::Result<()> { + skip_if_no_network!(Ok(())); + + let (request_model, parent_model, preferred_model) = + guardian_request_model_for_auto_review_override(Some(true)).await?; + + assert_eq!(request_model, parent_model); + assert_ne!(request_model, preferred_model); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn guardian_review_uses_preferred_review_model_without_parent_override() -> anyhow::Result<()> +{ + skip_if_no_network!(Ok(())); + + for auto_review_model_override in [None, Some(false)] { + let (request_model, parent_model, preferred_model) = + guardian_request_model_for_auto_review_override(auto_review_model_override).await?; + + assert_eq!(request_model, preferred_model); + assert_ne!(request_model, parent_model); + } + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn guardian_review_request_layout_matches_model_visible_request_snapshot() -> anyhow::Result<()> { diff --git a/codex-rs/core/tests/suite/model_switching.rs b/codex-rs/core/tests/suite/model_switching.rs index 009dfda60f..dd98cab60e 100644 --- a/codex-rs/core/tests/suite/model_switching.rs +++ b/codex-rs/core/tests/suite/model_switching.rs @@ -110,6 +110,7 @@ fn test_model_info( input_modalities, used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), @@ -861,6 +862,7 @@ async fn model_switch_to_smaller_model_updates_token_context_window() -> Result< input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), diff --git a/codex-rs/core/tests/suite/models_cache_ttl.rs b/codex-rs/core/tests/suite/models_cache_ttl.rs index 3b729738df..33f42c3797 100644 --- a/codex-rs/core/tests/suite/models_cache_ttl.rs +++ b/codex-rs/core/tests/suite/models_cache_ttl.rs @@ -368,5 +368,6 @@ fn test_remote_model(slug: &str, priority: i32) -> ModelInfo { input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, } } diff --git a/codex-rs/core/tests/suite/personality.rs b/codex-rs/core/tests/suite/personality.rs index bbe8178f8d..f0cb5d76cc 100644 --- a/codex-rs/core/tests/suite/personality.rs +++ b/codex-rs/core/tests/suite/personality.rs @@ -590,6 +590,7 @@ async fn remote_model_friendly_personality_instructions_with_feature() -> anyhow input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, }; let _models_mock = mount_models_once( @@ -699,6 +700,7 @@ async fn user_turn_personality_remote_model_template_includes_update_message() - input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, }; let _models_mock = mount_models_once( diff --git a/codex-rs/core/tests/suite/remote_models.rs b/codex-rs/core/tests/suite/remote_models.rs index e143cb8e3e..236e0ed3ec 100644 --- a/codex-rs/core/tests/suite/remote_models.rs +++ b/codex-rs/core/tests/suite/remote_models.rs @@ -472,6 +472,7 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), @@ -719,6 +720,7 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> { input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), @@ -1200,6 +1202,7 @@ fn test_remote_model_with_policy( input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, priority, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), diff --git a/codex-rs/core/tests/suite/rmcp_client.rs b/codex-rs/core/tests/suite/rmcp_client.rs index 210c287c2f..10e35d8d98 100644 --- a/codex-rs/core/tests/suite/rmcp_client.rs +++ b/codex-rs/core/tests/suite/rmcp_client.rs @@ -1296,6 +1296,7 @@ async fn stdio_image_responses_are_sanitized_for_text_only_model() -> anyhow::Re input_modalities: vec![InputModality::Text], used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, }], }, ) diff --git a/codex-rs/core/tests/suite/spawn_agent_description.rs b/codex-rs/core/tests/suite/spawn_agent_description.rs index 9a7d70adeb..6b546d91ee 100644 --- a/codex-rs/core/tests/suite/spawn_agent_description.rs +++ b/codex-rs/core/tests/suite/spawn_agent_description.rs @@ -60,6 +60,7 @@ fn test_model_info( input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers, diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index 6306b88b8f..4a0ec0d473 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ b/codex-rs/core/tests/suite/view_image.rs @@ -1354,6 +1354,7 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an input_modalities: vec![InputModality::Text], used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, priority: 1, additional_speed_tiers: Vec::new(), service_tiers: Vec::new(), diff --git a/codex-rs/model-provider/src/amazon_bedrock/catalog.rs b/codex-rs/model-provider/src/amazon_bedrock/catalog.rs index d6fe1de095..d51faf92aa 100644 --- a/codex-rs/model-provider/src/amazon_bedrock/catalog.rs +++ b/codex-rs/model-provider/src/amazon_bedrock/catalog.rs @@ -76,6 +76,7 @@ fn gpt_5_4_cmb_bedrock_model(priority: i32) -> ModelInfo { input_modalities: vec![InputModality::Text, InputModality::Image], used_fallback_model_metadata: false, supports_search_tool: true, + auto_review_model_override: None, } } @@ -117,6 +118,7 @@ fn bedrock_oss_model(slug: &str, display_name: &str, priority: i32) -> ModelInfo input_modalities: vec![InputModality::Text], used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, } } diff --git a/codex-rs/models-manager/src/model_info.rs b/codex-rs/models-manager/src/model_info.rs index 774dd3eaca..dd96e8ab37 100644 --- a/codex-rs/models-manager/src/model_info.rs +++ b/codex-rs/models-manager/src/model_info.rs @@ -98,6 +98,7 @@ pub fn model_info_from_slug(slug: &str) -> ModelInfo { input_modalities: default_input_modalities(), used_fallback_model_metadata: true, // this is the fallback model metadata supports_search_tool: false, + auto_review_model_override: None, } } diff --git a/codex-rs/protocol/src/openai_models.rs b/codex-rs/protocol/src/openai_models.rs index d51e70ddf1..fe727db138 100644 --- a/codex-rs/protocol/src/openai_models.rs +++ b/codex-rs/protocol/src/openai_models.rs @@ -312,6 +312,8 @@ pub struct ModelInfo { pub used_fallback_model_metadata: bool, #[serde(default)] pub supports_search_tool: bool, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub auto_review_model_override: Option, } impl ModelInfo { @@ -597,6 +599,7 @@ mod tests { input_modalities: default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, } } @@ -814,6 +817,7 @@ mod tests { assert!(!model.supports_image_detail_original); assert_eq!(model.web_search_tool_type, WebSearchToolType::Text); assert!(!model.supports_search_tool); + assert_eq!(model.auto_review_model_override, None); } #[test] diff --git a/codex-rs/tools/src/tool_config_tests.rs b/codex-rs/tools/src/tool_config_tests.rs index 8ce68acbba..2f89bf7376 100644 --- a/codex-rs/tools/src/tool_config_tests.rs +++ b/codex-rs/tools/src/tool_config_tests.rs @@ -43,6 +43,7 @@ fn model_with_shell_type(shell_type: ConfigShellToolType) -> ModelInfo { input_modalities: codex_protocol::openai_models::default_input_modalities(), used_fallback_model_metadata: false, supports_search_tool: false, + auto_review_model_override: None, } }