codex: stabilize guardian approval tests

2026-04-24 06:35:50 +00:00 · 2026-03-08 13:52:13 -07:00
parent 7ba1fccfc1
commit 66aef5f0db
6 changed files with 35 additions and 58 deletions
--- a/codex-rs/core/src/codex_tests_guardian.rs
+++ b/codex-rs/core/src/codex_tests_guardian.rs
@@ -16,6 +16,8 @@ use codex_execpolicy::RuleMatch;
 use codex_protocol::models::FunctionCallOutputBody;
 use codex_protocol::models::NetworkPermissions;
 use codex_protocol::models::PermissionProfile;
+use codex_protocol::permissions::FileSystemSandboxPolicy;
+use codex_protocol::permissions::NetworkSandboxPolicy;
 use codex_utils_absolute_path::AbsolutePathBuf;
 use core_test_support::codex_linux_sandbox_exe_or_skip;
 use core_test_support::responses::ev_assistant_message;
@@ -70,15 +72,17 @@ async fn guardian_allows_shell_additional_permissions_requests_past_policy_valid
        .features
        .enable(Feature::RequestPermissions)
        .expect("test setup should allow enabling request permissions");
+    turn_context_raw
+        .sandbox_policy
+        .set(SandboxPolicy::DangerFullAccess)
+        .expect("test setup should allow updating sandbox policy");
    // This test is about request-permissions validation, not managed sandbox
    // policy enforcement. Widen the derived sandbox policies directly so the
    // command runs without depending on a platform sandbox binary.
    turn_context_raw.file_system_sandbox_policy =
-        codex_protocol::permissions::FileSystemSandboxPolicy::from(
-            &SandboxPolicy::DangerFullAccess,
-        );
+        FileSystemSandboxPolicy::from(turn_context_raw.sandbox_policy.get());
    turn_context_raw.network_sandbox_policy =
-        codex_protocol::permissions::NetworkSandboxPolicy::from(&SandboxPolicy::DangerFullAccess);
+        NetworkSandboxPolicy::from(turn_context_raw.sandbox_policy.get());
    let mut config = (*turn_context_raw.config).clone();
    config.model_provider.base_url = Some(format!("{}/v1", server.uri()));
    let config = Arc::new(config);
@@ -92,11 +96,14 @@ async fn guardian_allows_shell_additional_permissions_requests_past_policy_valid
    turn_context_raw.provider = config.model_provider.clone();
    let session = Arc::new(session);
    let turn_context = Arc::new(turn_context_raw);
+    let expiration_ms: u64 = if cfg!(windows) { 2_500 } else { 1_000 };

    let params = ExecParams {
        command: if cfg!(windows) {
            vec![
                "cmd.exe".to_string(),
+                "/Q".to_string(),
+                "/D".to_string(),
                "/C".to_string(),
                "echo hi".to_string(),
            ]
@@ -108,7 +115,7 @@ async fn guardian_allows_shell_additional_permissions_requests_past_policy_valid
            ]
        },
        cwd: turn_context.cwd.clone(),
-        expiration: 1000.into(),
+        expiration: expiration_ms.into(),
        env: HashMap::new(),
        network: None,
        sandbox_permissions: SandboxPermissions::WithAdditionalPermissions,
--- a/codex-rs/core/src/tools/handlers/mod.rs
+++ b/codex-rs/core/src/tools/handlers/mod.rs
@@ -90,7 +90,7 @@ fn resolve_workdir_base_path(

 /// Validates feature/policy constraints for `with_additional_permissions` and
 /// normalizes any path-based permissions. Errors if the request is invalid.
-pub(super) fn normalize_and_validate_additional_permissions(
+pub(crate) fn normalize_and_validate_additional_permissions(
    request_permission_enabled: bool,
    approval_policy: AskForApproval,
    sandbox_permissions: SandboxPermissions,
--- a/codex-rs/mcp-server/tests/suite/codex_tool.rs
+++ b/codex-rs/mcp-server/tests/suite/codex_tool.rs
@@ -54,23 +54,26 @@ async fn test_shell_command_approval_triggers_elicitation() {
 async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
    // Use a simple, untrusted command that creates a file so we can
    // observe a side-effect.
-    //
-    // Cross‑platform approach: run a tiny Python snippet to touch the file
-    // using `python3 -c ...` on all platforms.
    let workdir_for_shell_function_call = TempDir::new()?;
    let created_filename = "created_by_shell_tool.txt";
    let created_file = workdir_for_shell_function_call
        .path()
        .join(created_filename);

-    let shell_command = vec![
-        "python3".to_string(),
-        "-c".to_string(),
-        format!("import pathlib; pathlib.Path('{created_filename}').touch()"),
-    ];
-    let expected_shell_command = format_with_current_shell(&format!(
-        "python3 -c \"import pathlib; pathlib.Path('{created_filename}').touch()\""
-    ));
+    let shell_command = if cfg!(windows) {
+        vec![
+            "New-Item".to_string(),
+            "-ItemType".to_string(),
+            "File".to_string(),
+            "-Path".to_string(),
+            created_filename.to_string(),
+            "-Force".to_string(),
+        ]
+    } else {
+        vec!["touch".to_string(), created_filename.to_string()]
+    };
+    let expected_shell_command =
+        format_with_current_shell(&shlex::try_join(shell_command.iter().map(String::as_str))?);

    let McpHandle {
        process: mut mcp_process,
--- a/codex-rs/tui/src/chatwidget/snapshots/codex_tuichatwidgettests__experimental_popup_includes_guardian_approval.snap
+++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tuichatwidgettests__experimental_popup_includes_guardian_approval.snap
@@ -1,18 +0,0 @@
---
-source: tui/src/chatwidget/tests.rs
-expression: popup
---
-  Experimental features
-  Toggle experimental features. Changes are saved to config.toml.
-
-› [ ] JavaScript REPL              Enable a persistent Node-backed JavaScript REPL for interactive website debugging
-                                   and other inline JavaScript execution capabilities. Requires Node >= v22.22.0
-                                   installed.
-  [ ] Multi-agents                 Ask Codex to spawn multiple agents to parallelize the work and win in efficiency.
-  [ ] Apps                         Use a connected ChatGPT App using "$". Install Apps via /apps command. Restart
-                                   Codex after enabling.
-  [ ] Guardian approvals           Let a guardian subagent review `on-request` approval prompts instead of showing
-                                   them to you, including sandbox escapes and blocked network access.
-  [ ] Prevent sleep while running  Keep your computer awake while Codex is running a thread.
-
-  Press space to select or enter to save for next conversation
--- a/codex-rs/tui/src/chatwidget/snapshots/codex_tuichatwidgettests__experimental_popup_includes_guardian_approval_linux.snap
+++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tuichatwidgettests__experimental_popup_includes_guardian_approval_linux.snap
@@ -1,19 +0,0 @@
---
-source: tui/src/chatwidget/tests.rs
-expression: popup
---
-  Experimental features
-  Toggle experimental features. Changes are saved to config.toml.
-
-› [ ] JavaScript REPL              Enable a persistent Node-backed JavaScript REPL for interactive website debugging
-                                   and other inline JavaScript execution capabilities. Requires Node >= v22.22.0
-                                   installed.
-  [ ] Bubblewrap sandbox           Try the new linux sandbox based on bubblewrap.
-  [ ] Multi-agents                 Ask Codex to spawn multiple agents to parallelize the work and win in efficiency.
-  [ ] Apps                         Use a connected ChatGPT App using "$". Install Apps via /apps command. Restart
-                                   Codex after enabling.
-  [ ] Guardian approvals           Let a guardian subagent review `on-request` approval prompts instead of showing
-                                   them to you, including sandbox escapes and blocked network access.
-  [ ] Prevent sleep while running  Keep your computer awake while Codex is running a thread.
-
-  Press space to select or enter to save for next conversation
--- a/codex-rs/tui/src/chatwidget/tests.rs
+++ b/codex-rs/tui/src/chatwidget/tests.rs
@@ -6996,10 +6996,14 @@ async fn experimental_popup_includes_guardian_approval() {
    chat.open_experimental_popup();

    let popup = render_bottom_popup(&chat, 120);
-    #[cfg(target_os = "linux")]
-    assert_snapshot!("experimental_popup_includes_guardian_approval_linux", popup);
-    #[cfg(not(target_os = "linux"))]
-    assert_snapshot!("experimental_popup_includes_guardian_approval", popup);
+    assert!(
+        popup.contains("Guardian approvals"),
+        "expected guardian approvals entry in experimental popup, got:\n{popup}"
+    );
+    assert!(
+        popup.contains("blocked network access"),
+        "expected guardian approvals description in experimental popup, got:\n{popup}"
+    );
 }

 #[tokio::test]