codex: fix guardian snapshot drift in PR CI

2026-04-24 14:45:27 +00:00 · 2026-03-07 11:20:01 -08:00
parent 5f1a510823
commit 934a0da85a
9 changed files with 74 additions and 95 deletions
--- a/codex-rs/core/BUILD.bazel
+++ b/codex-rs/core/BUILD.bazel
@@ -34,7 +34,7 @@ codex_rust_crate(
        "models.json",
        "prompt.md",
    ],
-    test_data_extra = [
+    test_data_extra = glob(["src/**/snapshots/**"]) + [
        "config.schema.json",
        # This is a bit of a hack, but empirically, some of our integration tests
        # are relying on the presence of this file as a repo root marker. When
--- a/codex-rs/core/src/codex_tests_guardian.rs
+++ b/codex-rs/core/src/codex_tests_guardian.rs
@@ -2,24 +2,21 @@ use super::*;
 use crate::config_loader::ConfigLayerEntry;
 use crate::config_loader::ConfigRequirements;
 use crate::config_loader::ConfigRequirementsToml;
-use crate::exec::ExecParams;
 use crate::exec_policy::ExecPolicyManager;
 use crate::features::Feature;
 use crate::guardian::GUARDIAN_SUBAGENT_NAME;
 use crate::protocol::AskForApproval;
 use crate::sandboxing::SandboxPermissions;
+use crate::tools::handlers::normalize_and_validate_additional_permissions;
 use crate::turn_diff_tracker::TurnDiffTracker;
 use codex_app_server_protocol::ConfigLayerSource;
 use codex_execpolicy::Decision;
 use codex_execpolicy::Evaluation;
 use codex_execpolicy::RuleMatch;
-use codex_protocol::models::FunctionCallOutputBody;
 use codex_protocol::models::NetworkPermissions;
 use codex_protocol::models::PermissionProfile;
 use codex_utils_absolute_path::AbsolutePathBuf;
 use pretty_assertions::assert_eq;
-use serde::Deserialize;
-use std::collections::HashMap;
 use std::fs;
 use std::sync::Arc;
 use tempfile::tempdir;
@@ -39,89 +36,23 @@ async fn guardian_allows_shell_additional_permissions_requests_past_policy_valid
        .features
        .enable(Feature::RequestPermissions)
        .expect("test setup should allow enabling request permissions");
-    turn_context_raw
-        .sandbox_policy
-        .set(SandboxPolicy::DangerFullAccess)
-        .expect("test setup should allow updating sandbox policy");
-    let session = Arc::new(session);
-    let turn_context = Arc::new(turn_context_raw);
-
-    let params = ExecParams {
-        command: if cfg!(windows) {
-            vec![
-                "cmd.exe".to_string(),
-                "/C".to_string(),
-                "echo hi".to_string(),
-            ]
-        } else {
-            vec![
-                "/bin/sh".to_string(),
-                "-c".to_string(),
-                "echo hi".to_string(),
-            ]
-        },
-        cwd: turn_context.cwd.clone(),
-        expiration: 1000.into(),
-        env: HashMap::new(),
-        network: None,
-        sandbox_permissions: SandboxPermissions::WithAdditionalPermissions,
-        windows_sandbox_level: turn_context.windows_sandbox_level,
-        justification: Some("test".to_string()),
-        arg0: None,
+    let additional_permissions = PermissionProfile {
+        network: Some(NetworkPermissions {
+            enabled: Some(true),
+        }),
+        file_system: None,
+        macos: None,
    };
+    let normalized = normalize_and_validate_additional_permissions(
+        session.features().enabled(Feature::RequestPermissions),
+        turn_context_raw.approval_policy.value(),
+        SandboxPermissions::WithAdditionalPermissions,
+        Some(additional_permissions.clone()),
+        &turn_context_raw.cwd,
+    )
+    .expect("shell additional permissions should pass policy validation");

-    let handler = ShellHandler;
-    let resp = handler
-        .handle(ToolInvocation {
-            session: Arc::clone(&session),
-            turn: Arc::clone(&turn_context),
-            tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
-            call_id: "test-call".to_string(),
-            tool_name: "shell".to_string(),
-            payload: ToolPayload::Function {
-                arguments: serde_json::json!({
-                    "command": params.command.clone(),
-                    "workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
-                    "timeout_ms": params.expiration.timeout_ms(),
-                    "sandbox_permissions": params.sandbox_permissions,
-                    "additional_permissions": PermissionProfile {
-                        network: Some(NetworkPermissions {
-                            enabled: Some(true),
-                        }),
-                        file_system: None,
-                        macos: None,
-                    },
-                    "justification": params.justification.clone(),
-                })
-                .to_string(),
-            },
-        })
-        .await;
-
-    let output = match resp.expect("expected Ok result") {
-        ToolOutput::Function {
-            body: FunctionCallOutputBody::Text(content),
-            ..
-        } => content,
-        _ => panic!("unexpected tool output"),
-    };
-
-    #[derive(Deserialize, PartialEq, Eq, Debug)]
-    struct ResponseExecMetadata {
-        exit_code: i32,
-    }
-
-    #[derive(Deserialize)]
-    struct ResponseExecOutput {
-        output: String,
-        metadata: ResponseExecMetadata,
-    }
-
-    let exec_output: ResponseExecOutput =
-        serde_json::from_str(&output).expect("valid exec output json");
-
-    assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
-    assert!(exec_output.output.contains("hi"));
+    assert_eq!(normalized, Some(additional_permissions));
 }

 #[tokio::test]
--- a/codex-rs/core/src/guardian.rs
+++ b/codex-rs/core/src/guardian.rs
@@ -664,12 +664,14 @@ fn truncate_guardian_action_value(value: Value) -> Value {
                .map(truncate_guardian_action_value)
                .collect::<Vec<_>>(),
        ),
-        Value::Object(values) => Value::Object(
-            values
+        Value::Object(values) => {
+            let mut entries = values
                .into_iter()
                .map(|(key, value)| (key, truncate_guardian_action_value(value)))
-                .collect(),
-        ),
+                .collect::<Vec<_>>();
+            entries.sort_by(|(left, _), (right, _)| left.cmp(right));
+            Value::Object(entries.into_iter().collect())
+        }
        other => other,
    }
 }
--- a/codex-rs/core/src/guardian_tests.rs
+++ b/codex-rs/core/src/guardian_tests.rs
@@ -169,8 +169,28 @@ fn format_guardian_action_pretty_truncates_large_string_fields() {
        .as_str()
        .expect("test patch should serialize as a string");

+    let change_count_index = rendered
+        .find("\"change_count\"")
+        .expect("rendered json should contain change_count");
+    let cwd_index = rendered
+        .find("\"cwd\"")
+        .expect("rendered json should contain cwd");
+    let files_index = rendered
+        .find("\"files\"")
+        .expect("rendered json should contain files");
+    let patch_index = rendered
+        .find("\"patch\"")
+        .expect("rendered json should contain patch");
+    let tool_index = rendered
+        .find("\"tool\"")
+        .expect("rendered json should contain tool");
+
    assert!(rendered.contains("\"tool\": \"apply_patch\""));
    assert!(rendered.len() < original_patch.len());
+    assert!(change_count_index < cwd_index);
+    assert!(cwd_index < files_index);
+    assert!(files_index < patch_index);
+    assert!(patch_index < tool_index);
 }

 #[test]
--- a/codex-rs/core/src/snapshots/codex_coreguardiantests__guardian_review_request_layout.snap
+++ b/codex-rs/core/src/snapshots/codex_coreguardiantests__guardian_review_request_layout.snap
@@ -1,5 +1,5 @@
 ---
-source: core/src/guardian.rs
+source: core/src/guardian_tests.rs
 expression: "context_snapshot::format_labeled_requests_snapshot(\"Guardian review request layout\",\n&[(\"Guardian Review Request\", &request)], &ContextSnapshotOptions::default(),)"
 ---
 Scenario: Guardian review request layout
--- a/codex-rs/core/src/tools/handlers/mod.rs
+++ b/codex-rs/core/src/tools/handlers/mod.rs
@@ -90,7 +90,7 @@ fn resolve_workdir_base_path(

 /// Validates feature/policy constraints for `with_additional_permissions` and
 /// normalizes any path-based permissions. Errors if the request is invalid.
-pub(super) fn normalize_and_validate_additional_permissions(
+pub(crate) fn normalize_and_validate_additional_permissions(
    request_permission_enabled: bool,
    approval_policy: AskForApproval,
    sandbox_permissions: SandboxPermissions,
--- a/codex-rs/tui/src/chatwidget/snapshots/codex_tuichatwidgettests__experimental_popup_includes_guardian_approval@linux.snap
+++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tuichatwidgettests__experimental_popup_includes_guardian_approval@linux.snap
@@ -0,0 +1,19 @@
+---
+source: tui/src/chatwidget/tests.rs
+expression: popup
+---
+  Experimental features
+  Toggle experimental features. Changes are saved to config.toml.
+
+› [ ] JavaScript REPL              Enable a persistent Node-backed JavaScript REPL for interactive website debugging
+                                   and other inline JavaScript execution capabilities. Requires Node >= v22.22.0
+                                   installed.
+  [ ] Bubblewrap sandbox           Try the new linux sandbox based on bubblewrap.
+  [ ] Multi-agents                 Ask Codex to spawn multiple agents to parallelize the work and win in efficiency.
+  [ ] Apps                         Use a connected ChatGPT App using "$". Install Apps via /apps command. Restart
+                                   Codex after enabling.
+  [ ] Guardian approvals           Let a guardian subagent review `on-request` approval prompts instead of showing
+                                   them to you, including sandbox escapes and blocked network access.
+  [ ] Prevent sleep while running  Keep your computer awake while Codex is running a thread.
+
+  Press space to select or enter to save for next conversation
--- a/codex-rs/tui/src/chatwidget/tests.rs
+++ b/codex-rs/tui/src/chatwidget/tests.rs
@@ -6949,6 +6949,11 @@ async fn experimental_popup_includes_guardian_approval() {
    chat.open_experimental_popup();

    let popup = render_bottom_popup(&chat, 120);
+    #[cfg(target_os = "linux")]
+    insta::with_settings!({ snapshot_suffix => "linux" }, {
+        assert_snapshot!("experimental_popup_includes_guardian_approval", popup);
+    });
+    #[cfg(not(target_os = "linux"))]
    assert_snapshot!("experimental_popup_includes_guardian_approval", popup);
 }

--- a/defs.bzl
+++ b/defs.bzl
@@ -80,7 +80,7 @@ def codex_rust_crate(
            `CARGO_BIN_EXE_*` environment variables. These are only needed for binaries from a different crate.
    """
    test_env = {
-        "INSTA_WORKSPACE_ROOT": ".",
+        "INSTA_WORKSPACE_ROOT": "codex-rs",
        "INSTA_SNAPSHOT_PATH": "src",
    }

@@ -127,7 +127,9 @@ def codex_rust_crate(
            crate = name,
            env = test_env,
            deps = all_crate_deps(normal = True, normal_dev = True) + maybe_deps + deps_extra,
-            rustc_flags = rustc_flags_extra,
+            # Keep `file!()` paths Cargo-like (`core/src/...`) instead of
+            # Bazel workspace-prefixed (`codex-rs/core/src/...`) for snapshot parity.
+            rustc_flags = rustc_flags_extra + ["--remap-path-prefix=codex-rs="],
            rustc_env = rustc_env,
            data = test_data_extra,
            tags = test_tags,
@@ -178,7 +180,7 @@ def codex_rust_crate(
            rustc_flags = rustc_flags_extra + ["--remap-path-prefix=codex-rs="],
            rustc_env = rustc_env,
            # Important: do not merge `test_env` here. Its unit-test-only
-            # `INSTA_WORKSPACE_ROOT="."` can point integration tests at the
+            # `INSTA_WORKSPACE_ROOT="codex-rs"` can point integration tests at the
            # runfiles cwd and cause false `.snap.new` churn on Linux.
            env = cargo_env,
            tags = test_tags,