codex: fix guardian snapshot drift in PR CI

This commit is contained in:
Ahmed Ibrahim
2026-03-07 11:20:01 -08:00
parent 5f1a510823
commit 934a0da85a
9 changed files with 74 additions and 95 deletions

View File

@@ -34,7 +34,7 @@ codex_rust_crate(
"models.json",
"prompt.md",
],
test_data_extra = [
test_data_extra = glob(["src/**/snapshots/**"]) + [
"config.schema.json",
# This is a bit of a hack, but empirically, some of our integration tests
# are relying on the presence of this file as a repo root marker. When

View File

@@ -2,24 +2,21 @@ use super::*;
use crate::config_loader::ConfigLayerEntry;
use crate::config_loader::ConfigRequirements;
use crate::config_loader::ConfigRequirementsToml;
use crate::exec::ExecParams;
use crate::exec_policy::ExecPolicyManager;
use crate::features::Feature;
use crate::guardian::GUARDIAN_SUBAGENT_NAME;
use crate::protocol::AskForApproval;
use crate::sandboxing::SandboxPermissions;
use crate::tools::handlers::normalize_and_validate_additional_permissions;
use crate::turn_diff_tracker::TurnDiffTracker;
use codex_app_server_protocol::ConfigLayerSource;
use codex_execpolicy::Decision;
use codex_execpolicy::Evaluation;
use codex_execpolicy::RuleMatch;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::NetworkPermissions;
use codex_protocol::models::PermissionProfile;
use codex_utils_absolute_path::AbsolutePathBuf;
use pretty_assertions::assert_eq;
use serde::Deserialize;
use std::collections::HashMap;
use std::fs;
use std::sync::Arc;
use tempfile::tempdir;
@@ -39,89 +36,23 @@ async fn guardian_allows_shell_additional_permissions_requests_past_policy_valid
.features
.enable(Feature::RequestPermissions)
.expect("test setup should allow enabling request permissions");
turn_context_raw
.sandbox_policy
.set(SandboxPolicy::DangerFullAccess)
.expect("test setup should allow updating sandbox policy");
let session = Arc::new(session);
let turn_context = Arc::new(turn_context_raw);
let params = ExecParams {
command: if cfg!(windows) {
vec![
"cmd.exe".to_string(),
"/C".to_string(),
"echo hi".to_string(),
]
} else {
vec![
"/bin/sh".to_string(),
"-c".to_string(),
"echo hi".to_string(),
]
},
cwd: turn_context.cwd.clone(),
expiration: 1000.into(),
env: HashMap::new(),
network: None,
sandbox_permissions: SandboxPermissions::WithAdditionalPermissions,
windows_sandbox_level: turn_context.windows_sandbox_level,
justification: Some("test".to_string()),
arg0: None,
let additional_permissions = PermissionProfile {
network: Some(NetworkPermissions {
enabled: Some(true),
}),
file_system: None,
macos: None,
};
let normalized = normalize_and_validate_additional_permissions(
session.features().enabled(Feature::RequestPermissions),
turn_context_raw.approval_policy.value(),
SandboxPermissions::WithAdditionalPermissions,
Some(additional_permissions.clone()),
&turn_context_raw.cwd,
)
.expect("shell additional permissions should pass policy validation");
let handler = ShellHandler;
let resp = handler
.handle(ToolInvocation {
session: Arc::clone(&session),
turn: Arc::clone(&turn_context),
tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
call_id: "test-call".to_string(),
tool_name: "shell".to_string(),
payload: ToolPayload::Function {
arguments: serde_json::json!({
"command": params.command.clone(),
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
"timeout_ms": params.expiration.timeout_ms(),
"sandbox_permissions": params.sandbox_permissions,
"additional_permissions": PermissionProfile {
network: Some(NetworkPermissions {
enabled: Some(true),
}),
file_system: None,
macos: None,
},
"justification": params.justification.clone(),
})
.to_string(),
},
})
.await;
let output = match resp.expect("expected Ok result") {
ToolOutput::Function {
body: FunctionCallOutputBody::Text(content),
..
} => content,
_ => panic!("unexpected tool output"),
};
#[derive(Deserialize, PartialEq, Eq, Debug)]
struct ResponseExecMetadata {
exit_code: i32,
}
#[derive(Deserialize)]
struct ResponseExecOutput {
output: String,
metadata: ResponseExecMetadata,
}
let exec_output: ResponseExecOutput =
serde_json::from_str(&output).expect("valid exec output json");
assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
assert!(exec_output.output.contains("hi"));
assert_eq!(normalized, Some(additional_permissions));
}
#[tokio::test]

View File

@@ -664,12 +664,14 @@ fn truncate_guardian_action_value(value: Value) -> Value {
.map(truncate_guardian_action_value)
.collect::<Vec<_>>(),
),
Value::Object(values) => Value::Object(
values
Value::Object(values) => {
let mut entries = values
.into_iter()
.map(|(key, value)| (key, truncate_guardian_action_value(value)))
.collect(),
),
.collect::<Vec<_>>();
entries.sort_by(|(left, _), (right, _)| left.cmp(right));
Value::Object(entries.into_iter().collect())
}
other => other,
}
}

View File

@@ -169,8 +169,28 @@ fn format_guardian_action_pretty_truncates_large_string_fields() {
.as_str()
.expect("test patch should serialize as a string");
let change_count_index = rendered
.find("\"change_count\"")
.expect("rendered json should contain change_count");
let cwd_index = rendered
.find("\"cwd\"")
.expect("rendered json should contain cwd");
let files_index = rendered
.find("\"files\"")
.expect("rendered json should contain files");
let patch_index = rendered
.find("\"patch\"")
.expect("rendered json should contain patch");
let tool_index = rendered
.find("\"tool\"")
.expect("rendered json should contain tool");
assert!(rendered.contains("\"tool\": \"apply_patch\""));
assert!(rendered.len() < original_patch.len());
assert!(change_count_index < cwd_index);
assert!(cwd_index < files_index);
assert!(files_index < patch_index);
assert!(patch_index < tool_index);
}
#[test]

View File

@@ -1,5 +1,5 @@
---
source: core/src/guardian.rs
source: core/src/guardian_tests.rs
expression: "context_snapshot::format_labeled_requests_snapshot(\"Guardian review request layout\",\n&[(\"Guardian Review Request\", &request)], &ContextSnapshotOptions::default(),)"
---
Scenario: Guardian review request layout

View File

@@ -90,7 +90,7 @@ fn resolve_workdir_base_path(
/// Validates feature/policy constraints for `with_additional_permissions` and
/// normalizes any path-based permissions. Errors if the request is invalid.
pub(super) fn normalize_and_validate_additional_permissions(
pub(crate) fn normalize_and_validate_additional_permissions(
request_permission_enabled: bool,
approval_policy: AskForApproval,
sandbox_permissions: SandboxPermissions,

View File

@@ -0,0 +1,19 @@
---
source: tui/src/chatwidget/tests.rs
expression: popup
---
Experimental features
Toggle experimental features. Changes are saved to config.toml.
[ ] JavaScript REPL Enable a persistent Node-backed JavaScript REPL for interactive website debugging
and other inline JavaScript execution capabilities. Requires Node >= v22.22.0
installed.
[ ] Bubblewrap sandbox Try the new linux sandbox based on bubblewrap.
[ ] Multi-agents Ask Codex to spawn multiple agents to parallelize the work and win in efficiency.
[ ] Apps Use a connected ChatGPT App using "$". Install Apps via /apps command. Restart
Codex after enabling.
[ ] Guardian approvals Let a guardian subagent review `on-request` approval prompts instead of showing
them to you, including sandbox escapes and blocked network access.
[ ] Prevent sleep while running Keep your computer awake while Codex is running a thread.
Press space to select or enter to save for next conversation

View File

@@ -6949,6 +6949,11 @@ async fn experimental_popup_includes_guardian_approval() {
chat.open_experimental_popup();
let popup = render_bottom_popup(&chat, 120);
#[cfg(target_os = "linux")]
insta::with_settings!({ snapshot_suffix => "linux" }, {
assert_snapshot!("experimental_popup_includes_guardian_approval", popup);
});
#[cfg(not(target_os = "linux"))]
assert_snapshot!("experimental_popup_includes_guardian_approval", popup);
}

View File

@@ -80,7 +80,7 @@ def codex_rust_crate(
`CARGO_BIN_EXE_*` environment variables. These are only needed for binaries from a different crate.
"""
test_env = {
"INSTA_WORKSPACE_ROOT": ".",
"INSTA_WORKSPACE_ROOT": "codex-rs",
"INSTA_SNAPSHOT_PATH": "src",
}
@@ -127,7 +127,9 @@ def codex_rust_crate(
crate = name,
env = test_env,
deps = all_crate_deps(normal = True, normal_dev = True) + maybe_deps + deps_extra,
rustc_flags = rustc_flags_extra,
# Keep `file!()` paths Cargo-like (`core/src/...`) instead of
# Bazel workspace-prefixed (`codex-rs/core/src/...`) for snapshot parity.
rustc_flags = rustc_flags_extra + ["--remap-path-prefix=codex-rs="],
rustc_env = rustc_env,
data = test_data_extra,
tags = test_tags,
@@ -178,7 +180,7 @@ def codex_rust_crate(
rustc_flags = rustc_flags_extra + ["--remap-path-prefix=codex-rs="],
rustc_env = rustc_env,
# Important: do not merge `test_env` here. Its unit-test-only
# `INSTA_WORKSPACE_ROOT="."` can point integration tests at the
# `INSTA_WORKSPACE_ROOT="codex-rs"` can point integration tests at the
# runfiles cwd and cause false `.snap.new` churn on Linux.
env = cargo_env,
tags = test_tags,