fix(ci): restore guardian coverage and bazel unit tests (#13912)

## Summary
- restore the guardian review request snapshot test and its tracked
snapshot after it was dropped from `main`
- make Bazel Rust unit-test wrappers resolve runfiles correctly on
manifest-only platforms like macOS and point Insta at the real workspace
root
- harden the shell-escalation socket-closure assertion so the musl Bazel
test no longer depends on fd reuse behavior

## Verification
- `cargo test -p codex-core guardian_review_request_layout_matches_model_visible_request_snapshot`
- `cargo test -p codex-shell-escalation`
- `bazel test //codex-rs/exec:exec-unit-tests //codex-rs/shell-escalation:shell-escalation-unit-tests`

Supersedes #13894.

---------

Co-authored-by: Ahmed Ibrahim <aibrahim@openai.com>
Co-authored-by: viyatb-oai <viyatb@openai.com>
Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Charley Cunningham
2026-03-08 12:05:19 -07:00
committed by GitHub
parent a30edb6c17
commit 7ba1fccfc1
13 changed files with 462 additions and 28 deletions

View File

@@ -18,6 +18,12 @@ use codex_protocol::models::NetworkPermissions;
use codex_protocol::models::PermissionProfile;
use codex_utils_absolute_path::AbsolutePathBuf;
use core_test_support::codex_linux_sandbox_exe_or_skip;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use pretty_assertions::assert_eq;
use serde::Deserialize;
use std::collections::HashMap;
@@ -27,6 +33,29 @@ use tempfile::tempdir;
#[tokio::test]
async fn guardian_allows_shell_additional_permissions_requests_past_policy_validation() {
let server = start_mock_server().await;
let _request_log = mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-guardian"),
ev_assistant_message(
"msg-guardian",
&serde_json::json!({
"risk_level": "low",
"risk_score": 5,
"rationale": "The request only widens permissions for a benign local echo command.",
"evidence": [{
"message": "The planned command is an `echo hi` smoke test.",
"why": "This is low-risk and does not attempt destructive or exfiltrating behavior.",
}],
})
.to_string(),
),
ev_completed("resp-guardian"),
]),
)
.await;
let (mut session, mut turn_context_raw) = make_session_and_context().await;
turn_context_raw.codex_linux_sandbox_exe = codex_linux_sandbox_exe_or_skip!();
turn_context_raw
@@ -41,10 +70,26 @@ async fn guardian_allows_shell_additional_permissions_requests_past_policy_valid
.features
.enable(Feature::RequestPermissions)
.expect("test setup should allow enabling request permissions");
turn_context_raw
.sandbox_policy
.set(SandboxPolicy::DangerFullAccess)
.expect("test setup should allow updating sandbox policy");
// This test is about request-permissions validation, not managed sandbox
// policy enforcement. Widen the derived sandbox policies directly so the
// command runs without depending on a platform sandbox binary.
turn_context_raw.file_system_sandbox_policy =
codex_protocol::permissions::FileSystemSandboxPolicy::from(
&SandboxPolicy::DangerFullAccess,
);
turn_context_raw.network_sandbox_policy =
codex_protocol::permissions::NetworkSandboxPolicy::from(&SandboxPolicy::DangerFullAccess);
let mut config = (*turn_context_raw.config).clone();
config.model_provider.base_url = Some(format!("{}/v1", server.uri()));
let config = Arc::new(config);
let models_manager = Arc::new(crate::test_support::models_manager_with_provider(
config.codex_home.clone(),
Arc::clone(&session.services.auth_manager),
config.model_provider.clone(),
));
session.services.models_manager = models_manager;
turn_context_raw.config = Arc::clone(&config);
turn_context_raw.provider = config.model_provider.clone();
let session = Arc::new(session);
let turn_context = Arc::new(turn_context_raw);