mirror of
https://github.com/openai/codex.git
synced 2026-04-30 01:16:54 +00:00
Fix flaky permissions escalation test on Windows (#16825)
Problem: `rejects_escalated_permissions_when_policy_not_on_request` retried a real shell command after asserting the escalation rejection, so Windows CI could fail on command startup timing instead of approval behavior. Solution: Keep the rejection assertion, verify no turn permissions were granted, and assert through exec-policy evaluation that the same command would be allowed without escalation instead of timing a subprocess.
This commit is contained in:
@@ -43,7 +43,6 @@ use crate::state::TaskKind;
|
||||
use crate::tasks::SessionTask;
|
||||
use crate::tasks::SessionTaskContext;
|
||||
use crate::tools::ToolRouter;
|
||||
use crate::tools::context::FunctionToolOutput;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::handlers::ShellHandler;
|
||||
@@ -120,12 +119,6 @@ use std::time::Duration as StdDuration;
|
||||
#[path = "codex_tests_guardian.rs"]
|
||||
mod guardian_tests;
|
||||
|
||||
use codex_protocol::models::function_call_output_content_items_to_text;
|
||||
|
||||
fn expect_text_tool_output(output: &FunctionToolOutput) -> String {
|
||||
function_call_output_content_items_to_text(&output.body).unwrap_or_default()
|
||||
}
|
||||
|
||||
struct InstructionsTestCase {
|
||||
slug: &'static str,
|
||||
expects_apply_patch_instructions: bool,
|
||||
@@ -5348,7 +5341,9 @@ async fn sample_rollout(
|
||||
#[tokio::test]
|
||||
async fn rejects_escalated_permissions_when_policy_not_on_request() {
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec_policy::ExecApprovalRequest;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::tools::sandboxing::ExecApprovalRequirement;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
@@ -5394,23 +5389,6 @@ async fn rejects_escalated_permissions_when_policy_not_on_request() {
|
||||
arg0: None,
|
||||
};
|
||||
|
||||
let params2 = ExecParams {
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
command: params.command.clone(),
|
||||
cwd: params.cwd.clone(),
|
||||
expiration: timeout_ms.into(),
|
||||
capture_policy: ExecCapturePolicy::ShellTool,
|
||||
env: HashMap::new(),
|
||||
network: None,
|
||||
windows_sandbox_level: turn_context.windows_sandbox_level,
|
||||
windows_sandbox_private_desktop: turn_context
|
||||
.config
|
||||
.permissions
|
||||
.windows_sandbox_private_desktop,
|
||||
justification: params.justification.clone(),
|
||||
arg0: None,
|
||||
};
|
||||
|
||||
let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
|
||||
let tool_name = "shell";
|
||||
@@ -5448,9 +5426,11 @@ async fn rejects_escalated_permissions_when_policy_not_on_request() {
|
||||
);
|
||||
|
||||
pretty_assertions::assert_eq!(output, expected);
|
||||
pretty_assertions::assert_eq!(session.granted_turn_permissions().await, None);
|
||||
|
||||
// Now retry the same command WITHOUT escalated permissions; should succeed.
|
||||
// Force DangerFullAccess to avoid platform sandbox dependencies in tests.
|
||||
// The rejection should not poison the non-escalated path for the same
|
||||
// command. Force DangerFullAccess so this check stays focused on approval
|
||||
// policy rather than platform-specific sandbox behavior.
|
||||
let turn_context_mut = Arc::get_mut(&mut turn_context).expect("unique turn context Arc");
|
||||
turn_context_mut
|
||||
.sandbox_policy
|
||||
@@ -5461,45 +5441,22 @@ async fn rejects_escalated_permissions_when_policy_not_on_request() {
|
||||
turn_context_mut.network_sandbox_policy =
|
||||
NetworkSandboxPolicy::from(turn_context_mut.sandbox_policy.get());
|
||||
|
||||
let resp2 = handler
|
||||
.handle(ToolInvocation {
|
||||
session: Arc::clone(&session),
|
||||
turn: Arc::clone(&turn_context),
|
||||
tracker: Arc::clone(&turn_diff_tracker),
|
||||
call_id: "test-call-2".to_string(),
|
||||
tool_name: tool_name.to_string(),
|
||||
tool_namespace: None,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"command": params2.command.clone(),
|
||||
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
|
||||
"timeout_ms": params2.expiration.timeout_ms(),
|
||||
"sandbox_permissions": params2.sandbox_permissions,
|
||||
"justification": params2.justification.clone(),
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
let exec_approval_requirement = session
|
||||
.services
|
||||
.exec_policy
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
command: ¶ms.command,
|
||||
approval_policy: turn_context.approval_policy.value(),
|
||||
sandbox_policy: turn_context.sandbox_policy.get(),
|
||||
file_system_sandbox_policy: &turn_context.file_system_sandbox_policy,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let output = expect_text_tool_output(&resp2.expect("expected Ok result"));
|
||||
|
||||
#[derive(Deserialize, PartialEq, Eq, Debug)]
|
||||
struct ResponseExecMetadata {
|
||||
exit_code: i32,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ResponseExecOutput {
|
||||
output: String,
|
||||
metadata: ResponseExecMetadata,
|
||||
}
|
||||
|
||||
let exec_output: ResponseExecOutput =
|
||||
serde_json::from_str(&output).expect("valid exec output json");
|
||||
|
||||
pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
|
||||
assert!(exec_output.output.contains("hi"));
|
||||
assert!(matches!(
|
||||
exec_approval_requirement,
|
||||
ExecApprovalRequirement::Skip { .. }
|
||||
));
|
||||
}
|
||||
#[tokio::test]
|
||||
async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() {
|
||||
|
||||
Reference in New Issue
Block a user