Compare commits

...

12 Commits

Author SHA1 Message Date
Ahmed Ibrahim
6f1b07acc7 codex: stabilize guardian core tests
(cherry picked from commit 201e8a125cb9e199380f287e986204f5b0ceeb4b)
2026-03-07 12:39:42 -08:00
Ahmed Ibrahim
f09e789648 codex: stabilize guardian popup test across platforms 2026-03-07 12:35:49 -08:00
Ahmed Ibrahim
86d19aca9a codex: use public guardian snapshot macro 2026-03-07 12:23:12 -08:00
Ahmed Ibrahim
c805ab00b7 codex: fix guardian snapshot clippy follow-up 2026-03-07 12:21:37 -08:00
Ahmed Ibrahim
5f7a93d2f7 codex: fix guardian snapshot clippy lint 2026-03-07 12:13:57 -08:00
Ahmed Ibrahim
30927f0451 codex: stabilize guardian snapshot source path 2026-03-07 12:05:14 -08:00
Ahmed Ibrahim
6147a9e452 codex: fix guardian snapshot source metadata 2026-03-07 12:01:20 -08:00
Ahmed Ibrahim
435dfac7f6 codex: stabilize guardian snapshot under Bazel 2026-03-07 11:51:39 -08:00
Ahmed Ibrahim
f3d0031d6b codex: drop unrelated guardian snapshot changes 2026-03-07 11:39:07 -08:00
Ahmed Ibrahim
6e0d7ea1a6 codex: align Bazel snapshot source remap 2026-03-07 11:28:10 -08:00
Ahmed Ibrahim
ef14480821 codex: fix guardian snapshot drift in PR CI 2026-03-07 11:21:16 -08:00
Ahmed Ibrahim
c3f9a95ddb codex: stabilize PTY Python REPL test 2026-03-07 11:07:29 -08:00
8 changed files with 50 additions and 66 deletions

View File

@@ -34,7 +34,7 @@ codex_rust_crate(
"models.json",
"prompt.md",
],
test_data_extra = [
test_data_extra = glob(["src/**/snapshots/**"]) + [
"config.schema.json",
# This is a bit of a hack, but empirically, some of our integration tests
# are relying on the presence of this file as a repo root marker. When

View File

@@ -43,6 +43,10 @@ async fn guardian_allows_shell_additional_permissions_requests_past_policy_valid
.sandbox_policy
.set(SandboxPolicy::DangerFullAccess)
.expect("test setup should allow updating sandbox policy");
turn_context_raw.file_system_sandbox_policy =
FileSystemSandboxPolicy::from(turn_context_raw.sandbox_policy.get());
turn_context_raw.network_sandbox_policy =
NetworkSandboxPolicy::from(turn_context_raw.sandbox_policy.get());
let session = Arc::new(session);
let turn_context = Arc::new(turn_context_raw);

View File

@@ -19,6 +19,7 @@ use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use insta::assert_snapshot;
use insta::with_settings;
use pretty_assertions::assert_eq;
use std::collections::BTreeMap;
use std::path::PathBuf;
@@ -337,14 +338,14 @@ async fn guardian_review_request_layout_matches_model_visible_request_snapshot()
assert_eq!(assessment.risk_score, 35);
let request = request_log.single_request();
assert_snapshot!(
"guardian_review_request_layout",
context_snapshot::format_labeled_requests_snapshot(
"Guardian review request layout",
&[("Guardian Review Request", &request)],
&ContextSnapshotOptions::default(),
)
let snapshot_text = context_snapshot::format_labeled_requests_snapshot(
"Guardian review request layout",
&[("Guardian Review Request", &request)],
&ContextSnapshotOptions::default(),
);
with_settings!({ omit_expression => true }, {
assert_snapshot!("guardian_review_request_layout", snapshot_text);
});
Ok(())
}

View File

@@ -1,6 +1,5 @@
---
source: core/src/guardian.rs
expression: "context_snapshot::format_labeled_requests_snapshot(\"Guardian review request layout\",\n&[(\"Guardian Review Request\", &request)], &ContextSnapshotOptions::default(),)"
source: core/src/guardian_tests.rs
---
Scenario: Guardian review request layout
@@ -25,6 +24,6 @@ Scenario: Guardian review request layout
[11] Sandbox denied outbound git push to github.com.\n\n
[12] Assess the exact planned action below. Use read-only tool checks when local state matters.\n
[13] Planned action JSON:\n
[14] {\n "command": [\n "git",\n "push",\n "origin",\n "guardian-approval-mvp"\n ],\n "cwd": "/repo/codex-rs/core",\n "justification": "Need to push the reviewed docs fix to the repo remote.",\n "sandbox_permissions": "use_default",\n "tool": "shell"\n}\n
[14] {\n "tool": "shell",\n "command": [\n "git",\n "push",\n "origin",\n "guardian-approval-mvp"\n ],\n "cwd": "/repo/codex-rs/core",\n "sandbox_permissions": "use_default",\n "justification": "Need to push the reviewed docs fix to the repo remote."\n}\n
[15] >>> APPROVAL REQUEST END\n
[16] You may use read-only tool checks to gather any additional context you need to make a high-confidence determination.\n\nYour final message must be strict JSON with this exact schema:\n{\n "risk_level": "low" | "medium" | "high",\n "risk_score": 0-100,\n "rationale": string,\n "evidence": [{"message": string, "why": string}]\n}\n

View File

@@ -90,7 +90,7 @@ fn resolve_workdir_base_path(
/// Validates feature/policy constraints for `with_additional_permissions` and
/// normalizes any path-based permissions. Errors if the request is invalid.
pub(super) fn normalize_and_validate_additional_permissions(
pub(crate) fn normalize_and_validate_additional_permissions(
request_permission_enabled: bool,
approval_policy: AskForApproval,
sandbox_permissions: SandboxPermissions,

View File

@@ -1,18 +0,0 @@
---
source: tui/src/chatwidget/tests.rs
expression: popup
---
Experimental features
Toggle experimental features. Changes are saved to config.toml.
[ ] JavaScript REPL Enable a persistent Node-backed JavaScript REPL for interactive website debugging
and other inline JavaScript execution capabilities. Requires Node >= v22.22.0
installed.
[ ] Multi-agents Ask Codex to spawn multiple agents to parallelize the work and win in efficiency.
[ ] Apps Use a connected ChatGPT App using "$". Install Apps via /apps command. Restart
Codex after enabling.
[ ] Guardian approvals Let a guardian subagent review `on-request` approval prompts instead of showing
them to you, including sandbox escapes and blocked network access.
[ ] Prevent sleep while running Keep your computer awake while Codex is running a thread.
Press space to select or enter to save for next conversation

View File

@@ -6949,7 +6949,14 @@ async fn experimental_popup_includes_guardian_approval() {
chat.open_experimental_popup();
let popup = render_bottom_popup(&chat, 120);
assert_snapshot!("experimental_popup_includes_guardian_approval", popup);
assert!(
popup.contains("Guardian approvals"),
"expected guardian approvals entry in experimental popup, got:\n{popup}"
);
assert!(
popup.contains("blocked network access"),
"expected guardian approvals description in experimental popup, got:\n{popup}"
);
}
#[tokio::test]

View File

@@ -130,52 +130,36 @@ async fn collect_output_until_exit(
}
async fn wait_for_python_repl_ready(
writer: &tokio::sync::mpsc::Sender<Vec<u8>>,
output_rx: &mut tokio::sync::broadcast::Receiver<Vec<u8>>,
timeout_ms: u64,
newline: &str,
ready_marker: &str,
) -> anyhow::Result<Vec<u8>> {
let mut collected = Vec::new();
let marker = "__codex_pty_ready__";
let deadline = tokio::time::Instant::now() + tokio::time::Duration::from_millis(timeout_ms);
let probe_window = tokio::time::Duration::from_millis(if cfg!(windows) { 750 } else { 250 });
while tokio::time::Instant::now() < deadline {
writer
.send(format!("print('{marker}'){newline}").into_bytes())
.await?;
let probe_deadline = tokio::time::Instant::now() + probe_window;
loop {
let now = tokio::time::Instant::now();
if now >= deadline || now >= probe_deadline {
break;
}
let remaining = std::cmp::min(
deadline.saturating_duration_since(now),
probe_deadline.saturating_duration_since(now),
);
match tokio::time::timeout(remaining, output_rx.recv()).await {
Ok(Ok(chunk)) => {
collected.extend_from_slice(&chunk);
if String::from_utf8_lossy(&collected).contains(marker) {
return Ok(collected);
}
let now = tokio::time::Instant::now();
let remaining = deadline.saturating_duration_since(now);
match tokio::time::timeout(remaining, output_rx.recv()).await {
Ok(Ok(chunk)) => {
collected.extend_from_slice(&chunk);
if String::from_utf8_lossy(&collected).contains(ready_marker) {
return Ok(collected);
}
Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => continue,
Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => {
anyhow::bail!(
"PTY output closed while waiting for Python REPL readiness: {:?}",
String::from_utf8_lossy(&collected)
);
}
Err(_) => break,
}
Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => continue,
Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => {
anyhow::bail!(
"PTY output closed while waiting for Python REPL readiness: {:?}",
String::from_utf8_lossy(&collected)
);
}
Err(_) => break,
}
}
anyhow::bail!(
"timed out waiting for Python REPL readiness in PTY: {:?}",
"timed out waiting for Python REPL readiness marker {ready_marker:?} in PTY: {:?}",
String::from_utf8_lossy(&collected)
);
}
@@ -254,10 +238,17 @@ async fn pty_python_repl_emits_output_and_exits() -> anyhow::Result<()> {
return Ok(());
};
let ready_marker = "__codex_pty_ready__";
let args = vec![
"-i".to_string(),
"-q".to_string(),
"-c".to_string(),
format!("print('{ready_marker}')"),
];
let env_map: HashMap<String, String> = std::env::vars().collect();
let spawned = spawn_pty_process(
&python,
&[],
&args,
Path::new("."),
&env_map,
&None,
@@ -269,7 +260,7 @@ async fn pty_python_repl_emits_output_and_exits() -> anyhow::Result<()> {
let newline = if cfg!(windows) { "\r\n" } else { "\n" };
let startup_timeout_ms = if cfg!(windows) { 10_000 } else { 5_000 };
let mut output =
wait_for_python_repl_ready(&writer, &mut output_rx, startup_timeout_ms, newline).await?;
wait_for_python_repl_ready(&mut output_rx, startup_timeout_ms, ready_marker).await?;
writer
.send(format!("print('hello from pty'){newline}").into_bytes())
.await?;