Compare commits

..

5 Commits

Author SHA1 Message Date
Ahmed Ibrahim
eacecf126e Stabilize thread unsubscribe app-server test (#13940)
## What changed
- This PR rewrites the unsubscribe test to stop using a blind delay
after the unsubscribe request.
- The test now polls the captured `/responses` request count until it
stabilizes and fails immediately if another outbound request appears.

## Why this fixes the flake
- The previous version slept for a fixed interval and then asserted,
which meant it was sampling the system at an arbitrary point in time.
- On slower runners the turn had not fully settled yet, and on faster
runners the same sleep just wasted time.
- Polling the request count makes the assertion depend on the actual
quiescent state we care about: no more model follow-up requests after
the unsubscribe/interrupt path.

## Scope
- Test-only change.
2026-03-08 16:41:17 -07:00
Ahmed Ibrahim
d67f2c5604 codex: restore app-server runtime deps 2026-03-07 23:16:58 -08:00
Ahmed Ibrahim
a00827bdaa codex: stabilize app-server notify initialize test 2026-03-07 23:16:58 -08:00
Ahmed Ibrahim
dc19e78962 Stabilize abort task follow-up handling (#13874)
- production logic plus tests; cancel running tasks before clearing
pending turn state
- suppress follow-up model requests after cancellation and assert on
stabilized request counts instead of fixed sleeps
2026-03-07 22:56:00 -08:00
Michael Bolin
3b5fe5ca35 protocol: keep root carveouts sandboxed (#13452)
## Why

A restricted filesystem policy that grants `:root` read or write access
but also carries explicit deny entries should still behave like scoped
access with carveouts, not like unrestricted disk access.

Without that distinction, later platform backends cannot preserve
blocked subpaths under root-level permissions because the protocol layer
reports the policy as fully unrestricted.

## What changed

- taught `FileSystemSandboxPolicy` to treat root access plus explicit
deny entries as scoped access rather than full-disk access
- derived readable and writable roots from the filesystem root when root
access is combined with carveouts, while preserving the denied paths as
read-only subpaths
- added protocol coverage for root-write policies with carveouts and a
core sandboxing regression so those policies still require platform
sandboxing

## Verification

- added protocol coverage in `protocol/src/permissions.rs` and
`protocol/src/protocol.rs` for root access with explicit carveouts
- added platform-sandbox regression coverage in
`core/src/sandboxing/mod.rs`
- verified the current PR state with `just clippy`




---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/13452).
* #13453
* __->__ #13452
* #13451
* #13449
* #13448
* #13445
* #13440
* #13439

---------

Co-authored-by: viyatb-oai <viyatb@openai.com>
2026-03-07 21:15:47 -08:00
10 changed files with 326 additions and 68 deletions

View File

@@ -8,6 +8,10 @@ license.workspace = true
name = "codex-app-server"
path = "src/main.rs"
[[bin]]
name = "codex-app-server-test-notify-capture"
path = "src/bin/notify_capture.rs"
[lib]
name = "codex_app_server"
path = "src/lib.rs"

View File

@@ -0,0 +1,44 @@
use std::env;
use std::fs;
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
use anyhow::Context;
use anyhow::Result;
use anyhow::anyhow;
use anyhow::bail;
fn main() -> Result<()> {
let mut args = env::args_os();
let _program = args.next();
let output_path = PathBuf::from(
args.next()
.ok_or_else(|| anyhow!("expected output path as first argument"))?,
);
let payload = args
.next()
.ok_or_else(|| anyhow!("expected payload as final argument"))?;
if args.next().is_some() {
bail!("expected payload as final argument");
}
let payload = payload.to_string_lossy();
let temp_path = PathBuf::from(format!("{}.tmp", output_path.display()));
let mut file = File::create(&temp_path)
.with_context(|| format!("failed to create {}", temp_path.display()))?;
file.write_all(payload.as_bytes())
.with_context(|| format!("failed to write {}", temp_path.display()))?;
file.sync_all()
.with_context(|| format!("failed to sync {}", temp_path.display()))?;
fs::rename(&temp_path, &output_path).with_context(|| {
format!(
"failed to move {} into {}",
temp_path.display(),
output_path.display()
)
})?;
Ok(())
}

View File

@@ -0,0 +1,23 @@
use anyhow::Result;
use anyhow::anyhow;
use std::env;
use std::path::PathBuf;
fn main() -> Result<()> {
let mut args = env::args_os().skip(1);
let output_path = PathBuf::from(
args.next()
.ok_or_else(|| anyhow!("missing output path argument"))?,
);
let payload = args
.next()
.ok_or_else(|| anyhow!("missing payload argument"))?
.into_string()
.map_err(|_| anyhow!("payload must be valid UTF-8"))?;
let temp_path = output_path.with_extension("json.tmp");
std::fs::write(&temp_path, payload)?;
std::fs::rename(&temp_path, &output_path)?;
Ok(())
}

View File

@@ -14,6 +14,7 @@ use codex_app_server_protocol::ThreadStartResponse;
use codex_app_server_protocol::TurnStartParams;
use codex_app_server_protocol::TurnStartResponse;
use codex_app_server_protocol::UserInput as V2UserInput;
use codex_utils_cargo_bin::cargo_bin;
use core_test_support::fs_wait;
use pretty_assertions::assert_eq;
use serde_json::Value;
@@ -191,29 +192,22 @@ async fn turn_start_notify_payload_includes_initialize_client_name() -> Result<(
let responses = vec![create_final_assistant_message_sse_response("Done")?];
let server = create_mock_responses_server_sequence_unchecked(responses).await;
let codex_home = TempDir::new()?;
let notify_script = codex_home.path().join("notify.py");
std::fs::write(
&notify_script,
r#"from pathlib import Path
import sys
payload_path = Path(__file__).with_name("notify.json")
tmp_path = payload_path.with_suffix(".json.tmp")
tmp_path.write_text(sys.argv[-1], encoding="utf-8")
tmp_path.replace(payload_path)
"#,
)?;
let notify_file = codex_home.path().join("notify.json");
let notify_script = notify_script
let notify_capture = cargo_bin("codex-app-server-test-notify-capture")?;
let notify_capture = notify_capture
.to_str()
.expect("notify script path should be valid UTF-8");
.expect("notify capture path should be valid UTF-8");
let notify_file_str = notify_file
.to_str()
.expect("notify file path should be valid UTF-8");
create_config_toml_with_extra(
codex_home.path(),
&server.uri(),
"never",
&format!(
"notify = [\"python3\", {}]",
toml_basic_string(notify_script)
"notify = [{}, {}]",
toml_basic_string(notify_capture),
toml_basic_string(notify_file_str)
),
)?;
@@ -297,6 +291,9 @@ model_provider = "mock_provider"
{extra}
[features]
shell_snapshot = false
[model_providers.mock_provider]
name = "Mock provider for test"
base_url = "{server_uri}/v1"

View File

@@ -34,6 +34,51 @@ use tokio::time::timeout;
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
async fn wait_for_responses_request_count_to_stabilize(
server: &wiremock::MockServer,
expected_count: usize,
settle_duration: std::time::Duration,
) -> Result<()> {
timeout(DEFAULT_READ_TIMEOUT, async {
let mut stable_since: Option<tokio::time::Instant> = None;
loop {
let requests = server
.received_requests()
.await
.context("failed to fetch received requests")?;
let responses_request_count = requests
.iter()
.filter(|request| {
request.method == "POST" && request.url.path().ends_with("/responses")
})
.count();
if responses_request_count > expected_count {
anyhow::bail!(
"expected exactly {expected_count} /responses requests, got {responses_request_count}"
);
}
if responses_request_count == expected_count {
match stable_since {
Some(stable_since) if stable_since.elapsed() >= settle_duration => {
return Ok::<(), anyhow::Error>(());
}
None => stable_since = Some(tokio::time::Instant::now()),
Some(_) => {}
}
} else {
stable_since = None;
}
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
}
})
.await??;
Ok(())
}
#[tokio::test]
async fn thread_unsubscribe_unloads_thread_and_emits_thread_closed_notification() -> Result<()> {
let server = create_mock_responses_server_repeating_assistant("Done").await;
@@ -168,6 +213,13 @@ async fn thread_unsubscribe_during_turn_interrupts_turn_and_emits_thread_closed(
};
assert_eq!(payload.thread_id, thread_id);
wait_for_responses_request_count_to_stabilize(
&server,
1,
std::time::Duration::from_millis(200),
)
.await?;
Ok(())
}

View File

@@ -2702,8 +2702,9 @@ impl Session {
/// Emit an exec approval request event and await the user's decision.
///
/// The request is keyed by `call_id` + `approval_id` so matching responses
/// are delivered to the correct in-flight turn. If the task is aborted,
/// this returns the default `ReviewDecision` (`Denied`).
/// are delivered to the correct in-flight turn. If the pending approval is
/// cleared before a response arrives, treat it as an abort so interrupted
/// turns do not continue on a synthetic denial.
///
/// Note that if `available_decisions` is `None`, then the other fields will
/// be used to derive the available decisions via
@@ -2777,7 +2778,7 @@ impl Session {
parsed_cmd,
});
self.send_event(turn_context, event).await;
rx_approve.await.unwrap_or_default()
rx_approve.await.unwrap_or(ReviewDecision::Abort)
}
pub async fn request_patch_approval(
@@ -6859,6 +6860,10 @@ async fn try_run_sampling_request(
drain_in_flight(&mut in_flight, sess.clone(), turn_context.clone()).await?;
if cancellation_token.is_cancelled() {
return Err(CodexErr::TurnAborted);
}
if should_emit_turn_diff {
let unified_diff = {
let mut tracker = turn_diff_tracker.lock().await;

View File

@@ -673,6 +673,32 @@ mod tests {
);
}
#[test]
fn root_write_policy_with_carveouts_still_uses_platform_sandbox() {
let blocked = AbsolutePathBuf::resolve_path_against_base(
"blocked",
std::env::current_dir().expect("current dir"),
)
.expect("blocked path");
let policy = FileSystemSandboxPolicy::restricted(vec![
FileSystemSandboxEntry {
path: FileSystemPath::Special {
value: FileSystemSpecialPath::Root,
},
access: FileSystemAccessMode::Write,
},
FileSystemSandboxEntry {
path: FileSystemPath::Path { path: blocked },
access: FileSystemAccessMode::None,
},
]);
assert_eq!(
should_require_platform_sandbox(&policy, NetworkSandboxPolicy::Enabled, false),
true
);
}
#[test]
fn full_access_restricted_policy_still_uses_platform_sandbox_for_restricted_network() {
let policy = FileSystemSandboxPolicy::restricted(vec![FileSystemSandboxEntry {

View File

@@ -123,6 +123,25 @@ impl Default for FileSystemSandboxPolicy {
}
impl FileSystemSandboxPolicy {
fn has_root_access(&self, predicate: impl Fn(FileSystemAccessMode) -> bool) -> bool {
matches!(self.kind, FileSystemSandboxKind::Restricted)
&& self.entries.iter().any(|entry| {
matches!(
&entry.path,
FileSystemPath::Special { value }
if matches!(value, FileSystemSpecialPath::Root) && predicate(entry.access)
)
})
}
fn has_explicit_deny_entries(&self) -> bool {
matches!(self.kind, FileSystemSandboxKind::Restricted)
&& self
.entries
.iter()
.any(|entry| entry.access == FileSystemAccessMode::None)
}
pub fn unrestricted() -> Self {
Self {
kind: FileSystemSandboxKind::Unrestricted,
@@ -148,13 +167,10 @@ impl FileSystemSandboxPolicy {
pub fn has_full_disk_read_access(&self) -> bool {
match self.kind {
FileSystemSandboxKind::Unrestricted | FileSystemSandboxKind::ExternalSandbox => true,
FileSystemSandboxKind::Restricted => self.entries.iter().any(|entry| {
matches!(
&entry.path,
FileSystemPath::Special { value }
if matches!(value, FileSystemSpecialPath::Root) && entry.access.can_read()
)
}),
FileSystemSandboxKind::Restricted => {
self.has_root_access(FileSystemAccessMode::can_read)
&& !self.has_explicit_deny_entries()
}
}
}
@@ -162,14 +178,10 @@ impl FileSystemSandboxPolicy {
pub fn has_full_disk_write_access(&self) -> bool {
match self.kind {
FileSystemSandboxKind::Unrestricted | FileSystemSandboxKind::ExternalSandbox => true,
FileSystemSandboxKind::Restricted => self.entries.iter().any(|entry| {
matches!(
&entry.path,
FileSystemPath::Special { value }
if matches!(value, FileSystemSpecialPath::Root)
&& entry.access.can_write()
)
}),
FileSystemSandboxKind::Restricted => {
self.has_root_access(FileSystemAccessMode::can_write)
&& !self.has_explicit_deny_entries()
}
}
}
@@ -194,11 +206,24 @@ impl FileSystemSandboxPolicy {
}
let cwd_absolute = AbsolutePathBuf::from_absolute_path(cwd).ok();
let mut readable_roots = Vec::new();
if self.has_root_access(FileSystemAccessMode::can_read)
&& let Some(cwd_absolute) = cwd_absolute.as_ref()
{
readable_roots.push(absolute_root_path_for_cwd(cwd_absolute));
}
dedup_absolute_paths(
self.entries
.iter()
.filter(|entry| entry.access.can_read())
.filter_map(|entry| resolve_file_system_path(&entry.path, cwd_absolute.as_ref()))
readable_roots
.into_iter()
.chain(
self.entries
.iter()
.filter(|entry| entry.access.can_read())
.filter_map(|entry| {
resolve_file_system_path(&entry.path, cwd_absolute.as_ref())
}),
)
.collect(),
)
}
@@ -212,11 +237,24 @@ impl FileSystemSandboxPolicy {
let cwd_absolute = AbsolutePathBuf::from_absolute_path(cwd).ok();
let unreadable_roots = self.get_unreadable_roots_with_cwd(cwd);
let mut writable_roots = Vec::new();
if self.has_root_access(FileSystemAccessMode::can_write)
&& let Some(cwd_absolute) = cwd_absolute.as_ref()
{
writable_roots.push(absolute_root_path_for_cwd(cwd_absolute));
}
dedup_absolute_paths(
self.entries
.iter()
.filter(|entry| entry.access.can_write())
.filter_map(|entry| resolve_file_system_path(&entry.path, cwd_absolute.as_ref()))
writable_roots
.into_iter()
.chain(
self.entries
.iter()
.filter(|entry| entry.access.can_write())
.filter_map(|entry| {
resolve_file_system_path(&entry.path, cwd_absolute.as_ref())
}),
)
.collect(),
)
.into_iter()
@@ -543,6 +581,16 @@ fn resolve_file_system_path(
}
}
fn absolute_root_path_for_cwd(cwd: &AbsolutePathBuf) -> AbsolutePathBuf {
let root = cwd
.as_path()
.ancestors()
.last()
.unwrap_or_else(|| panic!("cwd must have a filesystem root"));
AbsolutePathBuf::from_absolute_path(root)
.unwrap_or_else(|err| panic!("cwd root must be an absolute path: {err}"))
}
fn resolve_file_system_special_path(
value: &FileSystemSpecialPath,
cwd: Option<&AbsolutePathBuf>,

View File

@@ -3352,6 +3352,56 @@ mod tests {
assert!(writable.has_full_disk_write_access());
}
#[test]
fn restricted_file_system_policy_treats_root_with_carveouts_as_scoped_access() {
let cwd = TempDir::new().expect("tempdir");
let cwd_absolute =
AbsolutePathBuf::from_absolute_path(cwd.path()).expect("absolute tempdir");
let root = cwd_absolute
.as_path()
.ancestors()
.last()
.and_then(|path| AbsolutePathBuf::from_absolute_path(path).ok())
.expect("filesystem root");
let blocked = AbsolutePathBuf::resolve_path_against_base("blocked", cwd.path())
.expect("resolve blocked");
let policy = FileSystemSandboxPolicy::restricted(vec![
FileSystemSandboxEntry {
path: FileSystemPath::Special {
value: FileSystemSpecialPath::Root,
},
access: FileSystemAccessMode::Write,
},
FileSystemSandboxEntry {
path: FileSystemPath::Path {
path: blocked.clone(),
},
access: FileSystemAccessMode::None,
},
]);
assert!(!policy.has_full_disk_read_access());
assert!(!policy.has_full_disk_write_access());
assert_eq!(
policy.get_readable_roots_with_cwd(cwd.path()),
vec![root.clone()]
);
assert_eq!(
policy.get_unreadable_roots_with_cwd(cwd.path()),
vec![blocked.clone()]
);
let writable_roots = policy.get_writable_roots_with_cwd(cwd.path());
assert_eq!(writable_roots.len(), 1);
assert_eq!(writable_roots[0].root, root);
assert!(
writable_roots[0]
.read_only_subpaths
.iter()
.any(|path| path.as_path() == blocked.as_path())
);
}
#[test]
fn restricted_file_system_policy_derives_effective_paths() {
let cwd = TempDir::new().expect("tempdir");

View File

@@ -130,36 +130,52 @@ async fn collect_output_until_exit(
}
async fn wait_for_python_repl_ready(
writer: &tokio::sync::mpsc::Sender<Vec<u8>>,
output_rx: &mut tokio::sync::broadcast::Receiver<Vec<u8>>,
timeout_ms: u64,
ready_marker: &str,
newline: &str,
) -> anyhow::Result<Vec<u8>> {
let mut collected = Vec::new();
let marker = "__codex_pty_ready__";
let deadline = tokio::time::Instant::now() + tokio::time::Duration::from_millis(timeout_ms);
let probe_window = tokio::time::Duration::from_millis(if cfg!(windows) { 750 } else { 250 });
while tokio::time::Instant::now() < deadline {
let now = tokio::time::Instant::now();
let remaining = deadline.saturating_duration_since(now);
match tokio::time::timeout(remaining, output_rx.recv()).await {
Ok(Ok(chunk)) => {
collected.extend_from_slice(&chunk);
if String::from_utf8_lossy(&collected).contains(ready_marker) {
return Ok(collected);
writer
.send(format!("print('{marker}'){newline}").into_bytes())
.await?;
let probe_deadline = tokio::time::Instant::now() + probe_window;
loop {
let now = tokio::time::Instant::now();
if now >= deadline || now >= probe_deadline {
break;
}
let remaining = std::cmp::min(
deadline.saturating_duration_since(now),
probe_deadline.saturating_duration_since(now),
);
match tokio::time::timeout(remaining, output_rx.recv()).await {
Ok(Ok(chunk)) => {
collected.extend_from_slice(&chunk);
if String::from_utf8_lossy(&collected).contains(marker) {
return Ok(collected);
}
}
Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => continue,
Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => {
anyhow::bail!(
"PTY output closed while waiting for Python REPL readiness: {:?}",
String::from_utf8_lossy(&collected)
);
}
Err(_) => break,
}
Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => continue,
Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => {
anyhow::bail!(
"PTY output closed while waiting for Python REPL readiness: {:?}",
String::from_utf8_lossy(&collected)
);
}
Err(_) => break,
}
}
anyhow::bail!(
"timed out waiting for Python REPL readiness marker {ready_marker:?} in PTY: {:?}",
"timed out waiting for Python REPL readiness in PTY: {:?}",
String::from_utf8_lossy(&collected)
);
}
@@ -238,17 +254,10 @@ async fn pty_python_repl_emits_output_and_exits() -> anyhow::Result<()> {
return Ok(());
};
let ready_marker = "__codex_pty_ready__";
let args = vec![
"-i".to_string(),
"-q".to_string(),
"-c".to_string(),
format!("print('{ready_marker}')"),
];
let env_map: HashMap<String, String> = std::env::vars().collect();
let spawned = spawn_pty_process(
&python,
&args,
&[],
Path::new("."),
&env_map,
&None,
@@ -260,7 +269,7 @@ async fn pty_python_repl_emits_output_and_exits() -> anyhow::Result<()> {
let newline = if cfg!(windows) { "\r\n" } else { "\n" };
let startup_timeout_ms = if cfg!(windows) { 10_000 } else { 5_000 };
let mut output =
wait_for_python_repl_ready(&mut output_rx, startup_timeout_ms, ready_marker).await?;
wait_for_python_repl_ready(&writer, &mut output_rx, startup_timeout_ms, newline).await?;
writer
.send(format!("print('hello from pty'){newline}").into_bytes())
.await?;