Compare commits

...

3 Commits

Author SHA1 Message Date
Ahmed Ibrahim
0bd20db915 Attach subagent completion watcher before input 2026-03-07 23:40:14 -08:00
Ahmed Ibrahim
dc19e78962 Stabilize abort task follow-up handling (#13874)
- production logic plus tests; cancel running tasks before clearing
pending turn state
- suppress follow-up model requests after cancellation and assert on
stabilized request counts instead of fixed sleeps
2026-03-07 22:56:00 -08:00
Michael Bolin
3b5fe5ca35 protocol: keep root carveouts sandboxed (#13452)
## Why

A restricted filesystem policy that grants `:root` read or write access
but also carries explicit deny entries should still behave like scoped
access with carveouts, not like unrestricted disk access.

Without that distinction, later platform backends cannot preserve
blocked subpaths under root-level permissions because the protocol layer
reports the policy as fully unrestricted.

## What changed

- taught `FileSystemSandboxPolicy` to treat root access plus explicit
deny entries as scoped access rather than full-disk access
- derived readable and writable roots from the filesystem root when root
access is combined with carveouts, while preserving the denied paths as
read-only subpaths
- added protocol coverage for root-write policies with carveouts and a
core sandboxing regression so those policies still require platform
sandboxing

## Verification

- added protocol coverage in `protocol/src/permissions.rs` and
`protocol/src/protocol.rs` for root access with explicit carveouts
- added platform-sandbox regression coverage in
`core/src/sandboxing/mod.rs`
- verified the current PR state with `just clippy`




---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/13452).
* #13453
* __->__ #13452
* #13451
* #13449
* #13448
* #13445
* #13440
* #13439

---------

Co-authored-by: viyatb-oai <viyatb@openai.com>
2026-03-07 21:15:47 -08:00
7 changed files with 230 additions and 59 deletions

View File

@@ -0,0 +1,23 @@
use anyhow::Result;
use anyhow::anyhow;
use std::env;
use std::path::PathBuf;
fn main() -> Result<()> {
let mut args = env::args_os().skip(1);
let output_path = PathBuf::from(
args.next()
.ok_or_else(|| anyhow!("missing output path argument"))?,
);
let payload = args
.next()
.ok_or_else(|| anyhow!("missing payload argument"))?
.into_string()
.map_err(|_| anyhow!("payload must be valid UTF-8"))?;
let temp_path = output_path.with_extension("json.tmp");
std::fs::write(&temp_path, payload)?;
std::fs::rename(&temp_path, &output_path)?;
Ok(())
}

View File

@@ -14,6 +14,7 @@ use codex_app_server_protocol::ThreadStartResponse;
use codex_app_server_protocol::TurnStartParams;
use codex_app_server_protocol::TurnStartResponse;
use codex_app_server_protocol::UserInput as V2UserInput;
use codex_utils_cargo_bin::cargo_bin;
use core_test_support::fs_wait;
use pretty_assertions::assert_eq;
use serde_json::Value;
@@ -191,29 +192,22 @@ async fn turn_start_notify_payload_includes_initialize_client_name() -> Result<(
let responses = vec![create_final_assistant_message_sse_response("Done")?];
let server = create_mock_responses_server_sequence_unchecked(responses).await;
let codex_home = TempDir::new()?;
let notify_script = codex_home.path().join("notify.py");
std::fs::write(
&notify_script,
r#"from pathlib import Path
import sys
payload_path = Path(__file__).with_name("notify.json")
tmp_path = payload_path.with_suffix(".json.tmp")
tmp_path.write_text(sys.argv[-1], encoding="utf-8")
tmp_path.replace(payload_path)
"#,
)?;
let notify_file = codex_home.path().join("notify.json");
let notify_script = notify_script
let notify_capture = cargo_bin("test_notify_capture")?;
let notify_capture = notify_capture
.to_str()
.expect("notify script path should be valid UTF-8");
.expect("notify capture path should be valid UTF-8");
let notify_file = notify_file
.to_str()
.expect("notify output path should be valid UTF-8");
create_config_toml_with_extra(
codex_home.path(),
&server.uri(),
"never",
&format!(
"notify = [\"python3\", {}]",
toml_basic_string(notify_script)
"notify = [{}, {}]",
toml_basic_string(notify_capture),
toml_basic_string(notify_file)
),
)?;
@@ -261,8 +255,9 @@ tmp_path.replace(payload_path)
)
.await??;
fs_wait::wait_for_path_exists(&notify_file, Duration::from_secs(5)).await?;
let payload_raw = tokio::fs::read_to_string(&notify_file).await?;
let notify_file = Path::new(notify_file);
fs_wait::wait_for_path_exists(notify_file, Duration::from_secs(5)).await?;
let payload_raw = tokio::fs::read_to_string(notify_file).await?;
let payload: Value = serde_json::from_str(&payload_raw)?;
assert_eq!(payload["client"], "xcode");

View File

@@ -25,6 +25,7 @@ use codex_protocol::user_input::UserInput;
use std::sync::Arc;
use std::sync::Weak;
use tokio::sync::watch;
use tracing::debug;
const AGENT_NAMES: &str = include_str!("agent_names.txt");
const FORKED_SPAWN_AGENT_OUTPUT_MESSAGE: &str = "You are the newly spawned agent. The prior conversation history was forked from your parent agent. Treat the next user message as your new task, and use the forked history only as background context.";
@@ -212,8 +213,9 @@ impl AgentControl {
// TODO(jif) add helper for drain
state.notify_thread_created(new_thread.thread_id);
self.maybe_start_completion_watcher(new_thread.thread_id, notification_source)
.await;
self.send_input(new_thread.thread_id, items).await?;
self.maybe_start_completion_watcher(new_thread.thread_id, notification_source);
Ok(new_thread.thread_id)
}
@@ -288,7 +290,8 @@ impl AgentControl {
// Resumed threads are re-registered in-memory and need the same listener
// attachment path as freshly spawned threads.
state.notify_thread_created(resumed_thread.thread_id);
self.maybe_start_completion_watcher(resumed_thread.thread_id, Some(notification_source));
self.maybe_start_completion_watcher(resumed_thread.thread_id, Some(notification_source))
.await;
Ok(resumed_thread.thread_id)
}
@@ -418,7 +421,7 @@ impl AgentControl {
///
/// This is only enabled for `SubAgentSource::ThreadSpawn`, where a parent thread exists and
/// can receive completion notifications.
fn maybe_start_completion_watcher(
async fn maybe_start_completion_watcher(
&self,
child_thread_id: ThreadId,
session_source: Option<SessionSource>,
@@ -429,13 +432,20 @@ impl AgentControl {
else {
return;
};
let status_rx = self.subscribe_status(child_thread_id).await.ok();
let control = self.clone();
tokio::spawn(async move {
let status = match control.subscribe_status(child_thread_id).await {
Ok(mut status_rx) => {
let status = match status_rx {
Some(mut status_rx) => {
debug!(%child_thread_id, "subagent completion watcher attached");
let mut status = status_rx.borrow().clone();
while !is_final(&status) {
if status_rx.changed().await.is_err() {
debug!(
%child_thread_id,
"subagent completion watcher lost status stream; reading latest status"
);
status = control.get_status(child_thread_id).await;
break;
}
@@ -443,9 +453,20 @@ impl AgentControl {
}
status
}
Err(_) => control.get_status(child_thread_id).await,
None => {
debug!(
%child_thread_id,
"subagent completion watcher could not subscribe; reading latest status"
);
control.get_status(child_thread_id).await
}
};
if !is_final(&status) {
debug!(
%child_thread_id,
?status,
"subagent completion watcher exiting before final status"
);
return;
}
@@ -1331,15 +1352,18 @@ mod tests {
let (parent_thread_id, parent_thread) = harness.start_thread().await;
let child_thread_id = ThreadId::new();
harness.control.maybe_start_completion_watcher(
child_thread_id,
Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn {
parent_thread_id,
depth: 1,
agent_nickname: None,
agent_role: Some("explorer".to_string()),
})),
);
harness
.control
.maybe_start_completion_watcher(
child_thread_id,
Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn {
parent_thread_id,
depth: 1,
agent_nickname: None,
agent_role: Some("explorer".to_string()),
})),
)
.await;
assert_eq!(wait_for_subagent_notification(&parent_thread).await, true);

View File

@@ -2702,8 +2702,9 @@ impl Session {
/// Emit an exec approval request event and await the user's decision.
///
/// The request is keyed by `call_id` + `approval_id` so matching responses
/// are delivered to the correct in-flight turn. If the task is aborted,
/// this returns the default `ReviewDecision` (`Denied`).
/// are delivered to the correct in-flight turn. If the pending approval is
/// cleared before a response arrives, treat it as an abort so interrupted
/// turns do not continue on a synthetic denial.
///
/// Note that if `available_decisions` is `None`, then the other fields will
/// be used to derive the available decisions via
@@ -2777,7 +2778,7 @@ impl Session {
parsed_cmd,
});
self.send_event(turn_context, event).await;
rx_approve.await.unwrap_or_default()
rx_approve.await.unwrap_or(ReviewDecision::Abort)
}
pub async fn request_patch_approval(
@@ -6859,6 +6860,10 @@ async fn try_run_sampling_request(
drain_in_flight(&mut in_flight, sess.clone(), turn_context.clone()).await?;
if cancellation_token.is_cancelled() {
return Err(CodexErr::TurnAborted);
}
if should_emit_turn_diff {
let unified_diff = {
let mut tracker = turn_diff_tracker.lock().await;

View File

@@ -673,6 +673,32 @@ mod tests {
);
}
#[test]
fn root_write_policy_with_carveouts_still_uses_platform_sandbox() {
let blocked = AbsolutePathBuf::resolve_path_against_base(
"blocked",
std::env::current_dir().expect("current dir"),
)
.expect("blocked path");
let policy = FileSystemSandboxPolicy::restricted(vec![
FileSystemSandboxEntry {
path: FileSystemPath::Special {
value: FileSystemSpecialPath::Root,
},
access: FileSystemAccessMode::Write,
},
FileSystemSandboxEntry {
path: FileSystemPath::Path { path: blocked },
access: FileSystemAccessMode::None,
},
]);
assert_eq!(
should_require_platform_sandbox(&policy, NetworkSandboxPolicy::Enabled, false),
true
);
}
#[test]
fn full_access_restricted_policy_still_uses_platform_sandbox_for_restricted_network() {
let policy = FileSystemSandboxPolicy::restricted(vec![FileSystemSandboxEntry {

View File

@@ -123,6 +123,25 @@ impl Default for FileSystemSandboxPolicy {
}
impl FileSystemSandboxPolicy {
fn has_root_access(&self, predicate: impl Fn(FileSystemAccessMode) -> bool) -> bool {
matches!(self.kind, FileSystemSandboxKind::Restricted)
&& self.entries.iter().any(|entry| {
matches!(
&entry.path,
FileSystemPath::Special { value }
if matches!(value, FileSystemSpecialPath::Root) && predicate(entry.access)
)
})
}
fn has_explicit_deny_entries(&self) -> bool {
matches!(self.kind, FileSystemSandboxKind::Restricted)
&& self
.entries
.iter()
.any(|entry| entry.access == FileSystemAccessMode::None)
}
pub fn unrestricted() -> Self {
Self {
kind: FileSystemSandboxKind::Unrestricted,
@@ -148,13 +167,10 @@ impl FileSystemSandboxPolicy {
pub fn has_full_disk_read_access(&self) -> bool {
match self.kind {
FileSystemSandboxKind::Unrestricted | FileSystemSandboxKind::ExternalSandbox => true,
FileSystemSandboxKind::Restricted => self.entries.iter().any(|entry| {
matches!(
&entry.path,
FileSystemPath::Special { value }
if matches!(value, FileSystemSpecialPath::Root) && entry.access.can_read()
)
}),
FileSystemSandboxKind::Restricted => {
self.has_root_access(FileSystemAccessMode::can_read)
&& !self.has_explicit_deny_entries()
}
}
}
@@ -162,14 +178,10 @@ impl FileSystemSandboxPolicy {
pub fn has_full_disk_write_access(&self) -> bool {
match self.kind {
FileSystemSandboxKind::Unrestricted | FileSystemSandboxKind::ExternalSandbox => true,
FileSystemSandboxKind::Restricted => self.entries.iter().any(|entry| {
matches!(
&entry.path,
FileSystemPath::Special { value }
if matches!(value, FileSystemSpecialPath::Root)
&& entry.access.can_write()
)
}),
FileSystemSandboxKind::Restricted => {
self.has_root_access(FileSystemAccessMode::can_write)
&& !self.has_explicit_deny_entries()
}
}
}
@@ -194,11 +206,24 @@ impl FileSystemSandboxPolicy {
}
let cwd_absolute = AbsolutePathBuf::from_absolute_path(cwd).ok();
let mut readable_roots = Vec::new();
if self.has_root_access(FileSystemAccessMode::can_read)
&& let Some(cwd_absolute) = cwd_absolute.as_ref()
{
readable_roots.push(absolute_root_path_for_cwd(cwd_absolute));
}
dedup_absolute_paths(
self.entries
.iter()
.filter(|entry| entry.access.can_read())
.filter_map(|entry| resolve_file_system_path(&entry.path, cwd_absolute.as_ref()))
readable_roots
.into_iter()
.chain(
self.entries
.iter()
.filter(|entry| entry.access.can_read())
.filter_map(|entry| {
resolve_file_system_path(&entry.path, cwd_absolute.as_ref())
}),
)
.collect(),
)
}
@@ -212,11 +237,24 @@ impl FileSystemSandboxPolicy {
let cwd_absolute = AbsolutePathBuf::from_absolute_path(cwd).ok();
let unreadable_roots = self.get_unreadable_roots_with_cwd(cwd);
let mut writable_roots = Vec::new();
if self.has_root_access(FileSystemAccessMode::can_write)
&& let Some(cwd_absolute) = cwd_absolute.as_ref()
{
writable_roots.push(absolute_root_path_for_cwd(cwd_absolute));
}
dedup_absolute_paths(
self.entries
.iter()
.filter(|entry| entry.access.can_write())
.filter_map(|entry| resolve_file_system_path(&entry.path, cwd_absolute.as_ref()))
writable_roots
.into_iter()
.chain(
self.entries
.iter()
.filter(|entry| entry.access.can_write())
.filter_map(|entry| {
resolve_file_system_path(&entry.path, cwd_absolute.as_ref())
}),
)
.collect(),
)
.into_iter()
@@ -543,6 +581,16 @@ fn resolve_file_system_path(
}
}
fn absolute_root_path_for_cwd(cwd: &AbsolutePathBuf) -> AbsolutePathBuf {
let root = cwd
.as_path()
.ancestors()
.last()
.unwrap_or_else(|| panic!("cwd must have a filesystem root"));
AbsolutePathBuf::from_absolute_path(root)
.unwrap_or_else(|err| panic!("cwd root must be an absolute path: {err}"))
}
fn resolve_file_system_special_path(
value: &FileSystemSpecialPath,
cwd: Option<&AbsolutePathBuf>,

View File

@@ -3352,6 +3352,56 @@ mod tests {
assert!(writable.has_full_disk_write_access());
}
#[test]
fn restricted_file_system_policy_treats_root_with_carveouts_as_scoped_access() {
let cwd = TempDir::new().expect("tempdir");
let cwd_absolute =
AbsolutePathBuf::from_absolute_path(cwd.path()).expect("absolute tempdir");
let root = cwd_absolute
.as_path()
.ancestors()
.last()
.and_then(|path| AbsolutePathBuf::from_absolute_path(path).ok())
.expect("filesystem root");
let blocked = AbsolutePathBuf::resolve_path_against_base("blocked", cwd.path())
.expect("resolve blocked");
let policy = FileSystemSandboxPolicy::restricted(vec![
FileSystemSandboxEntry {
path: FileSystemPath::Special {
value: FileSystemSpecialPath::Root,
},
access: FileSystemAccessMode::Write,
},
FileSystemSandboxEntry {
path: FileSystemPath::Path {
path: blocked.clone(),
},
access: FileSystemAccessMode::None,
},
]);
assert!(!policy.has_full_disk_read_access());
assert!(!policy.has_full_disk_write_access());
assert_eq!(
policy.get_readable_roots_with_cwd(cwd.path()),
vec![root.clone()]
);
assert_eq!(
policy.get_unreadable_roots_with_cwd(cwd.path()),
vec![blocked.clone()]
);
let writable_roots = policy.get_writable_roots_with_cwd(cwd.path());
assert_eq!(writable_roots.len(), 1);
assert_eq!(writable_roots[0].root, root);
assert!(
writable_roots[0]
.read_only_subpaths
.iter()
.any(|path| path.as_path() == blocked.as_path())
);
}
#[test]
fn restricted_file_system_policy_derives_effective_paths() {
let cwd = TempDir::new().expect("tempdir");