mirror of
https://github.com/openai/codex.git
synced 2026-03-08 23:53:21 +00:00
Compare commits
3 Commits
dev/flaky-
...
dev/flaky-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0bd20db915 | ||
|
|
dc19e78962 | ||
|
|
3b5fe5ca35 |
23
codex-rs/app-server/src/bin/test_notify_capture.rs
Normal file
23
codex-rs/app-server/src/bin/test_notify_capture.rs
Normal file
@@ -0,0 +1,23 @@
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use std::env;
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let mut args = env::args_os().skip(1);
|
||||
let output_path = PathBuf::from(
|
||||
args.next()
|
||||
.ok_or_else(|| anyhow!("missing output path argument"))?,
|
||||
);
|
||||
let payload = args
|
||||
.next()
|
||||
.ok_or_else(|| anyhow!("missing payload argument"))?
|
||||
.into_string()
|
||||
.map_err(|_| anyhow!("payload must be valid UTF-8"))?;
|
||||
|
||||
let temp_path = output_path.with_extension("json.tmp");
|
||||
std::fs::write(&temp_path, payload)?;
|
||||
std::fs::rename(&temp_path, &output_path)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -14,6 +14,7 @@ use codex_app_server_protocol::ThreadStartResponse;
|
||||
use codex_app_server_protocol::TurnStartParams;
|
||||
use codex_app_server_protocol::TurnStartResponse;
|
||||
use codex_app_server_protocol::UserInput as V2UserInput;
|
||||
use codex_utils_cargo_bin::cargo_bin;
|
||||
use core_test_support::fs_wait;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::Value;
|
||||
@@ -191,29 +192,22 @@ async fn turn_start_notify_payload_includes_initialize_client_name() -> Result<(
|
||||
let responses = vec![create_final_assistant_message_sse_response("Done")?];
|
||||
let server = create_mock_responses_server_sequence_unchecked(responses).await;
|
||||
let codex_home = TempDir::new()?;
|
||||
let notify_script = codex_home.path().join("notify.py");
|
||||
std::fs::write(
|
||||
¬ify_script,
|
||||
r#"from pathlib import Path
|
||||
import sys
|
||||
|
||||
payload_path = Path(__file__).with_name("notify.json")
|
||||
tmp_path = payload_path.with_suffix(".json.tmp")
|
||||
tmp_path.write_text(sys.argv[-1], encoding="utf-8")
|
||||
tmp_path.replace(payload_path)
|
||||
"#,
|
||||
)?;
|
||||
let notify_file = codex_home.path().join("notify.json");
|
||||
let notify_script = notify_script
|
||||
let notify_capture = cargo_bin("test_notify_capture")?;
|
||||
let notify_capture = notify_capture
|
||||
.to_str()
|
||||
.expect("notify script path should be valid UTF-8");
|
||||
.expect("notify capture path should be valid UTF-8");
|
||||
let notify_file = notify_file
|
||||
.to_str()
|
||||
.expect("notify output path should be valid UTF-8");
|
||||
create_config_toml_with_extra(
|
||||
codex_home.path(),
|
||||
&server.uri(),
|
||||
"never",
|
||||
&format!(
|
||||
"notify = [\"python3\", {}]",
|
||||
toml_basic_string(notify_script)
|
||||
"notify = [{}, {}]",
|
||||
toml_basic_string(notify_capture),
|
||||
toml_basic_string(notify_file)
|
||||
),
|
||||
)?;
|
||||
|
||||
@@ -261,8 +255,9 @@ tmp_path.replace(payload_path)
|
||||
)
|
||||
.await??;
|
||||
|
||||
fs_wait::wait_for_path_exists(¬ify_file, Duration::from_secs(5)).await?;
|
||||
let payload_raw = tokio::fs::read_to_string(¬ify_file).await?;
|
||||
let notify_file = Path::new(notify_file);
|
||||
fs_wait::wait_for_path_exists(notify_file, Duration::from_secs(5)).await?;
|
||||
let payload_raw = tokio::fs::read_to_string(notify_file).await?;
|
||||
let payload: Value = serde_json::from_str(&payload_raw)?;
|
||||
assert_eq!(payload["client"], "xcode");
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ use codex_protocol::user_input::UserInput;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Weak;
|
||||
use tokio::sync::watch;
|
||||
use tracing::debug;
|
||||
|
||||
const AGENT_NAMES: &str = include_str!("agent_names.txt");
|
||||
const FORKED_SPAWN_AGENT_OUTPUT_MESSAGE: &str = "You are the newly spawned agent. The prior conversation history was forked from your parent agent. Treat the next user message as your new task, and use the forked history only as background context.";
|
||||
@@ -212,8 +213,9 @@ impl AgentControl {
|
||||
// TODO(jif) add helper for drain
|
||||
state.notify_thread_created(new_thread.thread_id);
|
||||
|
||||
self.maybe_start_completion_watcher(new_thread.thread_id, notification_source)
|
||||
.await;
|
||||
self.send_input(new_thread.thread_id, items).await?;
|
||||
self.maybe_start_completion_watcher(new_thread.thread_id, notification_source);
|
||||
|
||||
Ok(new_thread.thread_id)
|
||||
}
|
||||
@@ -288,7 +290,8 @@ impl AgentControl {
|
||||
// Resumed threads are re-registered in-memory and need the same listener
|
||||
// attachment path as freshly spawned threads.
|
||||
state.notify_thread_created(resumed_thread.thread_id);
|
||||
self.maybe_start_completion_watcher(resumed_thread.thread_id, Some(notification_source));
|
||||
self.maybe_start_completion_watcher(resumed_thread.thread_id, Some(notification_source))
|
||||
.await;
|
||||
|
||||
Ok(resumed_thread.thread_id)
|
||||
}
|
||||
@@ -418,7 +421,7 @@ impl AgentControl {
|
||||
///
|
||||
/// This is only enabled for `SubAgentSource::ThreadSpawn`, where a parent thread exists and
|
||||
/// can receive completion notifications.
|
||||
fn maybe_start_completion_watcher(
|
||||
async fn maybe_start_completion_watcher(
|
||||
&self,
|
||||
child_thread_id: ThreadId,
|
||||
session_source: Option<SessionSource>,
|
||||
@@ -429,13 +432,20 @@ impl AgentControl {
|
||||
else {
|
||||
return;
|
||||
};
|
||||
|
||||
let status_rx = self.subscribe_status(child_thread_id).await.ok();
|
||||
let control = self.clone();
|
||||
tokio::spawn(async move {
|
||||
let status = match control.subscribe_status(child_thread_id).await {
|
||||
Ok(mut status_rx) => {
|
||||
let status = match status_rx {
|
||||
Some(mut status_rx) => {
|
||||
debug!(%child_thread_id, "subagent completion watcher attached");
|
||||
let mut status = status_rx.borrow().clone();
|
||||
while !is_final(&status) {
|
||||
if status_rx.changed().await.is_err() {
|
||||
debug!(
|
||||
%child_thread_id,
|
||||
"subagent completion watcher lost status stream; reading latest status"
|
||||
);
|
||||
status = control.get_status(child_thread_id).await;
|
||||
break;
|
||||
}
|
||||
@@ -443,9 +453,20 @@ impl AgentControl {
|
||||
}
|
||||
status
|
||||
}
|
||||
Err(_) => control.get_status(child_thread_id).await,
|
||||
None => {
|
||||
debug!(
|
||||
%child_thread_id,
|
||||
"subagent completion watcher could not subscribe; reading latest status"
|
||||
);
|
||||
control.get_status(child_thread_id).await
|
||||
}
|
||||
};
|
||||
if !is_final(&status) {
|
||||
debug!(
|
||||
%child_thread_id,
|
||||
?status,
|
||||
"subagent completion watcher exiting before final status"
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1331,15 +1352,18 @@ mod tests {
|
||||
let (parent_thread_id, parent_thread) = harness.start_thread().await;
|
||||
let child_thread_id = ThreadId::new();
|
||||
|
||||
harness.control.maybe_start_completion_watcher(
|
||||
child_thread_id,
|
||||
Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn {
|
||||
parent_thread_id,
|
||||
depth: 1,
|
||||
agent_nickname: None,
|
||||
agent_role: Some("explorer".to_string()),
|
||||
})),
|
||||
);
|
||||
harness
|
||||
.control
|
||||
.maybe_start_completion_watcher(
|
||||
child_thread_id,
|
||||
Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn {
|
||||
parent_thread_id,
|
||||
depth: 1,
|
||||
agent_nickname: None,
|
||||
agent_role: Some("explorer".to_string()),
|
||||
})),
|
||||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(wait_for_subagent_notification(&parent_thread).await, true);
|
||||
|
||||
|
||||
@@ -2702,8 +2702,9 @@ impl Session {
|
||||
/// Emit an exec approval request event and await the user's decision.
|
||||
///
|
||||
/// The request is keyed by `call_id` + `approval_id` so matching responses
|
||||
/// are delivered to the correct in-flight turn. If the task is aborted,
|
||||
/// this returns the default `ReviewDecision` (`Denied`).
|
||||
/// are delivered to the correct in-flight turn. If the pending approval is
|
||||
/// cleared before a response arrives, treat it as an abort so interrupted
|
||||
/// turns do not continue on a synthetic denial.
|
||||
///
|
||||
/// Note that if `available_decisions` is `None`, then the other fields will
|
||||
/// be used to derive the available decisions via
|
||||
@@ -2777,7 +2778,7 @@ impl Session {
|
||||
parsed_cmd,
|
||||
});
|
||||
self.send_event(turn_context, event).await;
|
||||
rx_approve.await.unwrap_or_default()
|
||||
rx_approve.await.unwrap_or(ReviewDecision::Abort)
|
||||
}
|
||||
|
||||
pub async fn request_patch_approval(
|
||||
@@ -6859,6 +6860,10 @@ async fn try_run_sampling_request(
|
||||
|
||||
drain_in_flight(&mut in_flight, sess.clone(), turn_context.clone()).await?;
|
||||
|
||||
if cancellation_token.is_cancelled() {
|
||||
return Err(CodexErr::TurnAborted);
|
||||
}
|
||||
|
||||
if should_emit_turn_diff {
|
||||
let unified_diff = {
|
||||
let mut tracker = turn_diff_tracker.lock().await;
|
||||
|
||||
@@ -673,6 +673,32 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn root_write_policy_with_carveouts_still_uses_platform_sandbox() {
|
||||
let blocked = AbsolutePathBuf::resolve_path_against_base(
|
||||
"blocked",
|
||||
std::env::current_dir().expect("current dir"),
|
||||
)
|
||||
.expect("blocked path");
|
||||
let policy = FileSystemSandboxPolicy::restricted(vec![
|
||||
FileSystemSandboxEntry {
|
||||
path: FileSystemPath::Special {
|
||||
value: FileSystemSpecialPath::Root,
|
||||
},
|
||||
access: FileSystemAccessMode::Write,
|
||||
},
|
||||
FileSystemSandboxEntry {
|
||||
path: FileSystemPath::Path { path: blocked },
|
||||
access: FileSystemAccessMode::None,
|
||||
},
|
||||
]);
|
||||
|
||||
assert_eq!(
|
||||
should_require_platform_sandbox(&policy, NetworkSandboxPolicy::Enabled, false),
|
||||
true
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn full_access_restricted_policy_still_uses_platform_sandbox_for_restricted_network() {
|
||||
let policy = FileSystemSandboxPolicy::restricted(vec![FileSystemSandboxEntry {
|
||||
|
||||
@@ -123,6 +123,25 @@ impl Default for FileSystemSandboxPolicy {
|
||||
}
|
||||
|
||||
impl FileSystemSandboxPolicy {
|
||||
fn has_root_access(&self, predicate: impl Fn(FileSystemAccessMode) -> bool) -> bool {
|
||||
matches!(self.kind, FileSystemSandboxKind::Restricted)
|
||||
&& self.entries.iter().any(|entry| {
|
||||
matches!(
|
||||
&entry.path,
|
||||
FileSystemPath::Special { value }
|
||||
if matches!(value, FileSystemSpecialPath::Root) && predicate(entry.access)
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn has_explicit_deny_entries(&self) -> bool {
|
||||
matches!(self.kind, FileSystemSandboxKind::Restricted)
|
||||
&& self
|
||||
.entries
|
||||
.iter()
|
||||
.any(|entry| entry.access == FileSystemAccessMode::None)
|
||||
}
|
||||
|
||||
pub fn unrestricted() -> Self {
|
||||
Self {
|
||||
kind: FileSystemSandboxKind::Unrestricted,
|
||||
@@ -148,13 +167,10 @@ impl FileSystemSandboxPolicy {
|
||||
pub fn has_full_disk_read_access(&self) -> bool {
|
||||
match self.kind {
|
||||
FileSystemSandboxKind::Unrestricted | FileSystemSandboxKind::ExternalSandbox => true,
|
||||
FileSystemSandboxKind::Restricted => self.entries.iter().any(|entry| {
|
||||
matches!(
|
||||
&entry.path,
|
||||
FileSystemPath::Special { value }
|
||||
if matches!(value, FileSystemSpecialPath::Root) && entry.access.can_read()
|
||||
)
|
||||
}),
|
||||
FileSystemSandboxKind::Restricted => {
|
||||
self.has_root_access(FileSystemAccessMode::can_read)
|
||||
&& !self.has_explicit_deny_entries()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -162,14 +178,10 @@ impl FileSystemSandboxPolicy {
|
||||
pub fn has_full_disk_write_access(&self) -> bool {
|
||||
match self.kind {
|
||||
FileSystemSandboxKind::Unrestricted | FileSystemSandboxKind::ExternalSandbox => true,
|
||||
FileSystemSandboxKind::Restricted => self.entries.iter().any(|entry| {
|
||||
matches!(
|
||||
&entry.path,
|
||||
FileSystemPath::Special { value }
|
||||
if matches!(value, FileSystemSpecialPath::Root)
|
||||
&& entry.access.can_write()
|
||||
)
|
||||
}),
|
||||
FileSystemSandboxKind::Restricted => {
|
||||
self.has_root_access(FileSystemAccessMode::can_write)
|
||||
&& !self.has_explicit_deny_entries()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -194,11 +206,24 @@ impl FileSystemSandboxPolicy {
|
||||
}
|
||||
|
||||
let cwd_absolute = AbsolutePathBuf::from_absolute_path(cwd).ok();
|
||||
let mut readable_roots = Vec::new();
|
||||
if self.has_root_access(FileSystemAccessMode::can_read)
|
||||
&& let Some(cwd_absolute) = cwd_absolute.as_ref()
|
||||
{
|
||||
readable_roots.push(absolute_root_path_for_cwd(cwd_absolute));
|
||||
}
|
||||
|
||||
dedup_absolute_paths(
|
||||
self.entries
|
||||
.iter()
|
||||
.filter(|entry| entry.access.can_read())
|
||||
.filter_map(|entry| resolve_file_system_path(&entry.path, cwd_absolute.as_ref()))
|
||||
readable_roots
|
||||
.into_iter()
|
||||
.chain(
|
||||
self.entries
|
||||
.iter()
|
||||
.filter(|entry| entry.access.can_read())
|
||||
.filter_map(|entry| {
|
||||
resolve_file_system_path(&entry.path, cwd_absolute.as_ref())
|
||||
}),
|
||||
)
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
@@ -212,11 +237,24 @@ impl FileSystemSandboxPolicy {
|
||||
|
||||
let cwd_absolute = AbsolutePathBuf::from_absolute_path(cwd).ok();
|
||||
let unreadable_roots = self.get_unreadable_roots_with_cwd(cwd);
|
||||
let mut writable_roots = Vec::new();
|
||||
if self.has_root_access(FileSystemAccessMode::can_write)
|
||||
&& let Some(cwd_absolute) = cwd_absolute.as_ref()
|
||||
{
|
||||
writable_roots.push(absolute_root_path_for_cwd(cwd_absolute));
|
||||
}
|
||||
|
||||
dedup_absolute_paths(
|
||||
self.entries
|
||||
.iter()
|
||||
.filter(|entry| entry.access.can_write())
|
||||
.filter_map(|entry| resolve_file_system_path(&entry.path, cwd_absolute.as_ref()))
|
||||
writable_roots
|
||||
.into_iter()
|
||||
.chain(
|
||||
self.entries
|
||||
.iter()
|
||||
.filter(|entry| entry.access.can_write())
|
||||
.filter_map(|entry| {
|
||||
resolve_file_system_path(&entry.path, cwd_absolute.as_ref())
|
||||
}),
|
||||
)
|
||||
.collect(),
|
||||
)
|
||||
.into_iter()
|
||||
@@ -543,6 +581,16 @@ fn resolve_file_system_path(
|
||||
}
|
||||
}
|
||||
|
||||
fn absolute_root_path_for_cwd(cwd: &AbsolutePathBuf) -> AbsolutePathBuf {
|
||||
let root = cwd
|
||||
.as_path()
|
||||
.ancestors()
|
||||
.last()
|
||||
.unwrap_or_else(|| panic!("cwd must have a filesystem root"));
|
||||
AbsolutePathBuf::from_absolute_path(root)
|
||||
.unwrap_or_else(|err| panic!("cwd root must be an absolute path: {err}"))
|
||||
}
|
||||
|
||||
fn resolve_file_system_special_path(
|
||||
value: &FileSystemSpecialPath,
|
||||
cwd: Option<&AbsolutePathBuf>,
|
||||
|
||||
@@ -3352,6 +3352,56 @@ mod tests {
|
||||
assert!(writable.has_full_disk_write_access());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn restricted_file_system_policy_treats_root_with_carveouts_as_scoped_access() {
|
||||
let cwd = TempDir::new().expect("tempdir");
|
||||
let cwd_absolute =
|
||||
AbsolutePathBuf::from_absolute_path(cwd.path()).expect("absolute tempdir");
|
||||
let root = cwd_absolute
|
||||
.as_path()
|
||||
.ancestors()
|
||||
.last()
|
||||
.and_then(|path| AbsolutePathBuf::from_absolute_path(path).ok())
|
||||
.expect("filesystem root");
|
||||
let blocked = AbsolutePathBuf::resolve_path_against_base("blocked", cwd.path())
|
||||
.expect("resolve blocked");
|
||||
let policy = FileSystemSandboxPolicy::restricted(vec![
|
||||
FileSystemSandboxEntry {
|
||||
path: FileSystemPath::Special {
|
||||
value: FileSystemSpecialPath::Root,
|
||||
},
|
||||
access: FileSystemAccessMode::Write,
|
||||
},
|
||||
FileSystemSandboxEntry {
|
||||
path: FileSystemPath::Path {
|
||||
path: blocked.clone(),
|
||||
},
|
||||
access: FileSystemAccessMode::None,
|
||||
},
|
||||
]);
|
||||
|
||||
assert!(!policy.has_full_disk_read_access());
|
||||
assert!(!policy.has_full_disk_write_access());
|
||||
assert_eq!(
|
||||
policy.get_readable_roots_with_cwd(cwd.path()),
|
||||
vec![root.clone()]
|
||||
);
|
||||
assert_eq!(
|
||||
policy.get_unreadable_roots_with_cwd(cwd.path()),
|
||||
vec![blocked.clone()]
|
||||
);
|
||||
|
||||
let writable_roots = policy.get_writable_roots_with_cwd(cwd.path());
|
||||
assert_eq!(writable_roots.len(), 1);
|
||||
assert_eq!(writable_roots[0].root, root);
|
||||
assert!(
|
||||
writable_roots[0]
|
||||
.read_only_subpaths
|
||||
.iter()
|
||||
.any(|path| path.as_path() == blocked.as_path())
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn restricted_file_system_policy_derives_effective_paths() {
|
||||
let cwd = TempDir::new().expect("tempdir");
|
||||
|
||||
Reference in New Issue
Block a user