diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 51667c6013..aafcbdd1b9 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -2126,6 +2126,7 @@ dependencies = [ "codex-rmcp-client", "codex-rollout-trace", "codex-sandboxing", + "codex-shell-command", "codex-state", "codex-stdio-to-uds", "codex-terminal-detection", diff --git a/codex-rs/cli/Cargo.toml b/codex-rs/cli/Cargo.toml index cdee241b42..8601de5851 100644 --- a/codex-rs/cli/Cargo.toml +++ b/codex-rs/cli/Cargo.toml @@ -44,6 +44,7 @@ codex-responses-api-proxy = { workspace = true } codex-rmcp-client = { workspace = true } codex-rollout-trace = { workspace = true } codex-sandboxing = { workspace = true } +codex-shell-command = { workspace = true } codex-state = { workspace = true } codex-stdio-to-uds = { workspace = true } codex-terminal-detection = { workspace = true } diff --git a/codex-rs/cli/src/debug_sandbox.rs b/codex-rs/cli/src/debug_sandbox.rs index c85da0f5f2..73ca85502c 100644 --- a/codex-rs/cli/src/debug_sandbox.rs +++ b/codex-rs/cli/src/debug_sandbox.rs @@ -35,6 +35,10 @@ use crate::exit_status::handle_exit_status; #[cfg(target_os = "macos")] use seatbelt::DenialLogger; +#[cfg(target_os = "linux")] +const LINUX_SANDBOX_FORWARDED_SIGNALS: &[libc::c_int] = + &[libc::SIGHUP, libc::SIGINT, libc::SIGQUIT, libc::SIGTERM]; + #[cfg(target_os = "macos")] pub async fn run_command_under_seatbelt( command: SeatbeltCommand, @@ -142,6 +146,13 @@ async fn run_command_under_sandbox( // sandbox policy. In the future, we could add a CLI option to set them // separately. let sandbox_policy_cwd = cwd.clone(); + if let Some(reason) = codex_shell_command::metadata_write_forbidden_reason( + &command, + cwd.as_path(), + &config.permissions.file_system_sandbox_policy(), + ) { + anyhow::bail!("{reason}"); + } let env = create_env( &config.permissions.shell_environment_policy, @@ -261,6 +272,9 @@ async fn run_command_under_sandbox( denial_logger.on_child_spawn(&child); } + #[cfg(target_os = "linux")] + let status = wait_for_debug_sandbox_child(&mut child).await?; + #[cfg(not(target_os = "linux"))] let status = child.wait().await?; #[cfg(target_os = "macos")] @@ -438,6 +452,27 @@ async fn spawn_debug_sandbox_child( cmd.env(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR, "1"); } + #[cfg(target_os = "linux")] + { + let parent_pid = unsafe { libc::getpid() }; + // SAFETY: `pre_exec` runs in the child immediately before exec. The + // closure only adjusts the child signal mask, installs a parent-death + // signal, and checks the inherited parent pid to close the fork/exec + // race. + unsafe { + cmd.pre_exec(move || { + block_linux_sandbox_forwarded_signals()?; + if libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) == -1 { + return Err(std::io::Error::last_os_error()); + } + if libc::getppid() != parent_pid { + libc::raise(libc::SIGTERM); + } + Ok(()) + }); + } + } + cmd.stdin(Stdio::inherit()) .stdout(Stdio::inherit()) .stderr(Stdio::inherit()) @@ -445,6 +480,68 @@ async fn spawn_debug_sandbox_child( .spawn() } +#[cfg(target_os = "linux")] +async fn wait_for_debug_sandbox_child( + child: &mut Child, +) -> std::io::Result { + let child_pid = child.id().map(|pid| pid as libc::pid_t); + tokio::select! { + status = child.wait() => status, + signal = recv_linux_sandbox_forwarded_signal() => { + let signal = signal?; + if let Some(child_pid) = child_pid { + signal_debug_sandbox_child(child_pid, signal)?; + } + child.wait().await + } + } +} + +#[cfg(target_os = "linux")] +async fn recv_linux_sandbox_forwarded_signal() -> std::io::Result { + use tokio::signal::unix::SignalKind; + use tokio::signal::unix::signal; + + let mut sighup = signal(SignalKind::hangup())?; + let mut sigint = signal(SignalKind::interrupt())?; + let mut sigquit = signal(SignalKind::quit())?; + let mut sigterm = signal(SignalKind::terminate())?; + + let signal = tokio::select! { + _ = sighup.recv() => libc::SIGHUP, + _ = sigint.recv() => libc::SIGINT, + _ = sigquit.recv() => libc::SIGQUIT, + _ = sigterm.recv() => libc::SIGTERM, + }; + Ok(signal) +} + +#[cfg(target_os = "linux")] +fn signal_debug_sandbox_child(pid: libc::pid_t, signal: libc::c_int) -> std::io::Result<()> { + if unsafe { libc::kill(pid, signal) } < 0 { + let err = std::io::Error::last_os_error(); + if err.raw_os_error() != Some(libc::ESRCH) { + return Err(err); + } + } + Ok(()) +} + +#[cfg(target_os = "linux")] +fn block_linux_sandbox_forwarded_signals() -> std::io::Result<()> { + let mut blocked: libc::sigset_t = unsafe { std::mem::zeroed() }; + unsafe { + libc::sigemptyset(&mut blocked); + for signal in LINUX_SANDBOX_FORWARDED_SIGNALS { + libc::sigaddset(&mut blocked, *signal); + } + if libc::sigprocmask(libc::SIG_BLOCK, &blocked, std::ptr::null_mut()) < 0 { + return Err(std::io::Error::last_os_error()); + } + } + Ok(()) +} + #[cfg(target_os = "windows")] mod windows_stdio_bridge { use std::io::Read; diff --git a/codex-rs/core/src/tools/handlers/shell.rs b/codex-rs/core/src/tools/handlers/shell.rs index b7512b7076..c3e101bf17 100644 --- a/codex-rs/core/src/tools/handlers/shell.rs +++ b/codex-rs/core/src/tools/handlers/shell.rs @@ -531,6 +531,14 @@ impl ShellHandler { prefix_rule, }) .await; + let exec_approval_requirement = codex_shell_command::metadata_write_forbidden_reason( + &exec_params.command, + &exec_params.cwd, + &file_system_sandbox_policy, + ) + .map_or(exec_approval_requirement, |reason| { + crate::tools::sandboxing::ExecApprovalRequirement::Forbidden { reason } + }); let req = ShellRequest { command: exec_params.command.clone(), diff --git a/codex-rs/shell-command/src/bash.rs b/codex-rs/shell-command/src/bash.rs index 2fe299108c..9a10ba458b 100644 --- a/codex-rs/shell-command/src/bash.rs +++ b/codex-rs/shell-command/src/bash.rs @@ -119,6 +119,55 @@ pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option Option>> { + let (_, script) = extract_bash_command(command)?; + let tree = try_parse_shell(script)?; + let root = tree.root_node(); + if root.has_error() { + return None; + } + + let mut commands = Vec::new(); + for command_node in find_command_nodes(root) { + if let Some(words) = parse_command_word_prefix_from_node(command_node, script) + && !words.is_empty() + { + commands.push(words); + } + } + + (!commands.is_empty()).then_some(commands) +} + +/// Returns literal write redirection targets within a `bash -lc "..."` or +/// `zsh -lc "..."` invocation. +pub fn parse_shell_lc_write_redirection_targets(command: &[String]) -> Option> { + let (_, script) = extract_bash_command(command)?; + let tree = try_parse_shell(script)?; + let root = tree.root_node(); + if root.has_error() { + return None; + } + + let mut targets = Vec::new(); + for redirect_node in find_nodes_by_kind(root, "file_redirect") { + if file_redirect_uses_write_operator(redirect_node) + && let Some(target) = parse_redirection_target(redirect_node, script) + { + targets.push(target); + } + } + + (!targets.is_empty()).then_some(targets) +} + /// Returns the parsed argv for a single shell command in a here-doc style /// script (`<<`), as long as the script contains exactly one command node. pub fn parse_shell_lc_single_command_prefix(command: &[String]) -> Option> { @@ -194,6 +243,100 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option, src: &str) -> Option> { + if cmd.kind() != "command" { + return None; + } + let mut words = Vec::new(); + let mut cursor = cmd.walk(); + for child in cmd.named_children(&mut cursor) { + match child.kind() { + "command_name" => { + let word_node = child.named_child(0)?; + if !matches!(word_node.kind(), "word" | "number") { + return None; + } + words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned()); + } + "word" | "number" => { + words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned()); + } + "string" => { + let parsed = parse_double_quoted_string(child, src)?; + words.push(parsed); + } + "raw_string" => { + let parsed = parse_raw_string(child, src)?; + words.push(parsed); + } + "concatenation" => { + let parsed = parse_concatenation(child, src)?; + words.push(parsed); + } + "variable_assignment" | "comment" => {} + kind if is_allowed_heredoc_attachment_kind(kind) => {} + _ => {} + } + } + Some(words) +} + +fn file_redirect_uses_write_operator(node: Node<'_>) -> bool { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if !child.is_named() && child.kind().contains('>') { + return true; + } + } + false +} + +fn parse_redirection_target(node: Node<'_>, src: &str) -> Option { + let mut target = None; + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + if let Some(parsed) = parse_literal_shell_word(child, src) { + target = Some(parsed); + } + } + target +} + +fn parse_literal_shell_word(node: Node<'_>, src: &str) -> Option { + match node.kind() { + "word" | "number" => Some(node.utf8_text(src.as_bytes()).ok()?.to_owned()), + "string" => parse_double_quoted_string(node, src), + "raw_string" => parse_raw_string(node, src), + "concatenation" => parse_concatenation(node, src), + _ => None, + } +} + +fn parse_concatenation(node: Node<'_>, src: &str) -> Option { + let mut concatenated = String::new(); + let mut cursor = node.walk(); + for part in node.named_children(&mut cursor) { + match part.kind() { + "word" | "number" => { + concatenated.push_str(part.utf8_text(src.as_bytes()).ok()?.to_owned().as_str()); + } + "string" => { + let parsed = parse_double_quoted_string(part, src)?; + concatenated.push_str(&parsed); + } + "raw_string" => { + let parsed = parse_raw_string(part, src)?; + concatenated.push_str(&parsed); + } + _ => return None, + } + } + if concatenated.is_empty() { + return None; + } + Some(concatenated) +} + fn parse_heredoc_command_words(cmd: Node<'_>, src: &str) -> Option> { if cmd.kind() != "command" { return None; @@ -268,6 +411,29 @@ fn find_single_command_node(root: Node<'_>) -> Option> { single_command } +fn find_command_nodes(root: Node<'_>) -> Vec> { + let mut command_nodes = find_nodes_by_kind(root, "command"); + command_nodes.sort_by_key(Node::start_byte); + command_nodes +} + +fn find_nodes_by_kind<'a>(root: Node<'a>, kind: &str) -> Vec> { + let mut stack = vec![root]; + let mut matches = Vec::new(); + while let Some(node) = stack.pop() { + if node.kind() == kind { + matches.push(node); + } + + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + stack.push(child); + } + } + matches.sort_by_key(Node::start_byte); + matches +} + fn has_named_descendant_kind(node: Node<'_>, kind: &str) -> bool { let mut stack = vec![node]; while let Some(current) = stack.pop() { @@ -453,6 +619,64 @@ mod tests { assert_eq!(parsed, vec![vec!["ls".to_string()]]); } + #[test] + fn parse_shell_lc_command_word_prefixes_extracts_complex_script_commands() { + let command = vec![ + "bash".to_string(), + "-lc".to_string(), + r#"set -e +top=$(git rev-parse --show-toplevel) +if git init -q; then + exit 22 +fi +if mkdir .codex; then + exit 23 +fi +printf pwned > .git/config +"# + .to_string(), + ]; + + let parsed = parse_shell_lc_command_word_prefixes(&command).expect("parse script"); + + assert!(parsed.contains(&vec![ + "git".to_string(), + "rev-parse".to_string(), + "--show-toplevel".to_string() + ])); + assert!(parsed.contains(&vec![ + "git".to_string(), + "init".to_string(), + "-q".to_string() + ])); + assert!(parsed.contains(&vec!["mkdir".to_string(), ".codex".to_string()])); + } + + #[test] + fn parse_shell_lc_write_redirection_targets_extracts_literal_writes() { + let command = vec![ + "bash".to_string(), + "-lc".to_string(), + r#"printf pwned > .git +cat < .git/config +printf ok >> .codex/log +printf ok > ".agents/config" +"# + .to_string(), + ]; + + let parsed = parse_shell_lc_write_redirection_targets(&command).expect("parse redirects"); + + assert_eq!( + parsed, + vec![ + ".git".to_string(), + ".codex/log".to_string(), + ".agents/config".to_string() + ] + ); + } + #[test] fn accepts_concatenated_flag_and_value() { // Test case: -g"*.py" (flag directly concatenated with quoted value) diff --git a/codex-rs/shell-command/src/lib.rs b/codex-rs/shell-command/src/lib.rs index 1d9e302a4e..7311f22c28 100644 --- a/codex-rs/shell-command/src/lib.rs +++ b/codex-rs/shell-command/src/lib.rs @@ -4,8 +4,10 @@ mod shell_detect; pub mod bash; pub(crate) mod command_safety; +mod metadata_write; pub mod parse_command; pub mod powershell; pub use command_safety::is_dangerous_command; pub use command_safety::is_safe_command; +pub use metadata_write::metadata_write_forbidden_reason; diff --git a/codex-rs/shell-command/src/metadata_write.rs b/codex-rs/shell-command/src/metadata_write.rs new file mode 100644 index 0000000000..ee15786feb --- /dev/null +++ b/codex-rs/shell-command/src/metadata_write.rs @@ -0,0 +1,136 @@ +use std::path::Path; + +use codex_protocol::permissions::FileSystemSandboxPolicy; +use codex_protocol::permissions::forbidden_agent_metadata_write; + +pub fn metadata_write_forbidden_reason( + command: &[String], + cwd: &Path, + file_system_sandbox_policy: &FileSystemSandboxPolicy, +) -> Option { + if let Some(targets) = crate::bash::parse_shell_lc_write_redirection_targets(command) { + for target in targets { + if let Some(name) = + forbidden_agent_metadata_write(Path::new(&target), cwd, file_system_sandbox_policy) + { + return Some(metadata_write_reason(name)); + } + } + } + None +} + +fn metadata_write_reason(name: &str) -> String { + format!("command targets protected workspace metadata path `{name}`") +} + +#[cfg(test)] +mod tests { + use std::path::Path; + use std::path::PathBuf; + + use codex_protocol::permissions::FileSystemSandboxPolicy; + use codex_protocol::protocol::SandboxPolicy; + use pretty_assertions::assert_eq; + + use super::metadata_write_forbidden_reason; + + struct TestDir { + path: PathBuf, + } + + impl TestDir { + fn new(name: &str) -> Self { + let path = std::env::temp_dir().join(format!( + "codex-metadata-write-{name}-{}", + std::process::id() + )); + let _ = std::fs::remove_dir_all(&path); + std::fs::create_dir(&path).expect("create tempdir"); + Self { path } + } + + fn path(&self) -> &Path { + &self.path + } + } + + impl Drop for TestDir { + fn drop(&mut self) { + let _ = std::fs::remove_dir_all(&self.path); + } + } + + fn legacy_workspace_write_policy(cwd: &Path) -> FileSystemSandboxPolicy { + let policy = SandboxPolicy::WorkspaceWrite { + writable_roots: vec![], + network_access: false, + exclude_tmpdir_env_var: true, + exclude_slash_tmp: true, + }; + FileSystemSandboxPolicy::from_legacy_sandbox_policy_for_cwd(&policy, cwd) + } + + #[test] + fn metadata_write_detector_allows_normal_git_under_parent_repo() { + let repo = TestDir::new("normal-git-under-parent-repo"); + std::fs::create_dir(repo.path().join(".git")).expect("create parent .git"); + let cwd = repo.path().join("sub"); + std::fs::create_dir(&cwd).expect("create cwd"); + let policy = legacy_workspace_write_policy(&cwd); + + let reason = metadata_write_forbidden_reason( + &[ + "/bin/bash".to_string(), + "-lc".to_string(), + "git status --short".to_string(), + ], + &cwd, + &policy, + ); + + assert_eq!(reason, None); + } + + #[test] + fn metadata_write_detector_leaves_direct_writes_to_sandbox_policy() { + let cwd = TestDir::new("direct-metadata-writes"); + let policy = legacy_workspace_write_policy(cwd.path()); + + let reason = metadata_write_forbidden_reason( + &[ + "/bin/bash".to_string(), + "-lc".to_string(), + "touch .git && mkdir -p .codex".to_string(), + ], + cwd.path(), + &policy, + ); + + assert_eq!(reason, None); + } + + #[test] + fn metadata_write_detector_blocks_metadata_redirections() { + let repo = TestDir::new("metadata-write-redirections"); + std::fs::create_dir(repo.path().join(".git")).expect("create parent .git"); + let cwd = repo.path().join("sub"); + std::fs::create_dir(&cwd).expect("create cwd"); + let policy = legacy_workspace_write_policy(&cwd); + + let reason = metadata_write_forbidden_reason( + &[ + "/bin/bash".to_string(), + "-lc".to_string(), + "printf pwned > .git".to_string(), + ], + &cwd, + &policy, + ); + + assert_eq!( + reason, + Some("command targets protected workspace metadata path `.git`".to_string()) + ); + } +}