Compare commits

...

2 Commits

Author SHA1 Message Date
jif-oai
1d44e5e3df Extract command safety crate 2026-02-10 13:31:18 +00:00
jif-oai
5e236a9b30 Extract command safety crate 2026-02-10 13:13:25 +00:00
20 changed files with 285 additions and 20 deletions

4
MODULE.bazel.lock generated
View File

@@ -608,6 +608,10 @@
"arrayvec_0.7.6": "{\"dependencies\":[{\"kind\":\"dev\",\"name\":\"bencher\",\"req\":\"^0.1.4\"},{\"default_features\":false,\"name\":\"borsh\",\"optional\":true,\"req\":\"^1.2.0\"},{\"kind\":\"dev\",\"name\":\"matches\",\"req\":\"^0.1\"},{\"default_features\":false,\"name\":\"serde\",\"optional\":true,\"req\":\"^1.0\"},{\"kind\":\"dev\",\"name\":\"serde_test\",\"req\":\"^1.0\"},{\"default_features\":false,\"name\":\"zeroize\",\"optional\":true,\"req\":\"^1.4\"}],\"features\":{\"default\":[\"std\"],\"std\":[]}}",
"ascii-canvas_3.0.0": "{\"dependencies\":[{\"kind\":\"dev\",\"name\":\"diff\",\"req\":\"^0.1\"},{\"name\":\"term\",\"req\":\"^0.7\"}],\"features\":{}}",
"ascii_1.1.0": "{\"dependencies\":[{\"name\":\"serde\",\"optional\":true,\"req\":\"^1.0.25\"},{\"name\":\"serde_test\",\"optional\":true,\"req\":\"^1.0\"}],\"features\":{\"alloc\":[],\"default\":[\"std\"],\"std\":[\"alloc\"]}}",
"askama_0.15.4": "{\"dependencies\":[{\"default_features\":false,\"name\":\"askama_macros\",\"optional\":true,\"req\":\"=0.15.4\"},{\"kind\":\"dev\",\"name\":\"assert_matches\",\"req\":\"^1.5.0\"},{\"kind\":\"dev\",\"name\":\"criterion\",\"req\":\"^0.8\"},{\"name\":\"itoa\",\"req\":\"^1.0.11\"},{\"default_features\":false,\"name\":\"percent-encoding\",\"optional\":true,\"req\":\"^2.1.0\"},{\"default_features\":false,\"name\":\"serde\",\"optional\":true,\"req\":\"^1.0\"},{\"default_features\":false,\"name\":\"serde_json\",\"optional\":true,\"req\":\"^1.0\"}],\"features\":{\"alloc\":[\"askama_macros?/alloc\",\"serde?/alloc\",\"serde_json?/alloc\",\"percent-encoding?/alloc\"],\"code-in-doc\":[\"askama_macros?/code-in-doc\"],\"config\":[\"askama_macros?/config\"],\"default\":[\"config\",\"derive\",\"std\",\"urlencode\"],\"derive\":[\"dep:askama_macros\",\"dep:askama_macros\"],\"full\":[\"default\",\"code-in-doc\",\"serde_json\"],\"nightly-spans\":[\"askama_macros/nightly-spans\"],\"serde_json\":[\"std\",\"askama_macros?/serde_json\",\"dep:serde\",\"dep:serde_json\"],\"std\":[\"alloc\",\"askama_macros?/std\",\"serde?/std\",\"serde_json?/std\",\"percent-encoding?/std\"],\"urlencode\":[\"askama_macros?/urlencode\",\"dep:percent-encoding\"]}}",
"askama_derive_0.15.4": "{\"dependencies\":[{\"name\":\"basic-toml\",\"optional\":true,\"req\":\"^0.1.1\"},{\"kind\":\"dev\",\"name\":\"console\",\"req\":\"^0.16.0\"},{\"kind\":\"dev\",\"name\":\"criterion\",\"req\":\"^0.8\"},{\"name\":\"memchr\",\"req\":\"^2\"},{\"name\":\"parser\",\"package\":\"askama_parser\",\"req\":\"=0.15.4\"},{\"kind\":\"dev\",\"name\":\"prettyplease\",\"req\":\"^0.2.20\"},{\"default_features\":false,\"name\":\"proc-macro2\",\"req\":\"^1\"},{\"default_features\":false,\"name\":\"pulldown-cmark\",\"optional\":true,\"req\":\"^0.13.0\"},{\"default_features\":false,\"name\":\"quote\",\"req\":\"^1\"},{\"name\":\"rustc-hash\",\"req\":\"^2.0.0\"},{\"name\":\"serde\",\"optional\":true,\"req\":\"^1.0\"},{\"name\":\"serde_derive\",\"optional\":true,\"req\":\"^1.0\"},{\"kind\":\"dev\",\"name\":\"similar\",\"req\":\"^2.6.0\"},{\"default_features\":false,\"features\":[\"clone-impls\",\"derive\",\"full\",\"parsing\",\"printing\"],\"name\":\"syn\",\"req\":\"^2.0.3\"}],\"features\":{\"alloc\":[],\"code-in-doc\":[\"dep:pulldown-cmark\"],\"config\":[\"external-sources\",\"dep:basic-toml\",\"dep:serde\",\"dep:serde_derive\",\"parser/config\"],\"default\":[\"alloc\",\"code-in-doc\",\"config\",\"external-sources\",\"proc-macro\",\"serde_json\",\"std\",\"urlencode\"],\"external-sources\":[],\"nightly-spans\":[],\"proc-macro\":[\"proc-macro2/proc-macro\"],\"serde_json\":[],\"std\":[\"alloc\"],\"urlencode\":[]}}",
"askama_macros_0.15.4": "{\"dependencies\":[{\"default_features\":false,\"features\":[\"external-sources\",\"proc-macro\"],\"name\":\"askama_derive\",\"package\":\"askama_derive\",\"req\":\"=0.15.4\"}],\"features\":{\"alloc\":[\"askama_derive/alloc\"],\"code-in-doc\":[\"askama_derive/code-in-doc\"],\"config\":[\"askama_derive/config\"],\"default\":[\"config\",\"derive\",\"std\",\"urlencode\"],\"derive\":[],\"full\":[\"default\",\"code-in-doc\",\"serde_json\"],\"nightly-spans\":[\"askama_derive/nightly-spans\"],\"serde_json\":[\"askama_derive/serde_json\"],\"std\":[\"askama_derive/std\"],\"urlencode\":[\"askama_derive/urlencode\"]}}",
"askama_parser_0.15.4": "{\"dependencies\":[{\"kind\":\"dev\",\"name\":\"criterion\",\"req\":\"^0.8\"},{\"name\":\"rustc-hash\",\"req\":\"^2.0.0\"},{\"name\":\"serde\",\"optional\":true,\"req\":\"^1.0\"},{\"name\":\"serde_derive\",\"optional\":true,\"req\":\"^1.0\"},{\"name\":\"unicode-ident\",\"req\":\"^1.0.12\"},{\"features\":[\"simd\"],\"name\":\"winnow\",\"req\":\"^0.7.0\"}],\"features\":{\"config\":[\"dep:serde\",\"dep:serde_derive\"]}}",
"asn1-rs-derive_0.6.0": "{\"dependencies\":[{\"name\":\"proc-macro2\",\"req\":\"^1.0\"},{\"name\":\"quote\",\"req\":\"^1.0\"},{\"features\":[\"full\"],\"name\":\"syn\",\"req\":\"^2.0\"},{\"name\":\"synstructure\",\"req\":\"^0.13\"}],\"features\":{}}",
"asn1-rs-impl_0.2.0": "{\"dependencies\":[{\"name\":\"proc-macro2\",\"req\":\"^1\"},{\"name\":\"quote\",\"req\":\"^1\"},{\"name\":\"syn\",\"req\":\"^2.0\"}],\"features\":{}}",
"asn1-rs_0.7.1": "{\"dependencies\":[{\"name\":\"asn1-rs-derive\",\"req\":\"^0.6\"},{\"name\":\"asn1-rs-impl\",\"req\":\"^0.2\"},{\"name\":\"bitvec\",\"optional\":true,\"req\":\"^1.0\"},{\"name\":\"colored\",\"optional\":true,\"req\":\"^3.0\"},{\"kind\":\"dev\",\"name\":\"colored\",\"req\":\"^3.0\"},{\"name\":\"cookie-factory\",\"optional\":true,\"req\":\"^0.3.0\"},{\"name\":\"displaydoc\",\"req\":\"^0.2.2\"},{\"kind\":\"dev\",\"name\":\"hex-literal\",\"req\":\"^0.4\"},{\"default_features\":false,\"features\":[\"std\"],\"name\":\"nom\",\"req\":\"^7.0\"},{\"name\":\"num-bigint\",\"optional\":true,\"req\":\"^0.4\"},{\"name\":\"num-traits\",\"req\":\"^0.2.14\"},{\"kind\":\"dev\",\"name\":\"pem\",\"req\":\"^3.0\"},{\"name\":\"rusticata-macros\",\"req\":\"^4.0\"},{\"name\":\"thiserror\",\"req\":\"^2.0.0\"},{\"features\":[\"macros\",\"parsing\",\"formatting\"],\"name\":\"time\",\"optional\":true,\"req\":\"^0.3\"},{\"kind\":\"dev\",\"name\":\"trybuild\",\"req\":\"^1.0\"}],\"features\":{\"bigint\":[\"num-bigint\"],\"bits\":[\"bitvec\"],\"datetime\":[\"time\"],\"debug\":[\"std\",\"colored\"],\"default\":[\"std\"],\"serialize\":[\"cookie-factory\"],\"std\":[],\"trace\":[\"debug\"]}}",

18
codex-rs/Cargo.lock generated
View File

@@ -1628,6 +1628,22 @@ dependencies = [
"thiserror 2.0.18",
]
[[package]]
name = "codex-command-safety"
version = "0.0.0"
dependencies = [
"base64 0.22.1",
"once_cell",
"pretty_assertions",
"regex",
"serde",
"serde_json",
"shlex",
"tree-sitter",
"tree-sitter-bash",
"url",
]
[[package]]
name = "codex-common"
version = "0.0.0"
@@ -1665,6 +1681,7 @@ dependencies = [
"codex-arg0",
"codex-async-utils",
"codex-client",
"codex-command-safety",
"codex-core",
"codex-execpolicy",
"codex-file-search",
@@ -1803,6 +1820,7 @@ dependencies = [
"anyhow",
"async-trait",
"clap",
"codex-command-safety",
"codex-core",
"codex-execpolicy",
"codex-utils-cargo-bin",

View File

@@ -14,6 +14,7 @@ members = [
"cloud-requirements",
"cloud-tasks",
"cloud-tasks-client",
"command-safety",
"cli",
"common",
"core",
@@ -80,6 +81,7 @@ codex-cloud-requirements = { path = "cloud-requirements" }
codex-chatgpt = { path = "chatgpt" }
codex-cli = { path = "cli"}
codex-client = { path = "codex-client" }
codex-command-safety = { path = "command-safety" }
codex-common = { path = "common" }
codex-core = { path = "core" }
codex-secrets = { path = "secrets" }

View File

@@ -0,0 +1,7 @@
load("//:defs.bzl", "codex_rust_crate")
codex_rust_crate(
name = "command-safety",
crate_name = "codex_command_safety",
compile_data = ["src/powershell_parser.ps1"],
)

View File

@@ -0,0 +1,26 @@
[package]
name = "codex-command-safety"
version.workspace = true
edition.workspace = true
license.workspace = true
[lib]
name = "codex_command_safety"
path = "src/lib.rs"
[lints]
workspace = true
[dependencies]
base64 = { workspace = true }
once_cell = { workspace = true }
regex = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
shlex = { workspace = true }
tree-sitter = { workspace = true }
tree-sitter-bash = { workspace = true }
url = { workspace = true }
[dev-dependencies]
pretty_assertions = { workspace = true }

View File

@@ -0,0 +1,207 @@
use std::path::Path;
use tree_sitter::Node;
use tree_sitter::Parser;
use tree_sitter::Tree;
use tree_sitter_bash::LANGUAGE as BASH;
/// Parse the provided bash source using tree-sitter-bash, returning a Tree on
/// success or None if parsing failed.
pub fn try_parse_shell(shell_lc_arg: &str) -> Option<Tree> {
let lang = BASH.into();
let mut parser = Parser::new();
#[expect(clippy::expect_used)]
parser.set_language(&lang).expect("load bash grammar");
let old_tree: Option<&Tree> = None;
parser.parse(shell_lc_arg, old_tree)
}
/// Parse a script which may contain multiple simple commands joined only by
/// the safe logical/pipe/sequencing operators: `&&`, `||`, `;`, `|`.
///
/// Returns `Some(Vec<command_words>)` if every command is a plain wordonly
/// command and the parse tree does not contain disallowed constructs
/// (parentheses, redirections, substitutions, control flow, etc.). Otherwise
/// returns `None`.
pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<Vec<Vec<String>>> {
if tree.root_node().has_error() {
return None;
}
// List of allowed (named) node kinds for a "word only commands sequence".
// If we encounter a named node that is not in this list we reject.
const ALLOWED_KINDS: &[&str] = &[
// top level containers
"program",
"list",
"pipeline",
// commands & words
"command",
"command_name",
"word",
"string",
"string_content",
"raw_string",
"number",
"concatenation",
];
// Allow only safe punctuation / operator tokens; anything else causes reject.
const ALLOWED_PUNCT_TOKENS: &[&str] = &["&&", "||", ";", "|", "\"", "'"];
let root = tree.root_node();
let mut cursor = root.walk();
let mut stack = vec![root];
let mut command_nodes = Vec::new();
while let Some(node) = stack.pop() {
let kind = node.kind();
if node.is_named() {
if !ALLOWED_KINDS.contains(&kind) {
return None;
}
if kind == "command" {
command_nodes.push(node);
}
} else {
// Reject any punctuation / operator tokens that are not explicitly allowed.
if kind.chars().any(|c| "&;|".contains(c)) && !ALLOWED_PUNCT_TOKENS.contains(&kind) {
return None;
}
if !(ALLOWED_PUNCT_TOKENS.contains(&kind) || kind.trim().is_empty()) {
// If it's a quote token or operator it's allowed above; we also allow whitespace tokens.
// Any other punctuation like parentheses, braces, redirects, backticks, etc are rejected.
return None;
}
}
for child in node.children(&mut cursor) {
stack.push(child);
}
}
// Walk uses a stack (LIFO), so re-sort by position to restore source order.
command_nodes.sort_by_key(Node::start_byte);
let mut commands = Vec::new();
for node in command_nodes {
if let Some(words) = parse_plain_command_from_node(node, src) {
commands.push(words);
} else {
return None;
}
}
Some(commands)
}
pub fn extract_bash_command(command: &[String]) -> Option<(&str, &str)> {
let [shell, flag, script] = command else {
return None;
};
if !matches!(flag.as_str(), "-lc" | "-c") || !is_supported_posix_shell(shell) {
return None;
}
Some((shell, script))
}
fn is_supported_posix_shell(shell: &str) -> bool {
Path::new(shell)
.file_stem()
.and_then(|stem| stem.to_str())
.is_some_and(|name| matches!(name, "bash" | "zsh" | "sh"))
}
/// Returns the sequence of plain commands within a `bash -lc "..."` or
/// `zsh -lc "..."` invocation when the script only contains word-only commands
/// joined by safe operators.
pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
let (_, script) = extract_bash_command(command)?;
let tree = try_parse_shell(script)?;
try_parse_word_only_commands_sequence(&tree, script)
}
fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Vec<String>> {
if cmd.kind() != "command" {
return None;
}
let mut words = Vec::new();
let mut cursor = cmd.walk();
for child in cmd.named_children(&mut cursor) {
match child.kind() {
"command_name" => {
let word_node = child.named_child(0)?;
if word_node.kind() != "word" {
return None;
}
words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
}
"word" | "number" => {
words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
}
"string" => {
let parsed = parse_double_quoted_string(child, src)?;
words.push(parsed);
}
"raw_string" => {
let parsed = parse_raw_string(child, src)?;
words.push(parsed);
}
"concatenation" => {
// Handle concatenated arguments like -g"*.py"
let mut concatenated = String::new();
let mut concat_cursor = child.walk();
for part in child.named_children(&mut concat_cursor) {
match part.kind() {
"word" | "number" => {
concatenated
.push_str(part.utf8_text(src.as_bytes()).ok()?.to_owned().as_str());
}
"string" => {
let parsed = parse_double_quoted_string(part, src)?;
concatenated.push_str(&parsed);
}
"raw_string" => {
let parsed = parse_raw_string(part, src)?;
concatenated.push_str(&parsed);
}
_ => return None,
}
}
if concatenated.is_empty() {
return None;
}
words.push(concatenated);
}
_ => return None,
}
}
Some(words)
}
fn parse_double_quoted_string(node: Node, src: &str) -> Option<String> {
if node.kind() != "string" {
return None;
}
let mut cursor = node.walk();
for part in node.named_children(&mut cursor) {
if part.kind() != "string_content" {
return None;
}
}
let raw = node.utf8_text(src.as_bytes()).ok()?;
let stripped = raw
.strip_prefix('"')
.and_then(|text| text.strip_suffix('"'))?;
Some(stripped.to_string())
}
fn parse_raw_string(node: Node, src: &str) -> Option<String> {
if node.kind() != "raw_string" {
return None;
}
let raw_string = node.utf8_text(src.as_bytes()).ok()?;
let stripped = raw_string
.strip_prefix('\'')
.and_then(|s| s.strip_suffix('\''));
stripped.map(str::to_owned)
}

View File

@@ -1,12 +1,9 @@
use crate::bash::parse_shell_lc_plain_commands;
#[cfg(windows)]
#[path = "windows_dangerous_commands.rs"]
mod windows_dangerous_commands;
use crate::bash_parse::parse_shell_lc_plain_commands;
pub fn command_might_be_dangerous(command: &[String]) -> bool {
#[cfg(windows)]
{
if windows_dangerous_commands::is_dangerous_command_windows(command) {
if crate::windows_dangerous_commands::is_dangerous_command_windows(command) {
return true;
}
}

View File

@@ -1,9 +1,9 @@
use crate::bash::parse_shell_lc_plain_commands;
use crate::bash_parse::parse_shell_lc_plain_commands;
// Find the first matching git subcommand, skipping known global options that
// may appear before it (e.g., `-C`, `-c`, `--git-dir`).
// Implemented in `is_dangerous_command` and shared here.
use crate::command_safety::is_dangerous_command::find_git_subcommand;
use crate::command_safety::windows_safe_commands::is_safe_command_windows;
use crate::is_dangerous_command::find_git_subcommand;
use crate::windows_safe_commands::is_safe_command_windows;
pub fn is_known_safe_command(command: &[String]) -> bool {
let command: Vec<String> = command

View File

@@ -0,0 +1,8 @@
pub mod is_dangerous_command;
pub mod is_safe_command;
pub mod windows_safe_commands;
mod bash_parse;
#[cfg(windows)]
#[path = "windows_dangerous_commands.rs"]
mod windows_dangerous_commands;

View File

@@ -33,6 +33,7 @@ codex-app-server-protocol = { workspace = true }
codex-apply-patch = { workspace = true }
codex-async-utils = { workspace = true }
codex-client = { workspace = true }
codex-command-safety = { workspace = true }
codex-execpolicy = { workspace = true }
codex-file-search = { workspace = true }
codex-git = { workspace = true }

View File

@@ -1,3 +0,0 @@
pub mod is_dangerous_command;
pub mod is_safe_command;
pub mod windows_safe_commands;

View File

@@ -7,8 +7,8 @@ use arc_swap::ArcSwap;
use crate::config_loader::ConfigLayerStack;
use crate::config_loader::ConfigLayerStackOrdering;
use crate::is_dangerous_command::command_might_be_dangerous;
use crate::is_safe_command::is_known_safe_command;
use codex_command_safety::is_dangerous_command::command_might_be_dangerous;
use codex_command_safety::is_safe_command::is_known_safe_command;
use codex_execpolicy::AmendError;
use codex_execpolicy::Decision;
use codex_execpolicy::Error as ExecPolicyRuleError;

View File

@@ -21,7 +21,6 @@ pub use codex_thread::CodexThread;
pub use codex_thread::ThreadConfigSnapshot;
mod agent;
mod codex_delegate;
mod command_safety;
pub mod config;
pub mod config_loader;
pub mod connectors;
@@ -135,8 +134,6 @@ pub mod util;
pub use apply_patch::CODEX_APPLY_PATCH_ARG1;
pub use client::X_CODEX_TURN_METADATA_HEADER;
pub use command_safety::is_dangerous_command;
pub use command_safety::is_safe_command;
pub use exec_policy::ExecPolicyError;
pub use exec_policy::check_execpolicy_for_warnings;
pub use exec_policy::load_exec_policy;

View File

@@ -10,7 +10,6 @@ use crate::exec::ExecParams;
use crate::exec_env::create_env;
use crate::exec_policy::ExecApprovalRequest;
use crate::function_tool::FunctionCallError;
use crate::is_safe_command::is_known_safe_command;
use crate::protocol::ExecCommandSource;
use crate::shell::Shell;
use crate::tools::context::ToolInvocation;
@@ -26,6 +25,7 @@ use crate::tools::registry::ToolKind;
use crate::tools::runtimes::shell::ShellRequest;
use crate::tools::runtimes::shell::ShellRuntime;
use crate::tools::sandboxing::ToolCtx;
use codex_command_safety::is_safe_command::is_known_safe_command;
pub struct ShellHandler;
@@ -348,7 +348,6 @@ mod tests {
use crate::codex::make_session_and_context;
use crate::exec_env::create_env;
use crate::is_safe_command::is_known_safe_command;
use crate::powershell::try_find_powershell_executable_blocking;
use crate::powershell::try_find_pwsh_executable_blocking;
use crate::sandboxing::SandboxPermissions;
@@ -356,6 +355,7 @@ mod tests {
use crate::shell::ShellType;
use crate::shell_snapshot::ShellSnapshot;
use crate::tools::handlers::ShellCommandHandler;
use codex_command_safety::is_safe_command::is_known_safe_command;
use tokio::sync::watch;
/// The logic for is_known_safe_command() has heuristics for known shells,

View File

@@ -1,5 +1,4 @@
use crate::function_tool::FunctionCallError;
use crate::is_safe_command::is_known_safe_command;
use crate::protocol::EventMsg;
use crate::protocol::TerminalInteractionEvent;
use crate::sandboxing::SandboxPermissions;
@@ -18,6 +17,7 @@ use crate::unified_exec::UnifiedExecProcessManager;
use crate::unified_exec::UnifiedExecResponse;
use crate::unified_exec::WriteStdinRequest;
use async_trait::async_trait;
use codex_command_safety::is_safe_command::is_known_safe_command;
use codex_protocol::models::FunctionCallOutputBody;
use serde::Deserialize;
use std::path::PathBuf;

View File

@@ -23,6 +23,7 @@ workspace = true
anyhow = { workspace = true }
async-trait = { workspace = true }
clap = { workspace = true, features = ["derive"] }
codex-command-safety = { workspace = true }
codex-core = { workspace = true }
codex-execpolicy = { workspace = true }
libc = { workspace = true }

View File

@@ -61,8 +61,8 @@ use std::sync::Arc;
use anyhow::Context as _;
use clap::Parser;
use codex_command_safety::is_dangerous_command::command_might_be_dangerous;
use codex_core::config::find_codex_home;
use codex_core::is_dangerous_command::command_might_be_dangerous;
use codex_core::sandboxing::SandboxPermissions;
use codex_execpolicy::Decision;
use codex_execpolicy::Policy;