feat: add shell snapshot for shell command (#7786)

This commit is contained in:
jif-oai
2025-12-11 13:46:43 +00:00
committed by GitHub
parent b2280d6205
commit 29381ba5c2
14 changed files with 301 additions and 129 deletions

View File

@@ -10,6 +10,7 @@ use crate::exec_policy::create_exec_approval_requirement_for_command;
use crate::function_tool::FunctionCallError;
use crate::is_safe_command::is_known_safe_command;
use crate::protocol::ExecCommandSource;
use crate::shell::Shell;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
@@ -42,13 +43,18 @@ impl ShellHandler {
}
impl ShellCommandHandler {
fn base_command(shell: &Shell, command: &str, login: Option<bool>) -> Vec<String> {
let use_login_shell = login.unwrap_or(true);
shell.derive_exec_args(command, use_login_shell)
}
fn to_exec_params(
params: ShellCommandToolCallParams,
session: &crate::codex::Session,
turn_context: &TurnContext,
) -> ExecParams {
let shell = session.user_shell();
let command = shell.derive_exec_args(&params.command, params.login.unwrap_or(true));
let command = Self::base_command(shell.as_ref(), &params.command, params.login);
ExecParams {
command,
@@ -155,7 +161,7 @@ impl ToolHandler for ShellCommandHandler {
serde_json::from_str::<ShellCommandToolCallParams>(arguments)
.map(|params| {
let shell = invocation.session.user_shell();
let command = shell.derive_exec_args(&params.command, params.login.unwrap_or(true));
let command = Self::base_command(shell.as_ref(), &params.command, params.login);
!is_known_safe_command(&command)
})
.unwrap_or(true)
@@ -289,6 +295,7 @@ impl ShellHandler {
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use std::sync::Arc;
use codex_protocol::models::ShellCommandToolCallParams;
use pretty_assertions::assert_eq;
@@ -299,6 +306,7 @@ mod tests {
use crate::sandboxing::SandboxPermissions;
use crate::shell::Shell;
use crate::shell::ShellType;
use crate::shell_snapshot::ShellSnapshot;
use crate::tools::handlers::ShellCommandHandler;
/// The logic for is_known_safe_command() has heuristics for known shells,
@@ -372,4 +380,29 @@ mod tests {
assert_eq!(exec_params.justification, justification);
assert_eq!(exec_params.arg0, None);
}
#[test]
fn shell_command_handler_respects_explicit_login_flag() {
let shell = Shell {
shell_type: ShellType::Bash,
shell_path: PathBuf::from("/bin/bash"),
shell_snapshot: Some(Arc::new(ShellSnapshot {
path: PathBuf::from("/tmp/snapshot.sh"),
})),
};
let login_command =
ShellCommandHandler::base_command(&shell, "echo login shell", Some(true));
assert_eq!(
login_command,
shell.derive_exec_args("echo login shell", true)
);
let non_login_command =
ShellCommandHandler::base_command(&shell, "echo non login shell", Some(false));
assert_eq!(
non_login_command,
shell.derive_exec_args("echo non login shell", false)
);
}
}

View File

@@ -34,8 +34,8 @@ struct ExecCommandArgs {
workdir: Option<String>,
#[serde(default)]
shell: Option<String>,
#[serde(default)]
login: Option<bool>,
#[serde(default = "default_login")]
login: bool,
#[serde(default = "default_exec_yield_time_ms")]
yield_time_ms: u64,
#[serde(default)]
@@ -66,6 +66,10 @@ fn default_write_stdin_yield_time_ms() -> u64 {
250
}
fn default_login() -> bool {
true
}
#[async_trait]
impl ToolHandler for UnifiedExecHandler {
fn kind(&self) -> ToolKind {
@@ -125,11 +129,10 @@ impl ToolHandler for UnifiedExecHandler {
))
})?;
let process_id = manager.allocate_process_id().await;
let command = get_command(&args, session.user_shell());
let command_for_intercept = get_command(&args, session.user_shell());
let ExecCommandArgs {
workdir,
login,
yield_time_ms,
max_output_tokens,
sandbox_permissions,
@@ -156,7 +159,7 @@ impl ToolHandler for UnifiedExecHandler {
let cwd = workdir.clone().unwrap_or_else(|| context.turn.cwd.clone());
if let Some(output) = intercept_apply_patch(
&command_for_intercept,
&command,
&cwd,
Some(yield_time_ms),
context.session.as_ref(),
@@ -177,14 +180,6 @@ impl ToolHandler for UnifiedExecHandler {
&context.call_id,
None,
);
let command = if login.is_none() {
context
.session
.user_shell()
.wrap_command_with_snapshot(&command_for_intercept)
} else {
command_for_intercept
};
let emitter = ToolEmitter::unified_exec(
&command,
cwd.clone(),
@@ -258,14 +253,15 @@ impl ToolHandler for UnifiedExecHandler {
}
fn get_command(args: &ExecCommandArgs, session_shell: Arc<Shell>) -> Vec<String> {
if let Some(shell_str) = &args.shell {
let model_shell = args.shell.as_ref().map(|shell_str| {
let mut shell = get_shell_by_model_provided_path(&PathBuf::from(shell_str));
shell.shell_snapshot = None;
return shell.derive_exec_args(&args.cmd, args.login.unwrap_or(true));
}
shell
});
let use_login_shell = args.login.unwrap_or(session_shell.shell_snapshot.is_none());
session_shell.derive_exec_args(&args.cmd, use_login_shell)
let shell = model_shell.as_ref().unwrap_or(session_shell.as_ref());
shell.derive_exec_args(&args.cmd, args.login)
}
fn format_response(response: &UnifiedExecResponse) -> String {
@@ -329,7 +325,13 @@ mod tests {
let command = get_command(&args, Arc::new(default_user_shell()));
assert_eq!(command[2], "echo hello");
assert_eq!(command.last(), Some(&"echo hello".to_string()));
if command
.iter()
.any(|arg| arg.eq_ignore_ascii_case("-Command"))
{
assert!(command.contains(&"-NoProfile".to_string()));
}
}
#[test]

View File

@@ -7,6 +7,7 @@ small and focused and reuses the orchestrator for approvals + sandbox + retry.
use crate::exec::ExecExpiration;
use crate::sandboxing::CommandSpec;
use crate::sandboxing::SandboxPermissions;
use crate::shell::Shell;
use crate::tools::sandboxing::ToolError;
use std::collections::HashMap;
use std::path::Path;
@@ -38,3 +39,39 @@ pub(crate) fn build_command_spec(
justification,
})
}
/// POSIX-only helper: for commands produced by `Shell::derive_exec_args`
/// for Bash/Zsh/sh of the form `[shell_path, "-lc", "<script>"]`, and
/// when a snapshot is configured on the session shell, rewrite the argv
/// to a single non-login shell that sources the snapshot before running
/// the original script:
///
/// shell -lc "<script>"
/// => shell -c ". SNAPSHOT && <script>"
///
/// On non-POSIX shells or non-matching commands this is a no-op.
pub(crate) fn maybe_wrap_shell_lc_with_snapshot(
command: &[String],
session_shell: &Shell,
) -> Vec<String> {
let Some(snapshot) = &session_shell.shell_snapshot else {
return command.to_vec();
};
if command.len() < 3 {
return command.to_vec();
}
let flag = command[1].as_str();
if flag != "-lc" {
return command.to_vec();
}
let snapshot_path = snapshot.path.to_string_lossy();
let rewritten_script = format!(". \"{snapshot_path}\" && {}", command[2]);
let mut rewritten = command.to_vec();
rewritten[1] = "-c".to_string();
rewritten[2] = rewritten_script;
rewritten
}

View File

@@ -8,6 +8,7 @@ use crate::exec::ExecToolCallOutput;
use crate::sandboxing::SandboxPermissions;
use crate::sandboxing::execute_env;
use crate::tools::runtimes::build_command_spec;
use crate::tools::runtimes::maybe_wrap_shell_lc_with_snapshot;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ExecApprovalRequirement;
@@ -140,8 +141,12 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
attempt: &SandboxAttempt<'_>,
ctx: &ToolCtx<'_>,
) -> Result<ExecToolCallOutput, ToolError> {
let base_command = &req.command;
let session_shell = ctx.session.user_shell();
let command = maybe_wrap_shell_lc_with_snapshot(base_command, session_shell.as_ref());
let spec = build_command_spec(
&req.command,
&command,
&req.cwd,
&req.env,
req.timeout_ms.into(),

View File

@@ -9,6 +9,7 @@ use crate::error::SandboxErr;
use crate::exec::ExecExpiration;
use crate::sandboxing::SandboxPermissions;
use crate::tools::runtimes::build_command_spec;
use crate::tools::runtimes::maybe_wrap_shell_lc_with_snapshot;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ExecApprovalRequirement;
@@ -159,10 +160,14 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
&mut self,
req: &UnifiedExecRequest,
attempt: &SandboxAttempt<'_>,
_ctx: &ToolCtx<'_>,
ctx: &ToolCtx<'_>,
) -> Result<UnifiedExecSession, ToolError> {
let base_command = &req.command;
let session_shell = ctx.session.user_shell();
let command = maybe_wrap_shell_lc_with_snapshot(base_command, session_shell.as_ref());
let spec = build_command_spec(
&req.command,
&command,
&req.cwd,
&req.env,
ExecExpiration::DefaultTimeout,

View File

@@ -153,7 +153,8 @@ fn create_exec_command_tool() -> ToolSpec {
"login".to_string(),
JsonSchema::Boolean {
description: Some(
"Whether to run the shell with -l/-i semantics. Defaults to true.".to_string(),
"Whether to run the shell with -l/-i semantics. Defaults to false unless a shell snapshot is available."
.to_string(),
),
},
);
@@ -335,7 +336,7 @@ fn create_shell_command_tool() -> ToolSpec {
"login".to_string(),
JsonSchema::Boolean {
description: Some(
"Whether to run the shell with login shell semantics. Defaults to true."
"Whether to run the shell with login shell semantics. Defaults to false unless a shell snapshot is available."
.to_string(),
),
},