mirror of
https://github.com/openai/codex.git
synced 2026-04-28 08:34:54 +00:00
## Why The zsh integration tests were still brittle in two ways: - they relied on `CODEX_TEST_ZSH_PATH` / environment-specific setup, so they often did not exercise the patched zsh fork that `shell-tool-mcp` ships - once the tests consistently used the vendored zsh fork, they exposed real Linux-specific zsh-fork issues in CI In particular, the Linux failures were not just test noise: - the zsh-fork launch path was dropping `ExecRequest.arg0`, so Linux `codex-linux-sandbox` arg0 dispatch did not run and zsh wrapper-mode could receive malformed arguments - the `turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2` test uses the zsh exec bridge (which talks to the parent over a Unix socket), but Linux restricted sandbox seccomp denies `connect(2)`, causing timeouts on `ubuntu-24.04` x86/arm This PR makes the zsh tests consistently run against the intended vendored zsh fork and fixes/hardens the zsh-fork path so the Linux CI signal is meaningful. ## What Changed - Added a single shared test-only DotSlash file for the patched zsh fork at `codex-rs/exec-server/tests/suite/zsh` (analogous to the existing `bash` test resource). - Updated both app-server and exec-server zsh tests to use that shared DotSlash zsh (no duplicate zsh DotSlash file, no `CODEX_TEST_ZSH_PATH` dependency). - Updated the app-server zsh-fork test helper to resolve the shared DotSlash zsh and avoid silently falling back to host zsh. - Kept the app-server zsh-fork tests configured via `config.toml`, using a test wrapper path where needed to force `zsh -df` (and rewrite `-lc` to `-c`) for the subcommand-decline test. - Hardened the app-server subcommand-decline zsh-fork test for CI variability: - tolerate an extra `/responses` POST with a no-op mock response - tolerate non-target approval ordering while remaining strict on the two `/usr/bin/true` approvals and decline behavior - use `DangerFullAccess` on Linux for this one test because it validates zsh approval flow, not Linux sandbox socket restrictions - Fixed zsh-fork process launching on Linux by preserving `req.arg0` in `ZshExecBridge::execute_shell_request(...)` so `codex-linux-sandbox` arg0 dispatch continues to work. - Moved `maybe_run_zsh_exec_wrapper_mode()` under `arg0_dispatch_or_else(...)` in `app-server` and `cli` so wrapper-mode handling coexists correctly with arg0-dispatched helper modes. - Consolidated duplicated `dotslash -- fetch` resolution logic into shared test support (`core/tests/common/lib.rs`). - Updated `codex-rs/exec-server/tests/suite/accept_elicitation.rs` to use DotSlash zsh and hardened the zsh elicitation test for Bazel/zsh differences by: - resolving an absolute `git` path - running `git init --quiet .` - asserting success / `.git` creation instead of relying on banner text ## Verification - `cargo test -p codex-app-server turn_start_zsh_fork -- --nocapture` - `cargo test -p codex-exec-server accept_elicitation -- --nocapture` - `bazel test //codex-rs/exec-server:exec-server-all-test --test_output=streamed --test_arg=--nocapture --test_arg=accept_elicitation_for_prompt_rule_with_zsh` - CI (`rust-ci`) on the final cleaned commit: `Tests — ubuntu-24.04 - x86_64-unknown-linux-gnu` and `Tests — ubuntu-24.04-arm - aarch64-unknown-linux-gnu` passed in [run 22291424358](https://github.com/openai/codex/actions/runs/22291424358)
241 lines
8.4 KiB
Rust
241 lines
8.4 KiB
Rust
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
|
use std::borrow::Cow;
|
|
use std::path::PathBuf;
|
|
use std::sync::Arc;
|
|
use std::sync::Mutex;
|
|
|
|
use anyhow::Context;
|
|
use anyhow::Result;
|
|
use anyhow::ensure;
|
|
use codex_exec_server::ExecResult;
|
|
use exec_server_test_support::InteractiveClient;
|
|
use exec_server_test_support::create_transport;
|
|
use exec_server_test_support::create_transport_with_shell_path;
|
|
use exec_server_test_support::notify_readable_sandbox;
|
|
use exec_server_test_support::write_default_execpolicy;
|
|
use maplit::hashset;
|
|
use pretty_assertions::assert_eq;
|
|
use rmcp::ServiceExt;
|
|
use rmcp::model::CallToolRequestParams;
|
|
use rmcp::model::CallToolResult;
|
|
use rmcp::model::CreateElicitationRequestParams;
|
|
use rmcp::model::EmptyResult;
|
|
use rmcp::model::ServerResult;
|
|
use rmcp::model::object;
|
|
use serde_json::json;
|
|
use std::os::unix::fs::PermissionsExt;
|
|
use std::os::unix::fs::symlink;
|
|
use tempfile::TempDir;
|
|
use tokio::process::Command;
|
|
|
|
const USE_LOGIN_SHELL: bool = false;
|
|
|
|
/// Verify that when using a read-only sandbox and an execpolicy that prompts,
|
|
/// the proper elicitation is sent. Upon auto-approving the elicitation, the
|
|
/// command should be run privileged outside the sandbox.
|
|
#[tokio::test(flavor = "current_thread")]
|
|
async fn accept_elicitation_for_prompt_rule() -> Result<()> {
|
|
// Configure a stdio transport that will launch the MCP server using
|
|
// $CODEX_HOME with an execpolicy that prompts for `git init` commands.
|
|
let codex_home = TempDir::new()?;
|
|
write_default_execpolicy(
|
|
r#"
|
|
# Create a rule with `decision = "prompt"` to exercise the elicitation flow.
|
|
prefix_rule(
|
|
pattern = ["git", "init"],
|
|
decision = "prompt",
|
|
match = [
|
|
"git init ."
|
|
],
|
|
)
|
|
"#,
|
|
codex_home.as_ref(),
|
|
)
|
|
.await?;
|
|
let dotslash_cache_temp_dir = TempDir::new()?;
|
|
let dotslash_cache = dotslash_cache_temp_dir.path();
|
|
let transport = create_transport(codex_home.as_ref(), dotslash_cache).await?;
|
|
run_accept_elicitation_for_prompt_rule_with_transport(transport).await
|
|
}
|
|
|
|
/// Verify the same prompt/escalation flow works when the server is launched
|
|
/// with a patched zsh binary.
|
|
///
|
|
/// The suite resolves `tests/suite/zsh` via DotSlash on first use.
|
|
#[tokio::test(flavor = "current_thread")]
|
|
async fn accept_elicitation_for_prompt_rule_with_zsh() -> Result<()> {
|
|
let codex_home = TempDir::new()?;
|
|
write_default_execpolicy(
|
|
r#"
|
|
# Create a rule with `decision = "prompt"` to exercise the elicitation flow.
|
|
prefix_rule(
|
|
pattern = ["git", "init"],
|
|
decision = "prompt",
|
|
match = [
|
|
"git init ."
|
|
],
|
|
)
|
|
"#,
|
|
codex_home.as_ref(),
|
|
)
|
|
.await?;
|
|
let dotslash_cache_temp_dir = TempDir::new()?;
|
|
let dotslash_cache = dotslash_cache_temp_dir.path();
|
|
let zsh_path = resolve_test_zsh_path(dotslash_cache).await?;
|
|
eprintln!(
|
|
"using zsh path for exec-server test: {}",
|
|
zsh_path.display()
|
|
);
|
|
let transport =
|
|
create_transport_with_shell_path(codex_home.as_ref(), dotslash_cache, &zsh_path).await?;
|
|
run_accept_elicitation_for_prompt_rule_with_transport(transport).await
|
|
}
|
|
|
|
async fn run_accept_elicitation_for_prompt_rule_with_transport(
|
|
transport: rmcp::transport::TokioChildProcess,
|
|
) -> Result<()> {
|
|
// Create an MCP client that approves the expected elicitation message.
|
|
let project_root = TempDir::new()?;
|
|
let project_root_path = project_root.path().canonicalize().unwrap();
|
|
let git_path = resolve_git_path(USE_LOGIN_SHELL).await?;
|
|
let git_init_command = format!("{git_path} init --quiet .");
|
|
let expected_elicitation_message = format!(
|
|
"Allow agent to run `{git_path} init --quiet .` in `{}`?",
|
|
project_root_path.display()
|
|
);
|
|
let elicitation_requests: Arc<Mutex<Vec<CreateElicitationRequestParams>>> = Default::default();
|
|
let client = InteractiveClient {
|
|
elicitations_to_accept: hashset! { expected_elicitation_message.clone() },
|
|
elicitation_requests: elicitation_requests.clone(),
|
|
};
|
|
|
|
// Start the MCP server.
|
|
let service: rmcp::service::RunningService<rmcp::RoleClient, InteractiveClient> =
|
|
client.serve(transport).await?;
|
|
|
|
// Notify the MCP server about the current sandbox state before making any
|
|
// `shell` tool calls.
|
|
let linux_sandbox_exe_folder = TempDir::new()?;
|
|
let codex_linux_sandbox_exe = if cfg!(target_os = "linux") {
|
|
let codex_linux_sandbox_exe = linux_sandbox_exe_folder.path().join("codex-linux-sandbox");
|
|
let codex_cli = ensure_codex_cli()?;
|
|
symlink(&codex_cli, &codex_linux_sandbox_exe)?;
|
|
Some(codex_linux_sandbox_exe)
|
|
} else {
|
|
None
|
|
};
|
|
let response =
|
|
notify_readable_sandbox(&project_root_path, codex_linux_sandbox_exe, &service).await?;
|
|
let ServerResult::EmptyResult(EmptyResult {}) = response else {
|
|
panic!("expected EmptyResult from sandbox state notification but found: {response:?}");
|
|
};
|
|
|
|
// Call the shell tool and verify that an elicitation was created and
|
|
// auto-approved.
|
|
let CallToolResult {
|
|
content, is_error, ..
|
|
} = service
|
|
.call_tool(CallToolRequestParams {
|
|
meta: None,
|
|
name: Cow::Borrowed("shell"),
|
|
arguments: Some(object(json!(
|
|
{
|
|
"login": USE_LOGIN_SHELL,
|
|
"command": git_init_command,
|
|
"workdir": project_root_path.to_string_lossy(),
|
|
}
|
|
))),
|
|
task: None,
|
|
})
|
|
.await?;
|
|
let tool_call_content = content
|
|
.first()
|
|
.expect("expected non-empty content")
|
|
.as_text()
|
|
.expect("expected text content");
|
|
let ExecResult {
|
|
exit_code, output, ..
|
|
} = serde_json::from_str::<ExecResult>(&tool_call_content.text)?;
|
|
// `git init --quiet` is expected to suppress the usual initialization
|
|
// banner, so assert on success and filesystem effects instead of output.
|
|
assert!(
|
|
output.is_empty(),
|
|
"expected no output from `git init --quiet .`, got `{output}`"
|
|
);
|
|
assert_eq!(exit_code, 0, "command should succeed");
|
|
assert_eq!(is_error, Some(false), "command should succeed");
|
|
assert!(
|
|
project_root_path.join(".git").is_dir(),
|
|
"git repo should exist"
|
|
);
|
|
|
|
let elicitation_messages = elicitation_requests
|
|
.lock()
|
|
.unwrap()
|
|
.iter()
|
|
.map(|r| match r {
|
|
rmcp::model::CreateElicitationRequestParams::FormElicitationParams {
|
|
message, ..
|
|
}
|
|
| rmcp::model::CreateElicitationRequestParams::UrlElicitationParams {
|
|
message, ..
|
|
} => message.clone(),
|
|
})
|
|
.collect::<Vec<_>>();
|
|
assert_eq!(vec![expected_elicitation_message], elicitation_messages);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn resolve_test_zsh_path(dotslash_cache: &std::path::Path) -> Result<PathBuf> {
|
|
let dotslash_zsh = codex_utils_cargo_bin::find_resource!("tests/suite/zsh")?;
|
|
core_test_support::fetch_dotslash_file(&dotslash_zsh, Some(dotslash_cache))
|
|
.with_context(|| format!("failed to fetch test zsh from {}", dotslash_zsh.display()))
|
|
}
|
|
|
|
fn ensure_codex_cli() -> Result<PathBuf> {
|
|
let codex_cli = codex_utils_cargo_bin::cargo_bin("codex")?;
|
|
|
|
let metadata = codex_cli.metadata().with_context(|| {
|
|
format!(
|
|
"failed to read metadata for codex binary at {}",
|
|
codex_cli.display()
|
|
)
|
|
})?;
|
|
ensure!(
|
|
metadata.is_file(),
|
|
"expected codex binary at {} to be a file; run `cargo build -p codex-cli --bin codex` before this test",
|
|
codex_cli.display()
|
|
);
|
|
|
|
let mode = metadata.permissions().mode();
|
|
ensure!(
|
|
mode & 0o111 != 0,
|
|
"codex binary at {} is not executable (mode {mode:o}); run `cargo build -p codex-cli --bin codex` before this test",
|
|
codex_cli.display()
|
|
);
|
|
|
|
Ok(codex_cli)
|
|
}
|
|
|
|
async fn resolve_git_path(use_login_shell: bool) -> Result<String> {
|
|
let bash_flag = if use_login_shell { "-lc" } else { "-c" };
|
|
let git = Command::new("bash")
|
|
.arg(bash_flag)
|
|
.arg("command -v git")
|
|
.output()
|
|
.await
|
|
.context("failed to resolve git via login shell")?;
|
|
ensure!(
|
|
git.status.success(),
|
|
"failed to resolve git via login shell: {}",
|
|
String::from_utf8_lossy(&git.stderr)
|
|
);
|
|
let git_path = String::from_utf8(git.stdout)
|
|
.context("git path was not valid utf8")?
|
|
.trim()
|
|
.to_string();
|
|
ensure!(!git_path.is_empty(), "git path should not be empty");
|
|
Ok(git_path)
|
|
}
|