mirror of
https://github.com/openai/codex.git
synced 2026-04-29 08:56:38 +00:00
## Why

`argument-comment-lint` was green in CI even though the repo still had many uncommented literal arguments. The main gap was target coverage: the repo wrapper did not force Cargo to inspect test-only call sites, so examples like the `latest_session_lookup_params(true, ...)` tests in `codex-rs/tui_app_server/src/lib.rs` never entered the blocking CI path.

This change cleans up the existing backlog, makes the default repo lint path cover all Cargo targets, and starts rolling that stricter CI enforcement out on the platform where it is currently validated.

## What changed

- mechanically fixed existing `argument-comment-lint` violations across the `codex-rs` workspace, including tests, examples, and benches
- updated `tools/argument-comment-lint/run-prebuilt-linter.sh` and `tools/argument-comment-lint/run.sh` so non-`--fix` runs default to `--all-targets` unless the caller explicitly narrows the target set
- fixed both wrappers so forwarded cargo arguments after `--` are preserved with a single separator
- documented the new default behavior in `tools/argument-comment-lint/README.md`
- updated `rust-ci` so the macOS lint lane keeps the plain wrapper invocation and therefore enforces `--all-targets`, while Linux and Windows temporarily pass `-- --lib --bins`

That temporary CI split keeps the stricter all-targets check where it is already cleaned up, while leaving room to finish the remaining Linux- and Windows-specific target-gated cleanup before enabling `--all-targets` on those runners. The Linux and Windows failures on the intermediate revision were caused by the wrapper forwarding bug, not by additional lint findings in those lanes.
## Validation

- `bash -n tools/argument-comment-lint/run.sh`
- `bash -n tools/argument-comment-lint/run-prebuilt-linter.sh`
- shell-level wrapper forwarding check for `-- --lib --bins`
- shell-level wrapper forwarding check for `-- --tests`
- `just argument-comment-lint`
- `cargo test` in `tools/argument-comment-lint`
- `cargo test -p codex-terminal-detection`

## Follow-up

- Clean up remaining Linux-only target-gated callsites, then switch the Linux lint lane back to the plain wrapper invocation.
- Clean up remaining Windows-only target-gated callsites, then switch the Windows lint lane back to the plain wrapper invocation.
517 lines
18 KiB
Rust
517 lines
18 KiB
Rust
use std::collections::HashMap;
|
|
use std::env;
|
|
use std::path::Path;
|
|
use std::path::PathBuf;
|
|
|
|
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
|
use codex_mcp_server::CodexToolCallParam;
|
|
use codex_mcp_server::ExecApprovalElicitRequestParams;
|
|
use codex_mcp_server::ExecApprovalResponse;
|
|
use codex_mcp_server::PatchApprovalElicitRequestParams;
|
|
use codex_mcp_server::PatchApprovalResponse;
|
|
use codex_protocol::protocol::FileChange;
|
|
use codex_protocol::protocol::ReviewDecision;
|
|
use codex_shell_command::parse_command;
|
|
use pretty_assertions::assert_eq;
|
|
use rmcp::model::JsonRpcResponse;
|
|
use rmcp::model::JsonRpcVersion2_0;
|
|
use rmcp::model::RequestId;
|
|
use serde_json::json;
|
|
use tempfile::TempDir;
|
|
use tokio::time::timeout;
|
|
use wiremock::MockServer;
|
|
|
|
use core_test_support::skip_if_no_network;
|
|
use mcp_test_support::McpProcess;
|
|
use mcp_test_support::create_apply_patch_sse_response;
|
|
use mcp_test_support::create_final_assistant_message_sse_response;
|
|
use mcp_test_support::create_mock_responses_server;
|
|
use mcp_test_support::create_shell_command_sse_response;
|
|
use mcp_test_support::format_with_current_shell;
|
|
|
|
// Allow ample time on slower CI or under load to avoid flakes.
|
|
/// Deadline passed to `timeout(...)` around each awaited MCP stream read and
/// around `McpProcess::initialize()` in this file's tests (20 seconds).
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(20);
|
|
|
|
/// Test that a shell command that is not on the "trusted" list triggers an
|
|
/// elicitation request to the MCP and that sending the approval runs the
|
|
/// command, as expected.
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
|
async fn test_shell_command_approval_triggers_elicitation() {
|
|
if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
|
println!(
|
|
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
|
);
|
|
return;
|
|
}
|
|
|
|
// Apparently `#[tokio::test]` must return `()`, so we create a helper
|
|
// function that returns `Result` so we can use `?` in favor of `unwrap`.
|
|
if let Err(err) = shell_command_approval_triggers_elicitation().await {
|
|
panic!("failure: {err}");
|
|
}
|
|
}
|
|
|
|
async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
|
|
// Use a simple, untrusted command that creates a file so we can
|
|
// observe a side-effect.
|
|
let workdir_for_shell_function_call = TempDir::new()?;
|
|
let created_filename = "created_by_shell_tool.txt";
|
|
let created_file = workdir_for_shell_function_call
|
|
.path()
|
|
.join(created_filename);
|
|
|
|
let shell_command = if cfg!(windows) {
|
|
vec![
|
|
"New-Item".to_string(),
|
|
"-ItemType".to_string(),
|
|
"File".to_string(),
|
|
"-Path".to_string(),
|
|
created_filename.to_string(),
|
|
"-Force".to_string(),
|
|
]
|
|
} else {
|
|
vec!["touch".to_string(), created_filename.to_string()]
|
|
};
|
|
let expected_shell_command =
|
|
format_with_current_shell(&shlex::try_join(shell_command.iter().map(String::as_str))?);
|
|
|
|
let McpHandle {
|
|
process: mut mcp_process,
|
|
server: _server,
|
|
dir: _dir,
|
|
} = create_mcp_process(vec![
|
|
create_shell_command_sse_response(
|
|
shell_command.clone(),
|
|
Some(workdir_for_shell_function_call.path()),
|
|
Some(5_000),
|
|
"call1234",
|
|
)?,
|
|
create_final_assistant_message_sse_response("File created!")?,
|
|
])
|
|
.await?;
|
|
|
|
// Send a "codex" tool request, which should hit the responses endpoint.
|
|
// In turn, it should reply with a tool call, which the MCP should forward
|
|
// as an elicitation.
|
|
let codex_request_id = mcp_process
|
|
.send_codex_tool_call(CodexToolCallParam {
|
|
prompt: "run `git init`".to_string(),
|
|
..Default::default()
|
|
})
|
|
.await?;
|
|
let elicitation_request = timeout(
|
|
DEFAULT_READ_TIMEOUT,
|
|
mcp_process.read_stream_until_request_message(),
|
|
)
|
|
.await??;
|
|
|
|
assert_eq!(elicitation_request.jsonrpc, JsonRpcVersion2_0);
|
|
assert_eq!(elicitation_request.request.method, "elicitation/create");
|
|
|
|
let elicitation_request_id = elicitation_request.id.clone();
|
|
let params = serde_json::from_value::<ExecApprovalElicitRequestParams>(
|
|
elicitation_request
|
|
.request
|
|
.params
|
|
.clone()
|
|
.ok_or_else(|| anyhow::anyhow!("elicitation_request.params must be set"))?,
|
|
)?;
|
|
assert_eq!(
|
|
elicitation_request.request.params,
|
|
Some(create_expected_elicitation_request_params(
|
|
expected_shell_command,
|
|
workdir_for_shell_function_call.path(),
|
|
codex_request_id.to_string(),
|
|
params.codex_event_id.clone(),
|
|
params.thread_id,
|
|
)?)
|
|
);
|
|
|
|
// Accept the `git init` request by responding to the elicitation.
|
|
mcp_process
|
|
.send_response(
|
|
elicitation_request_id,
|
|
serde_json::to_value(ExecApprovalResponse {
|
|
decision: ReviewDecision::Approved,
|
|
})?,
|
|
)
|
|
.await?;
|
|
|
|
// Verify task_complete notification arrives before the tool call completes.
|
|
#[expect(clippy::expect_used)]
|
|
let _task_complete = timeout(
|
|
DEFAULT_READ_TIMEOUT,
|
|
mcp_process.read_stream_until_legacy_task_complete_notification(),
|
|
)
|
|
.await
|
|
.expect("task_complete_notification timeout")
|
|
.expect("task_complete_notification resp");
|
|
|
|
// Verify the original `codex` tool call completes and that the file was created.
|
|
let codex_response = timeout(
|
|
DEFAULT_READ_TIMEOUT,
|
|
mcp_process.read_stream_until_response_message(RequestId::Number(codex_request_id)),
|
|
)
|
|
.await??;
|
|
assert_eq!(
|
|
JsonRpcResponse {
|
|
jsonrpc: JsonRpcVersion2_0,
|
|
id: RequestId::Number(codex_request_id),
|
|
result: json!({
|
|
"content": [
|
|
{
|
|
"text": "File created!",
|
|
"type": "text"
|
|
}
|
|
],
|
|
"structuredContent": {
|
|
"threadId": params.thread_id,
|
|
"content": "File created!"
|
|
}
|
|
}),
|
|
},
|
|
codex_response
|
|
);
|
|
|
|
assert!(created_file.is_file(), "created file should exist");
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn create_expected_elicitation_request_params(
|
|
command: Vec<String>,
|
|
workdir: &Path,
|
|
codex_mcp_tool_call_id: String,
|
|
codex_event_id: String,
|
|
thread_id: codex_protocol::ThreadId,
|
|
) -> anyhow::Result<serde_json::Value> {
|
|
let expected_message = format!(
|
|
"Allow Codex to run `{}` in `{}`?",
|
|
shlex::try_join(command.iter().map(std::convert::AsRef::as_ref))?,
|
|
workdir.to_string_lossy()
|
|
);
|
|
let codex_parsed_cmd = parse_command::parse_command(&command);
|
|
let params_json = serde_json::to_value(ExecApprovalElicitRequestParams {
|
|
message: expected_message,
|
|
requested_schema: json!({"type":"object","properties":{}}),
|
|
thread_id,
|
|
codex_elicitation: "exec-approval".to_string(),
|
|
codex_mcp_tool_call_id,
|
|
codex_event_id,
|
|
codex_command: command,
|
|
codex_cwd: workdir.to_path_buf(),
|
|
codex_call_id: "call1234".to_string(),
|
|
codex_parsed_cmd,
|
|
})?;
|
|
Ok(params_json)
|
|
}
|
|
|
|
/// Test that patch approval triggers an elicitation request to the MCP and that
|
|
/// sending the approval applies the patch, as expected.
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn test_patch_approval_triggers_elicitation() {
|
|
if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
|
println!(
|
|
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
|
);
|
|
return;
|
|
}
|
|
|
|
if let Err(err) = patch_approval_triggers_elicitation().await {
|
|
panic!("failure: {err}");
|
|
}
|
|
}
|
|
|
|
async fn patch_approval_triggers_elicitation() -> anyhow::Result<()> {
|
|
if cfg!(windows) {
|
|
// powershell apply_patch shell calls are not parsed into apply patch approvals
|
|
|
|
return Ok(());
|
|
}
|
|
|
|
let cwd = TempDir::new()?;
|
|
let test_file = cwd.path().join("destination_file.txt");
|
|
std::fs::write(&test_file, "original content\n")?;
|
|
|
|
let patch_content = format!(
|
|
"*** Begin Patch\n*** Update File: {}\n-original content\n+modified content\n*** End Patch",
|
|
test_file.as_path().to_string_lossy()
|
|
);
|
|
|
|
let McpHandle {
|
|
process: mut mcp_process,
|
|
server: _server,
|
|
dir: _dir,
|
|
} = create_mcp_process(vec![
|
|
create_apply_patch_sse_response(&patch_content, "call1234")?,
|
|
create_final_assistant_message_sse_response("Patch has been applied successfully!")?,
|
|
])
|
|
.await?;
|
|
|
|
// Send a "codex" tool request that will trigger the apply_patch command
|
|
let codex_request_id = mcp_process
|
|
.send_codex_tool_call(CodexToolCallParam {
|
|
cwd: Some(cwd.path().to_string_lossy().to_string()),
|
|
prompt: "please modify the test file".to_string(),
|
|
..Default::default()
|
|
})
|
|
.await?;
|
|
let elicitation_request = timeout(
|
|
DEFAULT_READ_TIMEOUT,
|
|
mcp_process.read_stream_until_request_message(),
|
|
)
|
|
.await??;
|
|
|
|
assert_eq!(elicitation_request.jsonrpc, JsonRpcVersion2_0);
|
|
assert_eq!(elicitation_request.request.method, "elicitation/create");
|
|
|
|
let elicitation_request_id = elicitation_request.id.clone();
|
|
let params = serde_json::from_value::<PatchApprovalElicitRequestParams>(
|
|
elicitation_request
|
|
.request
|
|
.params
|
|
.clone()
|
|
.ok_or_else(|| anyhow::anyhow!("elicitation_request.params must be set"))?,
|
|
)?;
|
|
|
|
let mut expected_changes = HashMap::new();
|
|
expected_changes.insert(
|
|
test_file.as_path().to_path_buf(),
|
|
FileChange::Update {
|
|
unified_diff: "@@ -1 +1 @@\n-original content\n+modified content\n".to_string(),
|
|
move_path: None,
|
|
},
|
|
);
|
|
|
|
assert_eq!(
|
|
elicitation_request.request.params,
|
|
Some(create_expected_patch_approval_elicitation_request_params(
|
|
expected_changes,
|
|
/*grant_root*/ None, // No grant_root expected
|
|
/*reason*/ None, // No reason expected
|
|
codex_request_id.to_string(),
|
|
params.codex_event_id.clone(),
|
|
params.thread_id,
|
|
)?)
|
|
);
|
|
|
|
// Accept the patch approval request by responding to the elicitation
|
|
mcp_process
|
|
.send_response(
|
|
elicitation_request_id,
|
|
serde_json::to_value(PatchApprovalResponse {
|
|
decision: ReviewDecision::Approved,
|
|
})?,
|
|
)
|
|
.await?;
|
|
|
|
// Verify the original `codex` tool call completes
|
|
let codex_response = timeout(
|
|
DEFAULT_READ_TIMEOUT,
|
|
mcp_process.read_stream_until_response_message(RequestId::Number(codex_request_id)),
|
|
)
|
|
.await??;
|
|
assert_eq!(
|
|
JsonRpcResponse {
|
|
jsonrpc: JsonRpcVersion2_0,
|
|
id: RequestId::Number(codex_request_id),
|
|
result: json!({
|
|
"content": [
|
|
{
|
|
"text": "Patch has been applied successfully!",
|
|
"type": "text"
|
|
}
|
|
],
|
|
"structuredContent": {
|
|
"threadId": params.thread_id,
|
|
"content": "Patch has been applied successfully!"
|
|
}
|
|
}),
|
|
},
|
|
codex_response
|
|
);
|
|
|
|
let file_contents = std::fs::read_to_string(test_file.as_path())?;
|
|
assert_eq!(file_contents, "modified content\n");
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn test_codex_tool_passes_base_instructions() {
|
|
skip_if_no_network!();
|
|
|
|
// Apparently `#[tokio::test]` must return `()`, so we create a helper
|
|
// function that returns `Result` so we can use `?` in favor of `unwrap`.
|
|
if let Err(err) = codex_tool_passes_base_instructions().await {
|
|
panic!("failure: {err}");
|
|
}
|
|
}
|
|
|
|
async fn codex_tool_passes_base_instructions() -> anyhow::Result<()> {
|
|
#![expect(clippy::expect_used, clippy::unwrap_used)]
|
|
|
|
let server =
|
|
create_mock_responses_server(vec![create_final_assistant_message_sse_response("Enjoy!")?])
|
|
.await;
|
|
|
|
// Run `codex mcp` with a specific config.toml.
|
|
let codex_home = TempDir::new()?;
|
|
create_config_toml(codex_home.path(), &server.uri())?;
|
|
let mut mcp_process = McpProcess::new(codex_home.path()).await?;
|
|
timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
|
|
|
|
// Send a "codex" tool request, which should hit the responses endpoint.
|
|
let codex_request_id = mcp_process
|
|
.send_codex_tool_call(CodexToolCallParam {
|
|
prompt: "How are you?".to_string(),
|
|
base_instructions: Some("You are a helpful assistant.".to_string()),
|
|
developer_instructions: Some("Foreshadow upcoming tool calls.".to_string()),
|
|
..Default::default()
|
|
})
|
|
.await?;
|
|
|
|
let codex_response = timeout(
|
|
DEFAULT_READ_TIMEOUT,
|
|
mcp_process.read_stream_until_response_message(RequestId::Number(codex_request_id)),
|
|
)
|
|
.await??;
|
|
assert_eq!(codex_response.jsonrpc, JsonRpcVersion2_0);
|
|
assert_eq!(codex_response.id, RequestId::Number(codex_request_id));
|
|
assert_eq!(
|
|
codex_response.result,
|
|
json!({
|
|
"content": [
|
|
{
|
|
"text": "Enjoy!",
|
|
"type": "text"
|
|
}
|
|
],
|
|
"structuredContent": {
|
|
"threadId": codex_response
|
|
.result
|
|
.get("structuredContent")
|
|
.and_then(|v| v.get("threadId"))
|
|
.and_then(serde_json::Value::as_str)
|
|
.expect("codex tool response should include structuredContent.threadId"),
|
|
"content": "Enjoy!"
|
|
}
|
|
})
|
|
);
|
|
|
|
let requests = server.received_requests().await.unwrap();
|
|
let request = requests[0].body_json::<serde_json::Value>()?;
|
|
let instructions = request["instructions"]
|
|
.as_str()
|
|
.expect("responses request should include instructions");
|
|
assert!(instructions.starts_with("You are a helpful assistant."));
|
|
|
|
let developer_messages: Vec<&serde_json::Value> = request["input"]
|
|
.as_array()
|
|
.expect("responses request should include input items")
|
|
.iter()
|
|
.filter(|msg| msg.get("role").and_then(|role| role.as_str()) == Some("developer"))
|
|
.collect();
|
|
let developer_contents: Vec<&str> = developer_messages
|
|
.iter()
|
|
.filter_map(|msg| msg.get("content").and_then(serde_json::Value::as_array))
|
|
.flat_map(|content| content.iter())
|
|
.filter(|span| span.get("type").and_then(serde_json::Value::as_str) == Some("input_text"))
|
|
.filter_map(|span| span.get("text").and_then(serde_json::Value::as_str))
|
|
.collect();
|
|
assert!(
|
|
developer_contents
|
|
.iter()
|
|
.any(|content| content.contains("`sandbox_mode`")),
|
|
"expected permissions developer message, got {developer_contents:?}"
|
|
);
|
|
assert!(
|
|
developer_contents.contains(&"Foreshadow upcoming tool calls."),
|
|
"expected developer instructions in developer messages, got {developer_contents:?}"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn create_expected_patch_approval_elicitation_request_params(
|
|
changes: HashMap<PathBuf, FileChange>,
|
|
grant_root: Option<PathBuf>,
|
|
reason: Option<String>,
|
|
codex_mcp_tool_call_id: String,
|
|
codex_event_id: String,
|
|
thread_id: codex_protocol::ThreadId,
|
|
) -> anyhow::Result<serde_json::Value> {
|
|
let mut message_lines = Vec::new();
|
|
if let Some(r) = &reason {
|
|
message_lines.push(r.clone());
|
|
}
|
|
message_lines.push("Allow Codex to apply proposed code changes?".to_string());
|
|
let params_json = serde_json::to_value(PatchApprovalElicitRequestParams {
|
|
message: message_lines.join("\n"),
|
|
requested_schema: json!({"type":"object","properties":{}}),
|
|
thread_id,
|
|
codex_elicitation: "patch-approval".to_string(),
|
|
codex_mcp_tool_call_id,
|
|
codex_event_id,
|
|
codex_reason: reason,
|
|
codex_grant_root: grant_root,
|
|
codex_changes: changes,
|
|
codex_call_id: "call1234".to_string(),
|
|
})?;
|
|
|
|
Ok(params_json)
|
|
}
|
|
|
|
/// This handle is used to ensure that the MockServer and TempDir are not dropped while
|
|
/// the McpProcess is still running.
|
|
pub struct McpHandle {
|
|
pub process: McpProcess,
|
|
/// Retain the server for the lifetime of the McpProcess.
|
|
#[allow(dead_code)]
|
|
server: MockServer,
|
|
/// Retain the temporary directory for the lifetime of the McpProcess.
|
|
#[allow(dead_code)]
|
|
dir: TempDir,
|
|
}
|
|
|
|
async fn create_mcp_process(responses: Vec<String>) -> anyhow::Result<McpHandle> {
|
|
let server = create_mock_responses_server(responses).await;
|
|
let codex_home = TempDir::new()?;
|
|
create_config_toml(codex_home.path(), &server.uri())?;
|
|
let mut mcp_process = McpProcess::new(codex_home.path()).await?;
|
|
timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
|
|
Ok(McpHandle {
|
|
process: mcp_process,
|
|
server,
|
|
dir: codex_home,
|
|
})
|
|
}
|
|
|
|
/// Writes `config.toml` into `codex_home`, configuring `mock_provider` (served
/// at `{server_uri}/v1` over the `responses` wire API, with retries disabled)
/// as the model provider.
///
/// The config sets `approval_policy = "untrusted"` so that the tests exercise
/// the elicitation code path for shell commands.
///
/// NOTE(review): the file sets `sandbox_policy`, while another test in this
/// file looks for `sandbox_mode` in the generated developer message; confirm
/// that `sandbox_policy` is a key Codex actually reads.
///
/// Returns any I/O error from writing the file.
fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
    let contents = format!(
        r#"
model = "mock-model"
approval_policy = "untrusted"
sandbox_policy = "workspace-write"

model_provider = "mock_provider"

[model_providers.mock_provider]
name = "Mock provider for test"
base_url = "{server_uri}/v1"
wire_api = "responses"
request_max_retries = 0
stream_max_retries = 0

[features]
"#
    );
    std::fs::write(codex_home.join("config.toml"), contents)
}
|