mirror of
https://github.com/openai/codex.git
synced 2026-04-26 23:55:25 +00:00
Promote shared helpers for suite tests (#6460)
## Summary - add `TestCodex::submit_turn_with_policies` and extend the response helpers with reusable tool-call utilities - update the grep_files, read_file, list_dir, shell_serialization, and tools suites to rely on the shared helpers instead of local copies - make the list_dir helper return `anyhow::Result` so clippy no longer warns about `expect` ## Testing - `just fix -p codex-core` - `cargo test -p codex-core --test all suite::grep_files::grep_files_tool_collects_matches` - `cargo test -p codex-core suite::grep_files::grep_files_tool_collects_matches -- --ignored` (filter requests ignored tests so nothing runs, but the build stays clean) ------ [Codex Task](https://chatgpt.com/codex/tasks/task_i_69112d53abac83219813cab4d7cb6446)
This commit is contained in:
@@ -10,11 +10,7 @@ use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
@@ -26,44 +22,11 @@ use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use regex_lite::Regex;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
|
||||
async fn submit_turn(
|
||||
test: &TestCodex,
|
||||
prompt: &str,
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: SandboxPolicy,
|
||||
) -> Result<()> {
|
||||
let session_model = test.session_configured.model.clone();
|
||||
|
||||
test.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: prompt.into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: test.cwd.path().to_path_buf(),
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TaskComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn tool_names(body: &Value) -> Vec<String> {
|
||||
body.get("tools")
|
||||
.and_then(Value::as_array)
|
||||
@@ -110,8 +73,7 @@ async fn custom_tool_unknown_returns_custom_output_error() -> Result<()> {
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
test.submit_turn_with_policies(
|
||||
"invoke custom tool",
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
@@ -189,8 +151,7 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
test.submit_turn_with_policies(
|
||||
"run the shell command",
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
@@ -202,24 +163,22 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
|
||||
"approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}"
|
||||
);
|
||||
|
||||
let blocked_item = second_mock
|
||||
let blocked_output = second_mock
|
||||
.single_request()
|
||||
.function_call_output(call_id_blocked);
|
||||
.function_call_output_content_and_success(call_id_blocked)
|
||||
.and_then(|(content, _)| content)
|
||||
.expect("blocked output string");
|
||||
assert_eq!(
|
||||
blocked_item.get("output").and_then(Value::as_str),
|
||||
Some(expected_message.as_str()),
|
||||
blocked_output, expected_message,
|
||||
"unexpected rejection message"
|
||||
);
|
||||
|
||||
let success_item = third_mock
|
||||
let success_output = third_mock
|
||||
.single_request()
|
||||
.function_call_output(call_id_success);
|
||||
let output_json: Value = serde_json::from_str(
|
||||
success_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("success output string"),
|
||||
)?;
|
||||
.function_call_output_content_and_success(call_id_success)
|
||||
.and_then(|(content, _)| content)
|
||||
.expect("success output string");
|
||||
let output_json: Value = serde_json::from_str(&success_output)?;
|
||||
assert_eq!(
|
||||
output_json["metadata"]["exit_code"].as_i64(),
|
||||
Some(0),
|
||||
@@ -348,8 +307,7 @@ async fn collect_tools(use_unified_exec: bool) -> Result<Vec<String>> {
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
test.submit_turn_with_policies(
|
||||
"list tools",
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
@@ -423,8 +381,7 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
test.submit_turn_with_policies(
|
||||
"run a long command",
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
@@ -600,8 +557,7 @@ async fn shell_spawn_failure_truncates_exec_error() -> Result<()> {
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
test.submit_turn_with_policies(
|
||||
"spawn a missing binary",
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
|
||||
Reference in New Issue
Block a user