Promote shared helpers for suite tests (#6460)

## Summary - add `TestCodex::submit_turn_with_policies` and extend the response helpers with reusable tool-call utilities - update the grep_files, read_file, list_dir, shell_serialization, and tools suites to rely on the shared helpers instead of local copies - make the list_dir helper return `anyhow::Result` so clippy no longer warns about `expect` ## Testing - `just fix -p codex-core` - `cargo test -p codex-core --test all suite::grep_files::grep_files_tool_collects_matches` - `cargo test -p codex-core suite::grep_files::grep_files_tool_collects_matches -- --ignored` (filter requests ignored tests so nothing runs, but the build stays clean) ------ [Codex Task](https://chatgpt.com/codex/tasks/task_i_69112d53abac83219813cab4d7cb6446)
2026-04-27 16:15:09 +00:00 · 2025-11-13 17:12:10 -08:00
parent f3c6b1334b
commit 2a6e9b20df
9 changed files with 305 additions and 841 deletions
--- a/codex-rs/core/tests/suite/tool_harness.rs
+++ b/codex-rs/core/tests/suite/tool_harness.rs
@@ -14,6 +14,7 @@ use codex_protocol::plan_tool::StepStatus;
 use codex_protocol::user_input::UserInput;
 use core_test_support::assert_regex_match;
 use core_test_support::responses;
+use core_test_support::responses::ResponsesRequest;
 use core_test_support::responses::ev_apply_patch_function_call;
 use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
@@ -30,12 +31,22 @@ use serde_json::Value;
 use serde_json::json;
 use wiremock::matchers::any;

-fn extract_output_text(item: &Value) -> Option<&str> {
-    item.get("output").and_then(|value| match value {
-        Value::String(text) => Some(text.as_str()),
-        Value::Object(obj) => obj.get("content").and_then(Value::as_str),
-        _ => None,
-    })
+fn call_output(req: &ResponsesRequest, call_id: &str) -> (String, Option<bool>) {
+    let raw = req.function_call_output(call_id);
+    assert_eq!(
+        raw.get("call_id").and_then(Value::as_str),
+        Some(call_id),
+        "mismatched call_id in function_call_output"
+    );
+    let (content_opt, success) = match req.function_call_output_content_and_success(call_id) {
+        Some(values) => values,
+        None => panic!("function_call_output present"),
+    };
+    let content = match content_opt {
+        Some(c) => c,
+        None => panic!("function_call_output content present"),
+    };
+    (content, success)
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -90,9 +101,8 @@ async fn shell_tool_executes_command_and_streams_output() -> anyhow::Result<()>
    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;

    let req = second_mock.single_request();
-    let output_item = req.function_call_output(call_id);
-    let output_text = extract_output_text(&output_item).expect("output text present");
-    let exec_output: Value = serde_json::from_str(output_text)?;
+    let (output_text, _) = call_output(&req, call_id);
+    let exec_output: Value = serde_json::from_str(&output_text)?;
    assert_eq!(exec_output["metadata"]["exit_code"], 0);
    let stdout = exec_output["output"].as_str().expect("stdout field");
    assert_regex_match(r"(?s)^tool harness\n?$", stdout);
@@ -174,12 +184,7 @@ async fn update_plan_tool_emits_plan_update_event() -> anyhow::Result<()> {
    assert!(saw_plan_update, "expected PlanUpdate event");

    let req = second_mock.single_request();
-    let output_item = req.function_call_output(call_id);
-    assert_eq!(
-        output_item.get("call_id").and_then(Value::as_str),
-        Some(call_id)
-    );
-    let output_text = extract_output_text(&output_item).expect("output text present");
+    let (output_text, _success_flag) = call_output(&req, call_id);
    assert_eq!(output_text, "Plan updated");

    Ok(())
@@ -252,22 +257,12 @@ async fn update_plan_tool_rejects_malformed_payload() -> anyhow::Result<()> {
    );

    let req = second_mock.single_request();
-    let output_item = req.function_call_output(call_id);
-    assert_eq!(
-        output_item.get("call_id").and_then(Value::as_str),
-        Some(call_id)
-    );
-    let output_text = extract_output_text(&output_item).expect("output text present");
+    let (output_text, success_flag) = call_output(&req, call_id);
    assert!(
        output_text.contains("failed to parse function arguments"),
        "expected parse error message in output text, got {output_text:?}"
    );
-    if let Some(success_flag) = output_item
-        .get("output")
-        .and_then(|value| value.as_object())
-        .and_then(|obj| obj.get("success"))
-        .and_then(serde_json::Value::as_bool)
-    {
+    if let Some(success_flag) = success_flag {
        assert!(
            !success_flag,
            "expected tool output to mark success=false for malformed payload"
@@ -357,12 +352,7 @@ async fn apply_patch_tool_executes_and_emits_patch_events() -> anyhow::Result<()
    assert!(patch_end_success);

    let req = second_mock.single_request();
-    let output_item = req.function_call_output(call_id);
-    assert_eq!(
-        output_item.get("call_id").and_then(Value::as_str),
-        Some(call_id)
-    );
-    let output_text = extract_output_text(&output_item).expect("output text present");
+    let (output_text, _success_flag) = call_output(&req, call_id);

    let expected_pattern = format!(
        r"(?s)^Exit code: 0
@@ -372,7 +362,7 @@ Success. Updated the following files:
 A {file_name}
 ?$"
    );
-    assert_regex_match(&expected_pattern, output_text);
+    assert_regex_match(&expected_pattern, &output_text);

    let updated_contents = fs::read_to_string(file_path)?;
    assert_eq!(
@@ -437,12 +427,7 @@ async fn apply_patch_reports_parse_diagnostics() -> anyhow::Result<()> {
    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;

    let req = second_mock.single_request();
-    let output_item = req.function_call_output(call_id);
-    assert_eq!(
-        output_item.get("call_id").and_then(Value::as_str),
-        Some(call_id)
-    );
-    let output_text = extract_output_text(&output_item).expect("output text present");
+    let (output_text, success_flag) = call_output(&req, call_id);

    assert!(
        output_text.contains("apply_patch verification failed"),
@@ -453,12 +438,7 @@ async fn apply_patch_reports_parse_diagnostics() -> anyhow::Result<()> {
        "expected parse diagnostics in output text, got {output_text:?}"
    );

-    if let Some(success_flag) = output_item
-        .get("output")
-        .and_then(|value| value.as_object())
-        .and_then(|obj| obj.get("success"))
-        .and_then(serde_json::Value::as_bool)
-    {
+    if let Some(success_flag) = success_flag {
        assert!(
            !success_flag,
            "expected tool output to mark success=false for parse failures"