Promote shared helpers for suite tests (#6460)

## Summary
- add `TestCodex::submit_turn_with_policies` and extend the response
helpers with reusable tool-call utilities
- update the grep_files, read_file, list_dir, shell_serialization, and
tools suites to rely on the shared helpers instead of local copies
- make the list_dir helper return `anyhow::Result` so clippy no longer
warns about `expect`

## Testing
- `just fix -p codex-core`
- `cargo test -p codex-core --test all
suite::grep_files::grep_files_tool_collects_matches`
- `cargo test -p codex-core
suite::grep_files::grep_files_tool_collects_matches -- --ignored`
(filter requests ignored tests so nothing runs, but the build stays
clean)


------
[Codex
Task](https://chatgpt.com/codex/tasks/task_i_69112d53abac83219813cab4d7cb6446)
This commit is contained in:
Ahmed Ibrahim
2025-11-13 17:12:10 -08:00
committed by GitHub
parent f3c6b1334b
commit 2a6e9b20df
9 changed files with 305 additions and 841 deletions

View File

@@ -1,6 +1,7 @@
use std::sync::Arc;
use std::sync::Mutex;
use anyhow::Result;
use serde_json::Value;
use wiremock::BodyPrintLimit;
use wiremock::Match;
@@ -9,6 +10,7 @@ use wiremock::MockBuilder;
use wiremock::MockServer;
use wiremock::Respond;
use wiremock::ResponseTemplate;
use wiremock::matchers::any;
use wiremock::matchers::method;
use wiremock::matchers::path_regex;
@@ -38,6 +40,10 @@ impl ResponseMock {
self.requests.lock().unwrap().clone()
}
pub fn last_request(&self) -> Option<ResponsesRequest> {
self.requests.lock().unwrap().last().cloned()
}
/// Returns true if any captured request contains a `function_call` with the
/// provided `call_id`.
pub fn saw_function_call(&self, call_id: &str) -> bool {
@@ -130,6 +136,42 @@ impl ResponsesRequest {
.map(str::to_string)
}
pub fn function_call_output_content_and_success(
&self,
call_id: &str,
) -> Option<(Option<String>, Option<bool>)> {
self.call_output_content_and_success(call_id, "function_call_output")
}
pub fn custom_tool_call_output_content_and_success(
&self,
call_id: &str,
) -> Option<(Option<String>, Option<bool>)> {
self.call_output_content_and_success(call_id, "custom_tool_call_output")
}
fn call_output_content_and_success(
&self,
call_id: &str,
call_type: &str,
) -> Option<(Option<String>, Option<bool>)> {
let output = self
.call_output(call_id, call_type)
.get("output")
.cloned()
.unwrap_or(Value::Null);
match output {
Value::String(text) => Some((Some(text), None)),
Value::Object(obj) => Some((
obj.get("content")
.and_then(Value::as_str)
.map(str::to_string),
obj.get("success").and_then(Value::as_bool),
)),
_ => Some((None, None)),
}
}
pub fn header(&self, name: &str) -> Option<String> {
self.0
.headers
@@ -487,6 +529,37 @@ pub async fn start_mock_server() -> MockServer {
.await
}
#[derive(Clone)]
pub struct FunctionCallResponseMocks {
pub function_call: ResponseMock,
pub completion: ResponseMock,
}
pub async fn mount_function_call_agent_response(
server: &MockServer,
call_id: &str,
arguments: &str,
tool_name: &str,
) -> FunctionCallResponseMocks {
let first_response = sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, tool_name, arguments),
ev_completed("resp-1"),
]);
let function_call = mount_sse_once_match(server, any(), first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]);
let completion = mount_sse_once_match(server, any(), second_response).await;
FunctionCallResponseMocks {
function_call,
completion,
}
}
/// Mounts a sequence of SSE response bodies and serves them in order for each
/// POST to `/v1/responses`. Panics if more requests are received than bodies
/// provided. Also asserts the exact number of expected calls.