mirror of
https://github.com/openai/codex.git
synced 2026-04-20 04:34:47 +00:00
Compare commits
6 Commits
codex-debu
...
jif/new-te
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cad43c500c | ||
|
|
d3bbb287ef | ||
|
|
5c229c58e8 | ||
|
|
15b40f9ae0 | ||
|
|
e2c9289504 | ||
|
|
f32b84e7fe |
2
codex-rs/Cargo.lock
generated
2
codex-rs/Cargo.lock
generated
@@ -1073,7 +1073,9 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||
name = "core_test_support"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"codex-core",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
|
||||
@@ -11,4 +11,6 @@ codex-core = { path = "../.." }
|
||||
serde_json = "1"
|
||||
tempfile = "3"
|
||||
tokio = { version = "1", features = ["time"] }
|
||||
anyhow = "1"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
wiremock = "0.6"
|
||||
|
||||
404
codex-rs/core/tests/common/agent_harness.rs
Normal file
404
codex-rs/core/tests/common/agent_harness.rs
Normal file
@@ -0,0 +1,404 @@
|
||||
use std::collections::VecDeque;
|
||||
use std::fs::File;
|
||||
use std::mem;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::WireApi;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
use tokio::time::timeout;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::Request;
|
||||
use wiremock::Respond;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
use crate::load_default_config_for_test;
|
||||
|
||||
/// The sanitized request/event pair produced by one harness run.
#[derive(Debug, Clone, PartialEq)]
pub struct HarnessOutputs {
    // Sanitized Responses API request body: a single object when one request
    // was issued, otherwise a JSON array of request bodies.
    pub request: Value,
    // Sanitized Codex events observed during the run, in order.
    pub events: Vec<Value>,
}
|
||||
|
||||
/// Result of a fixture run: what actually happened alongside what the
/// fixture's expected_* files say should happen, ready for comparison.
pub struct HarnessData {
    // Outputs recorded from the live run against the mock server.
    pub actual: HarnessOutputs,
    // Outputs loaded from the fixture's expected_request/expected_events files.
    pub expected: HarnessOutputs,
}
|
||||
|
||||
/// Run one fixture end-to-end: replay its SSE payloads from a mock
/// Responses API server, drive a real conversation with its prompt ops,
/// and return the sanitized requests/events next to the fixture's
/// expectations for the caller to compare.
///
/// # Errors
/// Fails if fixture files cannot be loaded, the conversation cannot be
/// spawned, events time out, the request count does not match the number
/// of SSE sequences, or SSE bodies are left unconsumed.
pub async fn run_fixture(dir: impl AsRef<Path>) -> Result<HarnessData> {
    let fixture = HarnessFixture::load(dir.as_ref())?;
    let sequence_count = fixture.sse_sequences.len();

    let server = MockServer::start().await;

    // Shared queue of SSE bodies; kept so we can verify it drained at the end.
    let responder_state = Arc::new(Mutex::new(VecDeque::from(fixture.sse_sequences)));
    let responder = SequentialSseResponder {
        bodies: responder_state.clone(),
    };
    let builder = Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .respond_with(responder);
    // Only set an expectation when the fixture actually defines sequences.
    let builder = if sequence_count > 0 {
        builder.expect(sequence_count as u64)
    } else {
        builder
    };
    builder.mount(&server).await;

    // Provider pointing at the mock server. env_key is set to PATH because
    // it must name a variable that is always present; retries are disabled
    // so a fixture mismatch fails immediately.
    let provider = ModelProviderInfo {
        name: "harness-mock".into(),
        base_url: Some(format!("{}/v1", server.uri())),
        env_key: Some("PATH".into()),
        env_key_instructions: None,
        wire_api: WireApi::Responses,
        query_params: None,
        http_headers: None,
        env_http_headers: None,
        request_max_retries: Some(0),
        stream_max_retries: Some(0),
        stream_idle_timeout_ms: Some(5_000),
        requires_openai_auth: false,
    };

    let codex_home = TempDir::new().context("create temp dir for config")?;
    let mut config = load_default_config_for_test(&codex_home);
    config.model_provider = provider.clone();
    config.model_provider_id = provider.name.clone();
    config
        .model_providers
        .insert(provider.name.clone(), provider.clone());

    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("test"));
    let new_conversation = conversation_manager
        .new_conversation(config.clone())
        .await
        .context("spawn conversation")?;
    // Seed the event log with the synthetic session_configured event so the
    // fixture's expected_events can include it as entry zero.
    let mut events: Vec<Event> = vec![Event {
        id: String::new(),
        msg: EventMsg::SessionConfigured(new_conversation.session_configured),
    }];
    let codex = new_conversation.conversation;

    for op in &fixture.prompt_ops {
        codex
            .submit(op.clone())
            .await
            .with_context(|| format!("submit op {op:?}"))?;
    }

    let expected_event_count = fixture.expected.events.len();
    anyhow::ensure!(
        expected_event_count >= events.len(),
        "expected events fixture must include at least the session configured event"
    );

    // Collect exactly as many events as the fixture expects, then...
    while events.len() < expected_event_count {
        let next = timeout(Duration::from_secs(10), codex.next_event())
            .await
            .context("timeout waiting for event")??;
        events.push(next);
    }

    // ...drain any stragglers so unexpected extra events surface as a
    // count mismatch in the caller's assertions rather than being dropped.
    loop {
        let extra = match timeout(Duration::from_millis(200), codex.next_event()).await {
            Ok(Ok(event)) => event,
            Ok(Err(err)) => anyhow::bail!("error receiving extra event: {err}"),
            Err(_) => break,
        };
        events.push(extra);
    }

    let received = server
        .received_requests()
        .await
        .context("read recorded requests")?;
    anyhow::ensure!(
        received.len() == sequence_count,
        "expected {sequence_count} Responses API requests but recorded {}",
        received.len(),
    );

    // Scrub machine-specific strings (cwd, temp home, server URI) so the
    // outputs compare stably across environments.
    let replacements = build_replacements(&config, codex_home.path(), &server);
    let sanitized_events = sanitize_events(events, &replacements);

    let mut request_values: Vec<Value> = Vec::new();
    for req in received {
        let body = req
            .body_json::<Value>()
            .context("parse request body JSON")?;
        let sanitized = sanitize_request(body, &replacements);
        request_values.push(sanitized);
    }

    // Every queued SSE body must have been consumed by a request.
    anyhow::ensure!(
        responder_state.lock().expect("lock bodies").is_empty(),
        "unused SSE responses remain in fixture"
    );

    // Single request -> bare object; zero or many -> array, matching the
    // shapes expected_request.json is allowed to take.
    let request_value = match request_values.len() {
        0 => Value::Array(Vec::new()),
        1 => request_values.into_iter().next().expect("request value"),
        _ => Value::Array(request_values),
    };

    let actual = HarnessOutputs {
        request: request_value,
        events: sanitized_events,
    };

    Ok(HarnessData {
        actual,
        expected: fixture.expected,
    })
}
|
||||
|
||||
/// In-memory form of one fixture directory.
struct HarnessFixture {
    // Ops to submit to the conversation, in order.
    prompt_ops: Vec<Op>,
    // Expected sanitized request and events.
    expected: HarnessOutputs,
    // One pre-rendered SSE body per expected Responses API request.
    sse_sequences: Vec<String>,
}
|
||||
|
||||
impl HarnessFixture {
    /// Load all fixture files from `dir`, attaching the fixture path to the
    /// SSE error so failures identify the offending scenario.
    fn load(dir: &Path) -> Result<Self> {
        let paths = HarnessFixturePaths::new(dir.to_path_buf());
        let sse_sequences = load_sse_sequences(&paths.response_events)
            .with_context(|| paths.context_message("load SSE fixture"))?;
        let prompt_ops: Vec<Op> = load_json(&paths.user_prompts, "user prompts")?;
        let expected_request: Value = load_json(&paths.expected_request, "expected request")?;
        let expected_events: Vec<Value> = load_json(&paths.expected_events, "expected events")?;

        let expected = HarnessOutputs {
            request: expected_request,
            events: expected_events,
        };

        Ok(Self {
            prompt_ops,
            expected,
            sse_sequences,
        })
    }
}
|
||||
|
||||
/// Well-known file locations inside a fixture directory.
struct HarnessFixturePaths {
    // The fixture directory itself, kept for error messages.
    dir: PathBuf,
    // SSE payloads the mock Responses API replays.
    response_events: PathBuf,
    // Ops submitted to the conversation.
    user_prompts: PathBuf,
    // Expected sanitized request body.
    expected_request: PathBuf,
    // Expected sanitized events.
    expected_events: PathBuf,
}
|
||||
|
||||
impl HarnessFixturePaths {
|
||||
fn new(dir: PathBuf) -> Self {
|
||||
let response_events = dir.join("response_events.json");
|
||||
let user_prompts = dir.join("user_prompts.json");
|
||||
let expected_request = dir.join("expected_request.json");
|
||||
let expected_events = dir.join("expected_events.json");
|
||||
Self {
|
||||
dir,
|
||||
response_events,
|
||||
user_prompts,
|
||||
expected_request,
|
||||
expected_events,
|
||||
}
|
||||
}
|
||||
|
||||
fn context_message(&self, action: &str) -> String {
|
||||
format!("{action} for fixture {}", self.dir.display())
|
||||
}
|
||||
}
|
||||
|
||||
/// Open and deserialize a JSON fixture file into `T`, labelling both the
/// open and the parse error with `description` and the file path.
fn load_json<T>(path: &Path, description: &str) -> Result<T>
where
    T: DeserializeOwned,
{
    let file = File::open(path)
        .with_context(|| format!("open {description} fixture {}", path.display()))?;
    serde_json::from_reader(file)
        .with_context(|| format!("parse {description} fixture {}", path.display()))
}
|
||||
|
||||
/// Wiremock responder that replays a queue of SSE bodies: each incoming
/// request pops and returns the next body; requests beyond the queue get
/// an HTTP 500 so over-calling fails loudly.
struct SequentialSseResponder {
    // Shared with run_fixture, which asserts the queue drained at the end.
    bodies: Arc<Mutex<VecDeque<String>>>,
}

impl Respond for SequentialSseResponder {
    fn respond(&self, _: &Request) -> ResponseTemplate {
        let mut bodies = self.bodies.lock().expect("lock SSE bodies");
        match bodies.pop_front() {
            Some(body) => ResponseTemplate::new(200)
                .insert_header("content-type", "text/event-stream")
                .set_body_raw(body, "text/event-stream"),
            None => ResponseTemplate::new(500).set_body_string("unexpected additional SSE request"),
        }
    }
}
|
||||
|
||||
/// Read a `response_events.json` fixture and render it into one SSE body
/// per expected Responses API request.
///
/// Two layouts are accepted:
/// - an array of event objects -> a single request's SSE stream;
/// - an array of arrays of event objects -> one SSE stream per request,
///   replayed in order.
///
/// Anything else is rejected with a descriptive error.
fn load_sse_sequences(path: &Path) -> Result<Vec<String>> {
    let file = File::open(path).with_context(|| format!("open SSE file {}", path.display()))?;
    let value: Value = serde_json::from_reader(file)
        .with_context(|| format!("parse SSE fixture {}", path.display()))?;
    match value {
        Value::Array(entries) => {
            // NOTE(review): an empty array vacuously satisfies this branch
            // and yields one empty SSE body — confirm that is intended.
            if entries.iter().all(Value::is_object) {
                Ok(vec![events_to_sse(entries)?])
            } else if entries.iter().all(Value::is_array) {
                let mut bodies = Vec::new();
                for seq in entries {
                    // all() above guarantees each entry is an array; this
                    // error path is defensive.
                    let seq_events = seq.as_array().cloned().ok_or_else(|| {
                        anyhow::anyhow!(
                            "SSE fixture {} entries must be objects or arrays",
                            path.display()
                        )
                    })?;
                    bodies.push(events_to_sse(seq_events)?);
                }
                Ok(bodies)
            } else {
                // Mixed object/array entries (or other types) are rejected.
                anyhow::bail!(
                    "SSE fixture {} must be an array of objects or an array of arrays",
                    path.display()
                );
            }
        }
        _ => anyhow::bail!("SSE fixture {} must be a JSON array", path.display()),
    }
}
|
||||
|
||||
/// Render a list of JSON event objects into a server-sent-events body.
///
/// Each event becomes an `event: <type>` line; a `data:` line carrying the
/// full JSON object is added only when the event has fields beyond `type`.
/// Events are separated by blank lines per the SSE wire format.
fn events_to_sse(events: Vec<Value>) -> Result<String> {
    let mut body = String::new();
    for event in events {
        let Some(obj) = event.as_object() else {
            anyhow::bail!("SSE event must be an object: {event}");
        };
        let kind = obj
            .get("type")
            .and_then(Value::as_str)
            .ok_or_else(|| anyhow::anyhow!("SSE event missing type: {event}"))?;
        body.push_str(&format!("event: {kind}\n"));
        // Only emit a data line when there is a payload beyond "type".
        if obj.len() > 1 {
            body.push_str("data: ");
            body.push_str(&serde_json::to_string(&event)?);
            body.push('\n');
        }
        // Blank line terminates each SSE event.
        body.push('\n');
    }
    Ok(body)
}
|
||||
|
||||
/// Collect (pattern, placeholder) pairs used to scrub machine-specific
/// strings — the working directory, the temporary CODEX_HOME, and the mock
/// server URI — out of recorded requests and events.
fn build_replacements(
    config: &codex_core::config::Config,
    codex_home: &Path,
    server: &MockServer,
) -> Vec<(String, &'static str)> {
    let mut pairs = Vec::new();
    let cwd = config.cwd.to_string_lossy().into_owned();
    // Empty patterns are skipped: replacing "" would corrupt every string.
    if !cwd.is_empty() {
        pairs.push((cwd, "<CWD>"));
    }
    let home = codex_home.to_string_lossy().into_owned();
    if !home.is_empty() {
        pairs.push((home, "<CODEX_HOME>"));
    }
    pairs.push((server.uri(), "<MOCK_SERVER>"));
    pairs
}
|
||||
|
||||
fn sanitize_events(events: Vec<Event>, replacements: &[(String, &'static str)]) -> Vec<Value> {
|
||||
events
|
||||
.into_iter()
|
||||
.map(|event| {
|
||||
let mut value = serde_json::to_value(event).expect("serialize event");
|
||||
sanitize_value(&mut value, replacements);
|
||||
|
||||
if let Some(msg) = value.get_mut("msg")
|
||||
&& let Some(msg_obj) = msg.as_object_mut()
|
||||
{
|
||||
let msg_type = msg_obj
|
||||
.get("type")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or_default();
|
||||
if msg_type == "session_configured" {
|
||||
msg_obj.insert(
|
||||
"session_id".to_string(),
|
||||
Value::String("<session>".to_string()),
|
||||
);
|
||||
if msg_obj.contains_key("history_log_id") {
|
||||
msg_obj.insert("history_log_id".to_string(), json!(0));
|
||||
}
|
||||
if msg_obj.contains_key("rollout_path") {
|
||||
msg_obj.insert(
|
||||
"rollout_path".to_string(),
|
||||
Value::String("<rollout>".to_string()),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
value
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn sanitize_request(mut value: Value, replacements: &[(String, &'static str)]) -> Value {
|
||||
sanitize_value(&mut value, replacements);
|
||||
if let Some(obj) = value.as_object_mut()
|
||||
&& obj.contains_key("prompt_cache_key")
|
||||
{
|
||||
obj.insert(
|
||||
"prompt_cache_key".to_string(),
|
||||
Value::String("<prompt_cache_key>".to_string()),
|
||||
);
|
||||
}
|
||||
value
|
||||
}
|
||||
|
||||
fn sanitize_value(value: &mut Value, replacements: &[(String, &'static str)]) {
|
||||
match value {
|
||||
Value::String(s) => {
|
||||
let mut current = mem::take(s);
|
||||
for (pattern, replacement) in replacements {
|
||||
if !pattern.is_empty() && current.contains(pattern) {
|
||||
current = current.replace(pattern, replacement);
|
||||
}
|
||||
}
|
||||
normalize_line_endings(&mut current);
|
||||
*s = current;
|
||||
}
|
||||
Value::Array(items) => {
|
||||
for item in items {
|
||||
sanitize_value(item, replacements);
|
||||
}
|
||||
}
|
||||
Value::Object(map) => {
|
||||
for item in map.values_mut() {
|
||||
sanitize_value(item, replacements);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Rewrite `text` in place so every CRLF or bare CR becomes a single LF.
fn normalize_line_endings(text: &mut String) {
    // Fast path: untouched when no carriage returns are present.
    if !text.contains('\r') {
        return;
    }
    let mut out = String::with_capacity(text.len());
    let mut chars = text.chars().peekable();
    while let Some(c) = chars.next() {
        if c == '\r' {
            // Swallow the LF of a CRLF pair so it isn't doubled.
            if chars.peek() == Some(&'\n') {
                chars.next();
            }
            out.push('\n');
        } else {
            out.push(c);
        }
    }
    *text = out;
}
|
||||
@@ -126,3 +126,5 @@ where
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod agent_harness;
|
||||
|
||||
32
codex-rs/core/tests/fixtures/harness/README.md
vendored
Normal file
32
codex-rs/core/tests/fixtures/harness/README.md
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
# Agent Harness Fixtures
|
||||
|
||||
These fixtures drive the integration tests in `core/tests/suite/agent_harness.rs`.
|
||||
Each subdirectory under this folder corresponds to a single end-to-end scenario.
|
||||
|
||||
## Adding a New Fixture
|
||||
|
||||
1. Create a new directory inside `tests/fixtures/harness/` and give it a
|
||||
descriptive name (e.g. `multi_tool_call`).
|
||||
2. Add the following JSON files inside the directory:
|
||||
- `user_prompts.json`: the list of `Op` objects that will be submitted to the
|
||||
harness.
|
||||
- `response_events.json`: the SSE payloads that the mock Responses API will
|
||||
replay. A top-level array can contain objects (single request) or arrays
|
||||
(multiple sequential requests).
|
||||
- `expected_request.json`: the sanitized request body we expect the harness
|
||||
to send. This can be either a single object or an array when multiple
|
||||
requests are issued.
|
||||
- `expected_events.json`: the sanitized Codex events we expect to observe.
|
||||
3. Run `cargo test -p codex-core harness_fixtures_match_expectations` to verify
   the scenario passes (the test iterates every fixture directory, so your new
   fixture is included automatically).
|
||||
|
||||
The fixture-discovery loop in `core/tests/suite/agent_harness.rs` will
automatically pick up the new directory once it exists.
|
||||
|
||||
## Expected JSON Files Are Partial
|
||||
|
||||
The comparison helpers only assert that the fields present in the expected JSON
|
||||
match the actual values. Any keys omitted from `expected_request.json` or
|
||||
`expected_events.json` are treated as "don't care" and are ignored during the
|
||||
assertion. This keeps the fixtures stable even when Codex introduces new fields;
|
||||
only include the pieces that matter for the scenario you are describing.
|
||||
78
codex-rs/core/tests/fixtures/harness/multi_delta/expected_events.json
vendored
Normal file
78
codex-rs/core/tests/fixtures/harness/multi_delta/expected_events.json
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
[
|
||||
{
|
||||
"id": "",
|
||||
"msg": {
|
||||
"history_entry_count": 0,
|
||||
"history_log_id": 0,
|
||||
"model": "gpt-5",
|
||||
"rollout_path": "<rollout>",
|
||||
"session_id": "<session>",
|
||||
"type": "session_configured"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"model_context_window": 272000,
|
||||
"type": "task_started"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": "I can",
|
||||
"type": "agent_message_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": " hear",
|
||||
"type": "agent_message_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": " you!",
|
||||
"type": "agent_message_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"message": "I can hear you!",
|
||||
"type": "agent_message"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"info": {
|
||||
"last_token_usage": {
|
||||
"cached_input_tokens": 0,
|
||||
"input_tokens": 12,
|
||||
"output_tokens": 6,
|
||||
"reasoning_output_tokens": 0,
|
||||
"total_tokens": 18
|
||||
},
|
||||
"model_context_window": 272000,
|
||||
"total_token_usage": {
|
||||
"cached_input_tokens": 0,
|
||||
"input_tokens": 12,
|
||||
"output_tokens": 6,
|
||||
"reasoning_output_tokens": 0,
|
||||
"total_tokens": 18
|
||||
}
|
||||
},
|
||||
"type": "token_count"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"last_agent_message": "I can hear you!",
|
||||
"type": "task_complete"
|
||||
}
|
||||
}
|
||||
]
|
||||
105
codex-rs/core/tests/fixtures/harness/multi_delta/expected_request.json
vendored
Normal file
105
codex-rs/core/tests/fixtures/harness/multi_delta/expected_request.json
vendored
Normal file
File diff suppressed because one or more lines are too long
23
codex-rs/core/tests/fixtures/harness/multi_delta/response_events.json
vendored
Normal file
23
codex-rs/core/tests/fixtures/harness/multi_delta/response_events.json
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
[
|
||||
{"type": "response.created", "response": {"id": "resp_multi"}},
|
||||
{"type": "response.output_text.delta", "delta": "I can"},
|
||||
{"type": "response.output_text.delta", "delta": " hear"},
|
||||
{"type": "response.output_text.delta", "delta": " you!"},
|
||||
{"type": "response.output_item.done", "item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "output_text", "text": "I can hear you!"}
|
||||
]
|
||||
}},
|
||||
{"type": "response.completed", "response": {
|
||||
"id": "resp_multi",
|
||||
"usage": {
|
||||
"input_tokens": 12,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 6,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 18
|
||||
}
|
||||
}}
|
||||
]
|
||||
8
codex-rs/core/tests/fixtures/harness/multi_delta/user_prompts.json
vendored
Normal file
8
codex-rs/core/tests/fixtures/harness/multi_delta/user_prompts.json
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
[
|
||||
{
|
||||
"type": "user_input",
|
||||
"items": [
|
||||
{"type": "text", "text": "Can you hear me?"}
|
||||
]
|
||||
}
|
||||
]
|
||||
85
codex-rs/core/tests/fixtures/harness/reasoning_turn/expected_events.json
vendored
Normal file
85
codex-rs/core/tests/fixtures/harness/reasoning_turn/expected_events.json
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
[
|
||||
{
|
||||
"id": "",
|
||||
"msg": {
|
||||
"history_entry_count": 0,
|
||||
"history_log_id": 0,
|
||||
"model": "gpt-5",
|
||||
"rollout_path": "<rollout>",
|
||||
"session_id": "<session>",
|
||||
"type": "session_configured"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"model_context_window": 272000,
|
||||
"type": "task_started"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": "Considering Earth's rotation.",
|
||||
"type": "agent_reasoning_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": " Sunlight scatters differently.",
|
||||
"type": "agent_reasoning_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": "Sunrise happens when your location rotates into sunlight.",
|
||||
"type": "agent_message_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": " Sunset is the reverse, heading into night.",
|
||||
"type": "agent_message_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"message": "Sunrise is when your spot on Earth rotates into daylight; sunset is the same rotation carrying you back into the planet's shadow. The colors differ because morning light travels through cooler, cleaner air, while evenings pass through dustier skies.",
|
||||
"type": "agent_message"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"info": {
|
||||
"last_token_usage": {
|
||||
"cached_input_tokens": 0,
|
||||
"input_tokens": 18,
|
||||
"output_tokens": 32,
|
||||
"reasoning_output_tokens": 6,
|
||||
"total_tokens": 50
|
||||
},
|
||||
"model_context_window": 272000,
|
||||
"total_token_usage": {
|
||||
"cached_input_tokens": 0,
|
||||
"input_tokens": 18,
|
||||
"output_tokens": 32,
|
||||
"reasoning_output_tokens": 6,
|
||||
"total_tokens": 50
|
||||
}
|
||||
},
|
||||
"type": "token_count"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"last_agent_message": "Sunrise is when your spot on Earth rotates into daylight; sunset is the same rotation carrying you back into the planet's shadow. The colors differ because morning light travels through cooler, cleaner air, while evenings pass through dustier skies.",
|
||||
"type": "task_complete"
|
||||
}
|
||||
}
|
||||
]
|
||||
105
codex-rs/core/tests/fixtures/harness/reasoning_turn/expected_request.json
vendored
Normal file
105
codex-rs/core/tests/fixtures/harness/reasoning_turn/expected_request.json
vendored
Normal file
File diff suppressed because one or more lines are too long
56
codex-rs/core/tests/fixtures/harness/reasoning_turn/response_events.json
vendored
Normal file
56
codex-rs/core/tests/fixtures/harness/reasoning_turn/response_events.json
vendored
Normal file
@@ -0,0 +1,56 @@
|
||||
[
|
||||
{
|
||||
"type": "response.created",
|
||||
"response": {
|
||||
"id": "resp_reason"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "response.reasoning_summary_part.added"
|
||||
},
|
||||
{
|
||||
"type": "response.reasoning_summary_text.delta",
|
||||
"delta": "Considering Earth's rotation."
|
||||
},
|
||||
{
|
||||
"type": "response.reasoning_summary_text.delta",
|
||||
"delta": " Sunlight scatters differently."
|
||||
},
|
||||
{
|
||||
"type": "response.output_text.delta",
|
||||
"delta": "Sunrise happens when your location rotates into sunlight."
|
||||
},
|
||||
{
|
||||
"type": "response.output_text.delta",
|
||||
"delta": " Sunset is the reverse, heading into night."
|
||||
},
|
||||
{
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "output_text",
|
||||
"text": "Sunrise is when your spot on Earth rotates into daylight; sunset is the same rotation carrying you back into the planet's shadow. The colors differ because morning light travels through cooler, cleaner air, while evenings pass through dustier skies."
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "resp_reason",
|
||||
"usage": {
|
||||
"input_tokens": 18,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 32,
|
||||
"output_tokens_details": {
|
||||
"reasoning_tokens": 6
|
||||
},
|
||||
"total_tokens": 50,
|
||||
"reasoning_tokens": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
11
codex-rs/core/tests/fixtures/harness/reasoning_turn/user_prompts.json
vendored
Normal file
11
codex-rs/core/tests/fixtures/harness/reasoning_turn/user_prompts.json
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
[
|
||||
{
|
||||
"type": "user_input",
|
||||
"items": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Explain the difference between sunrise and sunset."
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
63
codex-rs/core/tests/fixtures/harness/single_turn/expected_events.json
vendored
Normal file
63
codex-rs/core/tests/fixtures/harness/single_turn/expected_events.json
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
[
|
||||
{
|
||||
"id": "",
|
||||
"msg": {
|
||||
"history_entry_count": 0,
|
||||
"history_log_id": 0,
|
||||
"model": "gpt-5",
|
||||
"rollout_path": "<rollout>",
|
||||
"session_id": "<session>",
|
||||
"type": "session_configured"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"model_context_window": 272000,
|
||||
"type": "task_started"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": "Hello from fixture",
|
||||
"type": "agent_message_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"message": "Hello from fixture",
|
||||
"type": "agent_message"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"info": {
|
||||
"last_token_usage": {
|
||||
"cached_input_tokens": 0,
|
||||
"input_tokens": 10,
|
||||
"output_tokens": 5,
|
||||
"reasoning_output_tokens": 0,
|
||||
"total_tokens": 15
|
||||
},
|
||||
"total_token_usage": {
|
||||
"cached_input_tokens": 0,
|
||||
"input_tokens": 10,
|
||||
"output_tokens": 5,
|
||||
"reasoning_output_tokens": 0,
|
||||
"total_tokens": 15
|
||||
}
|
||||
},
|
||||
"type": "token_count"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"last_agent_message": "Hello from fixture",
|
||||
"type": "task_complete"
|
||||
}
|
||||
}
|
||||
]
|
||||
105
codex-rs/core/tests/fixtures/harness/single_turn/expected_request.json
vendored
Normal file
105
codex-rs/core/tests/fixtures/harness/single_turn/expected_request.json
vendored
Normal file
File diff suppressed because one or more lines are too long
21
codex-rs/core/tests/fixtures/harness/single_turn/response_events.json
vendored
Normal file
21
codex-rs/core/tests/fixtures/harness/single_turn/response_events.json
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
[
|
||||
{"type": "response.created", "response": {"id": "resp_123"}},
|
||||
{"type": "response.output_text.delta", "delta": "Hello from fixture"},
|
||||
{"type": "response.output_item.done", "item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "output_text", "text": "Hello from fixture"}
|
||||
]
|
||||
}},
|
||||
{"type": "response.completed", "response": {
|
||||
"id": "resp_123",
|
||||
"usage": {
|
||||
"input_tokens": 10,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 5,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 15
|
||||
}
|
||||
}}
|
||||
]
|
||||
8
codex-rs/core/tests/fixtures/harness/single_turn/user_prompts.json
vendored
Normal file
8
codex-rs/core/tests/fixtures/harness/single_turn/user_prompts.json
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
[
|
||||
{
|
||||
"type": "user_input",
|
||||
"items": [
|
||||
{"type": "text", "text": "Hello?"}
|
||||
]
|
||||
}
|
||||
]
|
||||
116
codex-rs/core/tests/suite/agent_harness.rs
Normal file
116
codex-rs/core/tests/suite/agent_harness.rs
Normal file
@@ -0,0 +1,116 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use core_test_support::agent_harness;
|
||||
use core_test_support::agent_harness::HarnessOutputs;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::Value;
|
||||
|
||||
const HARNESS_FIXTURE_ROOT: &str = "tests/fixtures/harness";
|
||||
|
||||
/// Run every fixture directory under the harness fixture root and compare
/// its recorded outputs against its expected_* files.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn harness_fixtures_match_expectations() {
    // Fixtures run sequentially so a failure names exactly one scenario.
    for fixture in fixture_names() {
        run_fixture_test(&fixture).await;
    }
}
|
||||
|
||||
/// Execute a single fixture end-to-end and assert its outputs match.
async fn run_fixture_test(fixture: &str) {
    let harness = agent_harness::run_fixture(fixture_dir(fixture))
        .await
        .unwrap_or_else(|err| panic!("run agent harness for fixture {fixture}: {err}"));

    assert_harness_outputs_match(fixture, &harness.actual, &harness.expected);
}
|
||||
|
||||
/// Assert the recorded request and events match the expectations, using
/// partial (expected-keys-only) matching for objects.
#[track_caller]
fn assert_harness_outputs_match(fixture: &str, actual: &HarnessOutputs, expected: &HarnessOutputs) {
    assert_value_matches(fixture, &actual.request, &expected.request, "request");
    // Event counts must match exactly; individual events are then compared
    // field-by-field against the expected (possibly partial) objects.
    assert_eq!(
        actual.events.len(),
        expected.events.len(),
        "event count mismatch in fixture {fixture}: expected {} events but got {}",
        expected.events.len(),
        actual.events.len()
    );
    for (index, expected_event) in expected.events.iter().enumerate() {
        let path = format!("events[{index}]");
        let actual_event = actual
            .events
            .get(index)
            .unwrap_or_else(|| panic!("missing actual event at {path} for fixture {fixture}"));
        assert_value_matches(fixture, actual_event, expected_event, &path);
    }
}
|
||||
|
||||
/// Recursively assert that `actual` satisfies `expected`.
///
/// Matching is partial for objects: only keys present in `expected` are
/// checked, so fixtures may omit fields they do not care about. Arrays
/// must match in length and element-wise; scalars must be equal. `path`
/// is a dotted/indexed trail used to pinpoint the mismatch.
#[track_caller]
fn assert_value_matches(fixture: &str, actual: &Value, expected: &Value, path: &str) {
    match expected {
        Value::Object(expected_map) => {
            let actual_map = actual.as_object().unwrap_or_else(|| {
                panic!("expected object at {path} in fixture {fixture}, got {actual:?}")
            });
            // Keys absent from the expected object are intentionally ignored.
            for (key, expected_value) in expected_map {
                let next_path = if path.is_empty() {
                    key.to_string()
                } else {
                    format!("{path}.{key}")
                };
                let actual_value = actual_map.get(key).unwrap_or_else(|| {
                    panic!("missing field {next_path} in actual value for fixture {fixture}")
                });
                assert_value_matches(fixture, actual_value, expected_value, &next_path);
            }
        }
        Value::Array(expected_items) => {
            let actual_items = actual.as_array().unwrap_or_else(|| {
                panic!("expected array at {path} in fixture {fixture}, got {actual:?}")
            });
            // Unlike objects, arrays are compared exhaustively.
            assert_eq!(
                actual_items.len(),
                expected_items.len(),
                "array length mismatch at {path} in fixture {fixture}"
            );
            for (index, expected_value) in expected_items.iter().enumerate() {
                let next_path = if path.is_empty() {
                    format!("[{index}]")
                } else {
                    format!("{path}[{index}]")
                };
                let actual_value = actual_items.get(index).unwrap_or_else(|| {
                    panic!("missing array element at {next_path} for fixture {fixture}")
                });
                assert_value_matches(fixture, actual_value, expected_value, &next_path);
            }
        }
        _ => {
            assert_eq!(actual, expected, "mismatch at {path} in fixture {fixture}");
        }
    }
}
|
||||
/// Absolute path to the harness fixture root inside this crate.
fn fixtures_root() -> PathBuf {
    PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(HARNESS_FIXTURE_ROOT)
}
|
||||
|
||||
/// Absolute path to a single fixture's directory.
fn fixture_dir(fixture: &str) -> PathBuf {
    fixtures_root().join(fixture)
}
|
||||
|
||||
fn fixture_names() -> Vec<String> {
|
||||
let dir = fixtures_root();
|
||||
let mut names: Vec<String> = std::fs::read_dir(&dir)
|
||||
.unwrap_or_else(|err| panic!("read fixture directory {}: {err}", dir.display()))
|
||||
.filter_map(|entry| {
|
||||
entry.ok().and_then(|e| {
|
||||
let path = e.path();
|
||||
(path.is_dir()).then(|| {
|
||||
path.file_name()
|
||||
.and_then(|name| name.to_str())
|
||||
.map(|name| name.to_string())
|
||||
})
|
||||
})
|
||||
})
|
||||
.flatten()
|
||||
.collect();
|
||||
names.sort();
|
||||
names
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
// Aggregates all former standalone integration tests as modules.
|
||||
|
||||
mod agent_harness;
|
||||
mod cli_stream;
|
||||
mod client;
|
||||
mod compact;
|
||||
|
||||
Reference in New Issue
Block a user