mirror of
https://github.com/openai/codex.git
synced 2026-04-20 04:34:47 +00:00
Compare commits
6 Commits
codex-debu
...
jif/new-te
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cad43c500c | ||
|
|
d3bbb287ef | ||
|
|
5c229c58e8 | ||
|
|
15b40f9ae0 | ||
|
|
e2c9289504 | ||
|
|
f32b84e7fe |
2
codex-rs/Cargo.lock
generated
2
codex-rs/Cargo.lock
generated
@@ -1073,7 +1073,9 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||
name = "core_test_support"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"codex-core",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
|
||||
@@ -11,4 +11,6 @@ codex-core = { path = "../.." }
|
||||
serde_json = "1"
|
||||
tempfile = "3"
|
||||
tokio = { version = "1", features = ["time"] }
|
||||
anyhow = "1"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
wiremock = "0.6"
|
||||
|
||||
404
codex-rs/core/tests/common/agent_harness.rs
Normal file
404
codex-rs/core/tests/common/agent_harness.rs
Normal file
@@ -0,0 +1,404 @@
|
||||
use std::collections::VecDeque;
|
||||
use std::fs::File;
|
||||
use std::mem;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::WireApi;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
use tokio::time::timeout;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::Request;
|
||||
use wiremock::Respond;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
use crate::load_default_config_for_test;
|
||||
|
||||
/// The sanitized request/event pair produced by one harness run.
#[derive(Debug, Clone, PartialEq)]
pub struct HarnessOutputs {
    // Sanitized Responses API request body: a single object when one request
    // was issued, otherwise a JSON array of request bodies.
    pub request: Value,
    // Sanitized Codex events observed during the run, in order.
    pub events: Vec<Value>,
}
|
||||
|
||||
/// Result of a fixture run: what actually happened alongside what the
/// fixture's expected_* files say should happen, ready for comparison.
pub struct HarnessData {
    // Outputs recorded from the live run against the mock server.
    pub actual: HarnessOutputs,
    // Outputs loaded from the fixture's expected_request/expected_events files.
    pub expected: HarnessOutputs,
}
|
||||
|
||||
/// Run one fixture end-to-end: replay its SSE payloads from a mock
/// Responses API server, drive a real conversation with its prompt ops,
/// and return the sanitized requests/events next to the fixture's
/// expectations for the caller to compare.
///
/// # Errors
/// Fails if fixture files cannot be loaded, the conversation cannot be
/// spawned, events time out, the request count does not match the number
/// of SSE sequences, or SSE bodies are left unconsumed.
pub async fn run_fixture(dir: impl AsRef<Path>) -> Result<HarnessData> {
    let fixture = HarnessFixture::load(dir.as_ref())?;
    let sequence_count = fixture.sse_sequences.len();

    let server = MockServer::start().await;

    // Shared queue of SSE bodies; kept so we can verify it drained at the end.
    let responder_state = Arc::new(Mutex::new(VecDeque::from(fixture.sse_sequences)));
    let responder = SequentialSseResponder {
        bodies: responder_state.clone(),
    };
    let builder = Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .respond_with(responder);
    // Only set an expectation when the fixture actually defines sequences.
    let builder = if sequence_count > 0 {
        builder.expect(sequence_count as u64)
    } else {
        builder
    };
    builder.mount(&server).await;

    // Provider pointing at the mock server. env_key is set to PATH because
    // it must name a variable that is always present; retries are disabled
    // so a fixture mismatch fails immediately.
    let provider = ModelProviderInfo {
        name: "harness-mock".into(),
        base_url: Some(format!("{}/v1", server.uri())),
        env_key: Some("PATH".into()),
        env_key_instructions: None,
        wire_api: WireApi::Responses,
        query_params: None,
        http_headers: None,
        env_http_headers: None,
        request_max_retries: Some(0),
        stream_max_retries: Some(0),
        stream_idle_timeout_ms: Some(5_000),
        requires_openai_auth: false,
    };

    let codex_home = TempDir::new().context("create temp dir for config")?;
    let mut config = load_default_config_for_test(&codex_home);
    config.model_provider = provider.clone();
    config.model_provider_id = provider.name.clone();
    config
        .model_providers
        .insert(provider.name.clone(), provider.clone());

    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("test"));
    let new_conversation = conversation_manager
        .new_conversation(config.clone())
        .await
        .context("spawn conversation")?;
    // Seed the event log with the synthetic session_configured event so the
    // fixture's expected_events can include it as entry zero.
    let mut events: Vec<Event> = vec![Event {
        id: String::new(),
        msg: EventMsg::SessionConfigured(new_conversation.session_configured),
    }];
    let codex = new_conversation.conversation;

    for op in &fixture.prompt_ops {
        codex
            .submit(op.clone())
            .await
            .with_context(|| format!("submit op {op:?}"))?;
    }

    let expected_event_count = fixture.expected.events.len();
    anyhow::ensure!(
        expected_event_count >= events.len(),
        "expected events fixture must include at least the session configured event"
    );

    // Collect exactly as many events as the fixture expects, then...
    while events.len() < expected_event_count {
        let next = timeout(Duration::from_secs(10), codex.next_event())
            .await
            .context("timeout waiting for event")??;
        events.push(next);
    }

    // ...drain any stragglers so unexpected extra events surface as a
    // count mismatch in the caller's assertions rather than being dropped.
    loop {
        let extra = match timeout(Duration::from_millis(200), codex.next_event()).await {
            Ok(Ok(event)) => event,
            Ok(Err(err)) => anyhow::bail!("error receiving extra event: {err}"),
            Err(_) => break,
        };
        events.push(extra);
    }

    let received = server
        .received_requests()
        .await
        .context("read recorded requests")?;
    anyhow::ensure!(
        received.len() == sequence_count,
        "expected {sequence_count} Responses API requests but recorded {}",
        received.len(),
    );

    // Scrub machine-specific strings (cwd, temp home, server URI) so the
    // outputs compare stably across environments.
    let replacements = build_replacements(&config, codex_home.path(), &server);
    let sanitized_events = sanitize_events(events, &replacements);

    let mut request_values: Vec<Value> = Vec::new();
    for req in received {
        let body = req
            .body_json::<Value>()
            .context("parse request body JSON")?;
        let sanitized = sanitize_request(body, &replacements);
        request_values.push(sanitized);
    }

    // Every queued SSE body must have been consumed by a request.
    anyhow::ensure!(
        responder_state.lock().expect("lock bodies").is_empty(),
        "unused SSE responses remain in fixture"
    );

    // Single request -> bare object; zero or many -> array, matching the
    // shapes expected_request.json is allowed to take.
    let request_value = match request_values.len() {
        0 => Value::Array(Vec::new()),
        1 => request_values.into_iter().next().expect("request value"),
        _ => Value::Array(request_values),
    };

    let actual = HarnessOutputs {
        request: request_value,
        events: sanitized_events,
    };

    Ok(HarnessData {
        actual,
        expected: fixture.expected,
    })
}
|
||||
|
||||
/// In-memory form of one fixture directory.
struct HarnessFixture {
    // Ops to submit to the conversation, in order.
    prompt_ops: Vec<Op>,
    // Expected sanitized request and events.
    expected: HarnessOutputs,
    // One pre-rendered SSE body per expected Responses API request.
    sse_sequences: Vec<String>,
}
|
||||
|
||||
impl HarnessFixture {
    /// Load all fixture files from `dir`, attaching the fixture path to the
    /// SSE error so failures identify the offending scenario.
    fn load(dir: &Path) -> Result<Self> {
        let paths = HarnessFixturePaths::new(dir.to_path_buf());
        let sse_sequences = load_sse_sequences(&paths.response_events)
            .with_context(|| paths.context_message("load SSE fixture"))?;
        let prompt_ops: Vec<Op> = load_json(&paths.user_prompts, "user prompts")?;
        let expected_request: Value = load_json(&paths.expected_request, "expected request")?;
        let expected_events: Vec<Value> = load_json(&paths.expected_events, "expected events")?;

        let expected = HarnessOutputs {
            request: expected_request,
            events: expected_events,
        };

        Ok(Self {
            prompt_ops,
            expected,
            sse_sequences,
        })
    }
}
|
||||
|
||||
/// Well-known file locations inside a fixture directory.
struct HarnessFixturePaths {
    // The fixture directory itself, kept for error messages.
    dir: PathBuf,
    // SSE payloads the mock Responses API replays.
    response_events: PathBuf,
    // Ops submitted to the conversation.
    user_prompts: PathBuf,
    // Expected sanitized request body.
    expected_request: PathBuf,
    // Expected sanitized events.
    expected_events: PathBuf,
}
|
||||
|
||||
impl HarnessFixturePaths {
|
||||
fn new(dir: PathBuf) -> Self {
|
||||
let response_events = dir.join("response_events.json");
|
||||
let user_prompts = dir.join("user_prompts.json");
|
||||
let expected_request = dir.join("expected_request.json");
|
||||
let expected_events = dir.join("expected_events.json");
|
||||
Self {
|
||||
dir,
|
||||
response_events,
|
||||
user_prompts,
|
||||
expected_request,
|
||||
expected_events,
|
||||
}
|
||||
}
|
||||
|
||||
fn context_message(&self, action: &str) -> String {
|
||||
format!("{action} for fixture {}", self.dir.display())
|
||||
}
|
||||
}
|
||||
|
||||
/// Open and deserialize a JSON fixture file into `T`, labelling both the
/// open and the parse error with `description` and the file path.
fn load_json<T>(path: &Path, description: &str) -> Result<T>
where
    T: DeserializeOwned,
{
    let file = File::open(path)
        .with_context(|| format!("open {description} fixture {}", path.display()))?;
    serde_json::from_reader(file)
        .with_context(|| format!("parse {description} fixture {}", path.display()))
}
|
||||
|
||||
/// Wiremock responder that replays a queue of SSE bodies: each incoming
/// request pops and returns the next body; requests beyond the queue get
/// an HTTP 500 so over-calling fails loudly.
struct SequentialSseResponder {
    // Shared with run_fixture, which asserts the queue drained at the end.
    bodies: Arc<Mutex<VecDeque<String>>>,
}

impl Respond for SequentialSseResponder {
    fn respond(&self, _: &Request) -> ResponseTemplate {
        let mut bodies = self.bodies.lock().expect("lock SSE bodies");
        match bodies.pop_front() {
            Some(body) => ResponseTemplate::new(200)
                .insert_header("content-type", "text/event-stream")
                .set_body_raw(body, "text/event-stream"),
            None => ResponseTemplate::new(500).set_body_string("unexpected additional SSE request"),
        }
    }
}
|
||||
|
||||
/// Read a `response_events.json` fixture and render it into one SSE body
/// per expected Responses API request.
///
/// Two layouts are accepted:
/// - an array of event objects -> a single request's SSE stream;
/// - an array of arrays of event objects -> one SSE stream per request,
///   replayed in order.
///
/// Anything else is rejected with a descriptive error.
fn load_sse_sequences(path: &Path) -> Result<Vec<String>> {
    let file = File::open(path).with_context(|| format!("open SSE file {}", path.display()))?;
    let value: Value = serde_json::from_reader(file)
        .with_context(|| format!("parse SSE fixture {}", path.display()))?;
    match value {
        Value::Array(entries) => {
            // NOTE(review): an empty array vacuously satisfies this branch
            // and yields one empty SSE body — confirm that is intended.
            if entries.iter().all(Value::is_object) {
                Ok(vec![events_to_sse(entries)?])
            } else if entries.iter().all(Value::is_array) {
                let mut bodies = Vec::new();
                for seq in entries {
                    // all() above guarantees each entry is an array; this
                    // error path is defensive.
                    let seq_events = seq.as_array().cloned().ok_or_else(|| {
                        anyhow::anyhow!(
                            "SSE fixture {} entries must be objects or arrays",
                            path.display()
                        )
                    })?;
                    bodies.push(events_to_sse(seq_events)?);
                }
                Ok(bodies)
            } else {
                // Mixed object/array entries (or other types) are rejected.
                anyhow::bail!(
                    "SSE fixture {} must be an array of objects or an array of arrays",
                    path.display()
                );
            }
        }
        _ => anyhow::bail!("SSE fixture {} must be a JSON array", path.display()),
    }
}
|
||||
|
||||
/// Render a list of JSON event objects into a server-sent-events body.
///
/// Each event becomes an `event: <type>` line; a `data:` line carrying the
/// full JSON object is added only when the event has fields beyond `type`.
/// Events are separated by blank lines per the SSE wire format.
fn events_to_sse(events: Vec<Value>) -> Result<String> {
    let mut body = String::new();
    for event in events {
        let Some(obj) = event.as_object() else {
            anyhow::bail!("SSE event must be an object: {event}");
        };
        let kind = obj
            .get("type")
            .and_then(Value::as_str)
            .ok_or_else(|| anyhow::anyhow!("SSE event missing type: {event}"))?;
        body.push_str(&format!("event: {kind}\n"));
        // Only emit a data line when there is a payload beyond "type".
        if obj.len() > 1 {
            body.push_str("data: ");
            body.push_str(&serde_json::to_string(&event)?);
            body.push('\n');
        }
        // Blank line terminates each SSE event.
        body.push('\n');
    }
    Ok(body)
}
|
||||
|
||||
/// Collect (pattern, placeholder) pairs used to scrub machine-specific
/// strings — the working directory, the temporary CODEX_HOME, and the mock
/// server URI — out of recorded requests and events.
fn build_replacements(
    config: &codex_core::config::Config,
    codex_home: &Path,
    server: &MockServer,
) -> Vec<(String, &'static str)> {
    let mut pairs = Vec::new();
    let cwd = config.cwd.to_string_lossy().into_owned();
    // Empty patterns are skipped: replacing "" would corrupt every string.
    if !cwd.is_empty() {
        pairs.push((cwd, "<CWD>"));
    }
    let home = codex_home.to_string_lossy().into_owned();
    if !home.is_empty() {
        pairs.push((home, "<CODEX_HOME>"));
    }
    pairs.push((server.uri(), "<MOCK_SERVER>"));
    pairs
}
|
||||
|
||||
fn sanitize_events(events: Vec<Event>, replacements: &[(String, &'static str)]) -> Vec<Value> {
|
||||
events
|
||||
.into_iter()
|
||||
.map(|event| {
|
||||
let mut value = serde_json::to_value(event).expect("serialize event");
|
||||
sanitize_value(&mut value, replacements);
|
||||
|
||||
if let Some(msg) = value.get_mut("msg")
|
||||
&& let Some(msg_obj) = msg.as_object_mut()
|
||||
{
|
||||
let msg_type = msg_obj
|
||||
.get("type")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or_default();
|
||||
if msg_type == "session_configured" {
|
||||
msg_obj.insert(
|
||||
"session_id".to_string(),
|
||||
Value::String("<session>".to_string()),
|
||||
);
|
||||
if msg_obj.contains_key("history_log_id") {
|
||||
msg_obj.insert("history_log_id".to_string(), json!(0));
|
||||
}
|
||||
if msg_obj.contains_key("rollout_path") {
|
||||
msg_obj.insert(
|
||||
"rollout_path".to_string(),
|
||||
Value::String("<rollout>".to_string()),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
value
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn sanitize_request(mut value: Value, replacements: &[(String, &'static str)]) -> Value {
|
||||
sanitize_value(&mut value, replacements);
|
||||
if let Some(obj) = value.as_object_mut()
|
||||
&& obj.contains_key("prompt_cache_key")
|
||||
{
|
||||
obj.insert(
|
||||
"prompt_cache_key".to_string(),
|
||||
Value::String("<prompt_cache_key>".to_string()),
|
||||
);
|
||||
}
|
||||
value
|
||||
}
|
||||
|
||||
fn sanitize_value(value: &mut Value, replacements: &[(String, &'static str)]) {
|
||||
match value {
|
||||
Value::String(s) => {
|
||||
let mut current = mem::take(s);
|
||||
for (pattern, replacement) in replacements {
|
||||
if !pattern.is_empty() && current.contains(pattern) {
|
||||
current = current.replace(pattern, replacement);
|
||||
}
|
||||
}
|
||||
normalize_line_endings(&mut current);
|
||||
*s = current;
|
||||
}
|
||||
Value::Array(items) => {
|
||||
for item in items {
|
||||
sanitize_value(item, replacements);
|
||||
}
|
||||
}
|
||||
Value::Object(map) => {
|
||||
for item in map.values_mut() {
|
||||
sanitize_value(item, replacements);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Rewrite `text` in place so every CRLF or bare CR becomes a single LF.
fn normalize_line_endings(text: &mut String) {
    // Fast path: untouched when no carriage returns are present.
    if !text.contains('\r') {
        return;
    }
    let mut out = String::with_capacity(text.len());
    let mut chars = text.chars().peekable();
    while let Some(c) = chars.next() {
        if c == '\r' {
            // Swallow the LF of a CRLF pair so it isn't doubled.
            if chars.peek() == Some(&'\n') {
                chars.next();
            }
            out.push('\n');
        } else {
            out.push(c);
        }
    }
    *text = out;
}
|
||||
@@ -126,3 +126,5 @@ where
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod agent_harness;
|
||||
|
||||
32
codex-rs/core/tests/fixtures/harness/README.md
vendored
Normal file
32
codex-rs/core/tests/fixtures/harness/README.md
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
# Agent Harness Fixtures
|
||||
|
||||
These fixtures drive the integration tests in `core/tests/suite/agent_harness.rs`.
|
||||
Each subdirectory under this folder corresponds to a single end-to-end scenario.
|
||||
|
||||
## Adding a New Fixture
|
||||
|
||||
1. Create a new directory inside `tests/fixtures/harness/` and give it a
|
||||
descriptive name (e.g. `multi_tool_call`).
|
||||
2. Add the following JSON files inside the directory:
|
||||
- `user_prompts.json`: the list of `Op` objects that will be submitted to the
|
||||
harness.
|
||||
- `response_events.json`: the SSE payloads that the mock Responses API will
|
||||
replay. A top-level array can contain objects (single request) or arrays
|
||||
(multiple sequential requests).
|
||||
- `expected_request.json`: the sanitized request body we expect the harness
|
||||
to send. This can be either a single object or an array when multiple
|
||||
requests are issued.
|
||||
- `expected_events.json`: the sanitized Codex events we expect to observe.
|
||||
3. Run `cargo test -p codex-core harness_fixtures_match_expectations` to verify
   the scenario passes (the test iterates every fixture directory, so your new
   fixture is included automatically).
|
||||
|
||||
The fixture-discovery loop in `core/tests/suite/agent_harness.rs` will
automatically pick up the new directory once it exists.
|
||||
|
||||
## Expected JSON Files Are Partial
|
||||
|
||||
The comparison helpers only assert that the fields present in the expected JSON
|
||||
match the actual values. Any keys omitted from `expected_request.json` or
|
||||
`expected_events.json` are treated as "don't care" and are ignored during the
|
||||
assertion. This keeps the fixtures stable even when Codex introduces new fields;
|
||||
only include the pieces that matter for the scenario you are describing.
|
||||
78
codex-rs/core/tests/fixtures/harness/multi_delta/expected_events.json
vendored
Normal file
78
codex-rs/core/tests/fixtures/harness/multi_delta/expected_events.json
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
[
|
||||
{
|
||||
"id": "",
|
||||
"msg": {
|
||||
"history_entry_count": 0,
|
||||
"history_log_id": 0,
|
||||
"model": "gpt-5",
|
||||
"rollout_path": "<rollout>",
|
||||
"session_id": "<session>",
|
||||
"type": "session_configured"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"model_context_window": 272000,
|
||||
"type": "task_started"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": "I can",
|
||||
"type": "agent_message_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": " hear",
|
||||
"type": "agent_message_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": " you!",
|
||||
"type": "agent_message_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"message": "I can hear you!",
|
||||
"type": "agent_message"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"info": {
|
||||
"last_token_usage": {
|
||||
"cached_input_tokens": 0,
|
||||
"input_tokens": 12,
|
||||
"output_tokens": 6,
|
||||
"reasoning_output_tokens": 0,
|
||||
"total_tokens": 18
|
||||
},
|
||||
"model_context_window": 272000,
|
||||
"total_token_usage": {
|
||||
"cached_input_tokens": 0,
|
||||
"input_tokens": 12,
|
||||
"output_tokens": 6,
|
||||
"reasoning_output_tokens": 0,
|
||||
"total_tokens": 18
|
||||
}
|
||||
},
|
||||
"type": "token_count"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"last_agent_message": "I can hear you!",
|
||||
"type": "task_complete"
|
||||
}
|
||||
}
|
||||
]
|
||||
105
codex-rs/core/tests/fixtures/harness/multi_delta/expected_request.json
vendored
Normal file
105
codex-rs/core/tests/fixtures/harness/multi_delta/expected_request.json
vendored
Normal file
File diff suppressed because one or more lines are too long
23
codex-rs/core/tests/fixtures/harness/multi_delta/response_events.json
vendored
Normal file
23
codex-rs/core/tests/fixtures/harness/multi_delta/response_events.json
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
[
|
||||
{"type": "response.created", "response": {"id": "resp_multi"}},
|
||||
{"type": "response.output_text.delta", "delta": "I can"},
|
||||
{"type": "response.output_text.delta", "delta": " hear"},
|
||||
{"type": "response.output_text.delta", "delta": " you!"},
|
||||
{"type": "response.output_item.done", "item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "output_text", "text": "I can hear you!"}
|
||||
]
|
||||
}},
|
||||
{"type": "response.completed", "response": {
|
||||
"id": "resp_multi",
|
||||
"usage": {
|
||||
"input_tokens": 12,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 6,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 18
|
||||
}
|
||||
}}
|
||||
]
|
||||
8
codex-rs/core/tests/fixtures/harness/multi_delta/user_prompts.json
vendored
Normal file
8
codex-rs/core/tests/fixtures/harness/multi_delta/user_prompts.json
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
[
|
||||
{
|
||||
"type": "user_input",
|
||||
"items": [
|
||||
{"type": "text", "text": "Can you hear me?"}
|
||||
]
|
||||
}
|
||||
]
|
||||
85
codex-rs/core/tests/fixtures/harness/reasoning_turn/expected_events.json
vendored
Normal file
85
codex-rs/core/tests/fixtures/harness/reasoning_turn/expected_events.json
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
[
|
||||
{
|
||||
"id": "",
|
||||
"msg": {
|
||||
"history_entry_count": 0,
|
||||
"history_log_id": 0,
|
||||
"model": "gpt-5",
|
||||
"rollout_path": "<rollout>",
|
||||
"session_id": "<session>",
|
||||
"type": "session_configured"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"model_context_window": 272000,
|
||||
"type": "task_started"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": "Considering Earth's rotation.",
|
||||
"type": "agent_reasoning_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": " Sunlight scatters differently.",
|
||||
"type": "agent_reasoning_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": "Sunrise happens when your location rotates into sunlight.",
|
||||
"type": "agent_message_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": " Sunset is the reverse, heading into night.",
|
||||
"type": "agent_message_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"message": "Sunrise is when your spot on Earth rotates into daylight; sunset is the same rotation carrying you back into the planet's shadow. The colors differ because morning light travels through cooler, cleaner air, while evenings pass through dustier skies.",
|
||||
"type": "agent_message"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"info": {
|
||||
"last_token_usage": {
|
||||
"cached_input_tokens": 0,
|
||||
"input_tokens": 18,
|
||||
"output_tokens": 32,
|
||||
"reasoning_output_tokens": 6,
|
||||
"total_tokens": 50
|
||||
},
|
||||
"model_context_window": 272000,
|
||||
"total_token_usage": {
|
||||
"cached_input_tokens": 0,
|
||||
"input_tokens": 18,
|
||||
"output_tokens": 32,
|
||||
"reasoning_output_tokens": 6,
|
||||
"total_tokens": 50
|
||||
}
|
||||
},
|
||||
"type": "token_count"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"last_agent_message": "Sunrise is when your spot on Earth rotates into daylight; sunset is the same rotation carrying you back into the planet's shadow. The colors differ because morning light travels through cooler, cleaner air, while evenings pass through dustier skies.",
|
||||
"type": "task_complete"
|
||||
}
|
||||
}
|
||||
]
|
||||
105
codex-rs/core/tests/fixtures/harness/reasoning_turn/expected_request.json
vendored
Normal file
105
codex-rs/core/tests/fixtures/harness/reasoning_turn/expected_request.json
vendored
Normal file
File diff suppressed because one or more lines are too long
56
codex-rs/core/tests/fixtures/harness/reasoning_turn/response_events.json
vendored
Normal file
56
codex-rs/core/tests/fixtures/harness/reasoning_turn/response_events.json
vendored
Normal file
@@ -0,0 +1,56 @@
|
||||
[
|
||||
{
|
||||
"type": "response.created",
|
||||
"response": {
|
||||
"id": "resp_reason"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "response.reasoning_summary_part.added"
|
||||
},
|
||||
{
|
||||
"type": "response.reasoning_summary_text.delta",
|
||||
"delta": "Considering Earth's rotation."
|
||||
},
|
||||
{
|
||||
"type": "response.reasoning_summary_text.delta",
|
||||
"delta": " Sunlight scatters differently."
|
||||
},
|
||||
{
|
||||
"type": "response.output_text.delta",
|
||||
"delta": "Sunrise happens when your location rotates into sunlight."
|
||||
},
|
||||
{
|
||||
"type": "response.output_text.delta",
|
||||
"delta": " Sunset is the reverse, heading into night."
|
||||
},
|
||||
{
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "output_text",
|
||||
"text": "Sunrise is when your spot on Earth rotates into daylight; sunset is the same rotation carrying you back into the planet's shadow. The colors differ because morning light travels through cooler, cleaner air, while evenings pass through dustier skies."
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "resp_reason",
|
||||
"usage": {
|
||||
"input_tokens": 18,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 32,
|
||||
"output_tokens_details": {
|
||||
"reasoning_tokens": 6
|
||||
},
|
||||
"total_tokens": 50,
|
||||
"reasoning_tokens": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
11
codex-rs/core/tests/fixtures/harness/reasoning_turn/user_prompts.json
vendored
Normal file
11
codex-rs/core/tests/fixtures/harness/reasoning_turn/user_prompts.json
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
[
|
||||
{
|
||||
"type": "user_input",
|
||||
"items": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Explain the difference between sunrise and sunset."
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
63
codex-rs/core/tests/fixtures/harness/single_turn/expected_events.json
vendored
Normal file
63
codex-rs/core/tests/fixtures/harness/single_turn/expected_events.json
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
[
|
||||
{
|
||||
"id": "",
|
||||
"msg": {
|
||||
"history_entry_count": 0,
|
||||
"history_log_id": 0,
|
||||
"model": "gpt-5",
|
||||
"rollout_path": "<rollout>",
|
||||
"session_id": "<session>",
|
||||
"type": "session_configured"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"model_context_window": 272000,
|
||||
"type": "task_started"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"delta": "Hello from fixture",
|
||||
"type": "agent_message_delta"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"message": "Hello from fixture",
|
||||
"type": "agent_message"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"info": {
|
||||
"last_token_usage": {
|
||||
"cached_input_tokens": 0,
|
||||
"input_tokens": 10,
|
||||
"output_tokens": 5,
|
||||
"reasoning_output_tokens": 0,
|
||||
"total_tokens": 15
|
||||
},
|
||||
"total_token_usage": {
|
||||
"cached_input_tokens": 0,
|
||||
"input_tokens": 10,
|
||||
"output_tokens": 5,
|
||||
"reasoning_output_tokens": 0,
|
||||
"total_tokens": 15
|
||||
}
|
||||
},
|
||||
"type": "token_count"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0",
|
||||
"msg": {
|
||||
"last_agent_message": "Hello from fixture",
|
||||
"type": "task_complete"
|
||||
}
|
||||
}
|
||||
]
|
||||
105
codex-rs/core/tests/fixtures/harness/single_turn/expected_request.json
vendored
Normal file
105
codex-rs/core/tests/fixtures/harness/single_turn/expected_request.json
vendored
Normal file
File diff suppressed because one or more lines are too long
21
codex-rs/core/tests/fixtures/harness/single_turn/response_events.json
vendored
Normal file
21
codex-rs/core/tests/fixtures/harness/single_turn/response_events.json
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
[
|
||||
{"type": "response.created", "response": {"id": "resp_123"}},
|
||||
{"type": "response.output_text.delta", "delta": "Hello from fixture"},
|
||||
{"type": "response.output_item.done", "item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "output_text", "text": "Hello from fixture"}
|
||||
]
|
||||
}},
|
||||
{"type": "response.completed", "response": {
|
||||
"id": "resp_123",
|
||||
"usage": {
|
||||
"input_tokens": 10,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 5,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 15
|
||||
}
|
||||
}}
|
||||
]
|
||||
8
codex-rs/core/tests/fixtures/harness/single_turn/user_prompts.json
vendored
Normal file
8
codex-rs/core/tests/fixtures/harness/single_turn/user_prompts.json
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
[
|
||||
{
|
||||
"type": "user_input",
|
||||
"items": [
|
||||
{"type": "text", "text": "Hello?"}
|
||||
]
|
||||
}
|
||||
]
|
||||
116
codex-rs/core/tests/suite/agent_harness.rs
Normal file
116
codex-rs/core/tests/suite/agent_harness.rs
Normal file
@@ -0,0 +1,116 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use core_test_support::agent_harness;
|
||||
use core_test_support::agent_harness::HarnessOutputs;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::Value;
|
||||
|
||||
const HARNESS_FIXTURE_ROOT: &str = "tests/fixtures/harness";
|
||||
|
||||
/// Run every fixture directory under the harness fixture root and compare
/// its recorded outputs against its expected_* files.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn harness_fixtures_match_expectations() {
    // Fixtures run sequentially so a failure names exactly one scenario.
    for fixture in fixture_names() {
        run_fixture_test(&fixture).await;
    }
}
|
||||
|
||||
/// Execute a single fixture end-to-end and assert its outputs match.
async fn run_fixture_test(fixture: &str) {
    let harness = agent_harness::run_fixture(fixture_dir(fixture))
        .await
        .unwrap_or_else(|err| panic!("run agent harness for fixture {fixture}: {err}"));

    assert_harness_outputs_match(fixture, &harness.actual, &harness.expected);
}
|
||||
|
||||
/// Assert the recorded request and events match the expectations, using
/// partial (expected-keys-only) matching for objects.
#[track_caller]
fn assert_harness_outputs_match(fixture: &str, actual: &HarnessOutputs, expected: &HarnessOutputs) {
    assert_value_matches(fixture, &actual.request, &expected.request, "request");
    // Event counts must match exactly; individual events are then compared
    // field-by-field against the expected (possibly partial) objects.
    assert_eq!(
        actual.events.len(),
        expected.events.len(),
        "event count mismatch in fixture {fixture}: expected {} events but got {}",
        expected.events.len(),
        actual.events.len()
    );
    for (index, expected_event) in expected.events.iter().enumerate() {
        let path = format!("events[{index}]");
        let actual_event = actual
            .events
            .get(index)
            .unwrap_or_else(|| panic!("missing actual event at {path} for fixture {fixture}"));
        assert_value_matches(fixture, actual_event, expected_event, &path);
    }
}
|
||||
|
||||
/// Recursively assert that `actual` satisfies `expected`.
///
/// Matching is partial for objects: only keys present in `expected` are
/// checked, so fixtures may omit fields they do not care about. Arrays
/// must match in length and element-wise; scalars must be equal. `path`
/// is a dotted/indexed trail used to pinpoint the mismatch.
#[track_caller]
fn assert_value_matches(fixture: &str, actual: &Value, expected: &Value, path: &str) {
    match expected {
        Value::Object(expected_map) => {
            let actual_map = actual.as_object().unwrap_or_else(|| {
                panic!("expected object at {path} in fixture {fixture}, got {actual:?}")
            });
            // Keys absent from the expected object are intentionally ignored.
            for (key, expected_value) in expected_map {
                let next_path = if path.is_empty() {
                    key.to_string()
                } else {
                    format!("{path}.{key}")
                };
                let actual_value = actual_map.get(key).unwrap_or_else(|| {
                    panic!("missing field {next_path} in actual value for fixture {fixture}")
                });
                assert_value_matches(fixture, actual_value, expected_value, &next_path);
            }
        }
        Value::Array(expected_items) => {
            let actual_items = actual.as_array().unwrap_or_else(|| {
                panic!("expected array at {path} in fixture {fixture}, got {actual:?}")
            });
            // Unlike objects, arrays are compared exhaustively.
            assert_eq!(
                actual_items.len(),
                expected_items.len(),
                "array length mismatch at {path} in fixture {fixture}"
            );
            for (index, expected_value) in expected_items.iter().enumerate() {
                let next_path = if path.is_empty() {
                    format!("[{index}]")
                } else {
                    format!("{path}[{index}]")
                };
                let actual_value = actual_items.get(index).unwrap_or_else(|| {
                    panic!("missing array element at {next_path} for fixture {fixture}")
                });
                assert_value_matches(fixture, actual_value, expected_value, &next_path);
            }
        }
        _ => {
            assert_eq!(actual, expected, "mismatch at {path} in fixture {fixture}");
        }
    }
}
|
||||
/// Absolute path to the harness fixture root inside this crate.
fn fixtures_root() -> PathBuf {
    PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(HARNESS_FIXTURE_ROOT)
}
|
||||
|
||||
/// Absolute path to a single fixture's directory.
fn fixture_dir(fixture: &str) -> PathBuf {
    fixtures_root().join(fixture)
}
|
||||
|
||||
fn fixture_names() -> Vec<String> {
|
||||
let dir = fixtures_root();
|
||||
let mut names: Vec<String> = std::fs::read_dir(&dir)
|
||||
.unwrap_or_else(|err| panic!("read fixture directory {}: {err}", dir.display()))
|
||||
.filter_map(|entry| {
|
||||
entry.ok().and_then(|e| {
|
||||
let path = e.path();
|
||||
(path.is_dir()).then(|| {
|
||||
path.file_name()
|
||||
.and_then(|name| name.to_str())
|
||||
.map(|name| name.to_string())
|
||||
})
|
||||
})
|
||||
})
|
||||
.flatten()
|
||||
.collect();
|
||||
names.sort();
|
||||
names
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
// Aggregates all former standalone integration tests as modules.
|
||||
|
||||
mod agent_harness;
|
||||
mod cli_stream;
|
||||
mod client;
|
||||
mod compact;
|
||||
|
||||
Reference in New Issue
Block a user