Adressing feedback

This commit is contained in:
Ahmed Ibrahim
2025-07-12 17:15:37 -07:00
parent 0ece374c58
commit 51257e2fd0
3 changed files with 122 additions and 88 deletions

1
codex-rs/Cargo.lock generated
View File

@@ -606,6 +606,7 @@ dependencies = [
"codex-login",
"codex-mcp-server",
"codex-tui",
"indoc",
"predicates",
"serde_json",
"tempfile",

View File

@@ -43,3 +43,4 @@ predicates = "3"
tempfile = "3"
wiremock = "0.6"
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
indoc = "2"

View File

@@ -1,78 +1,27 @@
#![allow(clippy::unwrap_used)]
//! End-to-end integration tests for the `codex` CLI.
//!
//! These spin up a local [`wiremock`][] server to stand in for the MCP server
//! and then run the real compiled `codex` binary against it. The goal is to
//! verify the high-level request/response flow rather than the details of the
//! individual async functions.
//!
//! [`wiremock`]: https://docs.rs/wiremock
use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use predicates::prelude::*;
use std::fs;
use std::path::Path;
use tempfile::TempDir;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};
fn write_config(dir: &Path, server: &MockServer) {
fs::write(
dir.join("config.toml"),
format!(
r#"model_provider = "mock"
model = "test-model"
[model_providers.mock]
name = "mock"
base_url = "{}/v1"
env_key = "PATH"
wire_api = "responses"
"#,
server.uri()
),
)
.unwrap();
}
fn sse_message(text: &str) -> String {
format!(
"event: response.output_item.done\n\
data: {{\"type\":\"response.output_item.done\",\"item\":{{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{{\"type\":\"output_text\",\"text\":\"{text}\"}}]}}}}\n\n\
event: response.completed\n\
data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"resp1\",\"output\":[]}}}}\n\n\n"
)
}
fn sse_function_call() -> String {
let call = serde_json::json!({
"type": "response.output_item.done",
"item": {
"type": "function_call",
"name": "shell",
"arguments": "{\"command\":[\"echo\",\"hi\"]}",
"call_id": "call1"
}
});
let completed = serde_json::json!({
"type": "response.completed",
"response": {"id": "resp1", "output": []}
});
format!(
"event: response.output_item.done\ndata: {call}\n\n\
event: response.completed\ndata: {completed}\n\n\n"
)
}
fn sse_final_after_call() -> String {
let msg = serde_json::json!({
"type": "response.output_item.done",
"item": {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "done"}]}
});
let completed = serde_json::json!({
"type": "response.completed",
"response": {"id": "resp2", "output": []}
});
format!(
"event: response.output_item.done\ndata: {msg}\n\n\
event: response.completed\ndata: {completed}\n\n\n"
)
}
// ----- tests -----
/// Sends a single simple prompt and verifies that the streamed response is
/// surfaced to the user. This exercises the most common "ask a question, get a
/// textual answer" flow.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn full_conversation_turn_integration() {
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
@@ -81,35 +30,42 @@ async fn full_conversation_turn_integration() {
}
let server = MockServer::start().await;
let resp = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse_message("Hello, world."), "text/event-stream");
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(resp)
.respond_with(
ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse_message("Hello, world."), "text/event-stream"),
)
.expect(1)
.mount(&server)
.await;
// Disable retries — the mock server will fail hard if we make an unexpected
// request, so retries only slow the test down.
unsafe {
std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
}
let home = TempDir::new().unwrap();
let codex_home = TempDir::new().unwrap();
let sandbox = TempDir::new().unwrap();
write_config(home.path(), &server);
write_config(codex_home.path(), &server);
let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap();
cmd.env("CODEX_HOME", home.path());
cmd.current_dir(sandbox.path());
cmd.arg("exec").arg("--skip-git-repo-check").arg("Hello");
cmd.env("CODEX_HOME", codex_home.path())
.current_dir(sandbox.path())
.arg("exec")
.arg("--skip-git-repo-check")
.arg("Hello");
cmd.assert()
.success()
.stdout(predicate::str::contains("Hello, world."));
}
/// Simulates a tool invocation (`shell`) followed by a second assistant message
/// once the tool call completes.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn tool_invocation_flow() {
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
@@ -119,22 +75,21 @@ async fn tool_invocation_flow() {
let server = MockServer::start().await;
// The first request returns a function-call item; the second returns the
// final assistant message. Use an atomic counter to serve them in order.
struct SeqResponder {
count: std::sync::atomic::AtomicUsize,
}
impl wiremock::Respond for SeqResponder {
fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
use std::sync::atomic::Ordering;
let n = self.count.fetch_add(1, Ordering::SeqCst);
if n == 0 {
ResponseTemplate::new(200)
match self.count.fetch_add(1, Ordering::SeqCst) {
0 => ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse_function_call(), "text/event-stream")
} else {
ResponseTemplate::new(200)
.set_body_raw(sse_function_call(), "text/event-stream"),
_ => ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse_final_after_call(), "text/event-stream")
.set_body_raw(sse_final_after_call(), "text/event-stream"),
}
}
}
@@ -153,14 +108,14 @@ async fn tool_invocation_flow() {
std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
}
let home = TempDir::new().unwrap();
let codex_home = TempDir::new().unwrap();
let sandbox = TempDir::new().unwrap();
write_config(home.path(), &server);
write_config(codex_home.path(), &server);
let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap();
cmd.env("CODEX_HOME", home.path());
cmd.current_dir(sandbox.path());
cmd.arg("exec")
cmd.env("CODEX_HOME", codex_home.path())
.current_dir(sandbox.path())
.arg("exec")
.arg("--skip-git-repo-check")
.arg("Run shell");
@@ -169,3 +124,80 @@ async fn tool_invocation_flow() {
.stdout(predicate::str::contains("exec echo hi"))
.stdout(predicate::str::contains("hi"));
}
// ----- helpers (keep below the tests) -----
/// Write a minimal `config.toml` pointing the CLI at the mock server.
fn write_config(codex_home: &Path, server: &MockServer) {
fs::write(
codex_home.join("config.toml"),
format!(
r#"
model_provider = "mock"
model = "test-model"
[model_providers.mock]
name = "mock"
base_url = "{}/v1"
env_key = "PATH"
wire_api = "responses"
"#,
server.uri()
),
)
.unwrap();
}
/// Small helper to generate an SSE stream with a single assistant message.
fn sse_message(text: &str) -> String {
const TEMPLATE: &str = r#"event: response.output_item.done
data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"TEXT_PLACEHOLDER"}]}}
event: response.completed
data: {"type":"response.completed","response":{"id":"resp1","output":[]}}
"#;
TEMPLATE.replace("TEXT_PLACEHOLDER", text)
}
/// Helper to craft an SSE stream that returns a `function_call`.
fn sse_function_call() -> String {
let call = serde_json::json!({
"type": "response.output_item.done",
"item": {
"type": "function_call",
"name": "shell",
"arguments": "{\"command\":[\"echo\",\"hi\"]}",
"call_id": "call1"
}
});
let completed = serde_json::json!({
"type": "response.completed",
"response": {"id": "resp1", "output": []}
});
format!(
"event: response.output_item.done\ndata: {call}\n\n\
event: response.completed\ndata: {completed}\n\n\n"
)
}
/// SSE stream for the assistant's final message after the tool call returns.
fn sse_final_after_call() -> String {
let msg = serde_json::json!({
"type": "response.output_item.done",
"item": {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "done"}]}
});
let completed = serde_json::json!({
"type": "response.completed",
"response": {"id": "resp2", "output": []}
});
format!(
"event: response.output_item.done\ndata: {msg}\n\n\
event: response.completed\ndata: {completed}\n\n\n"
)
}