mirror of
https://github.com/openai/codex.git
synced 2026-02-01 22:47:52 +00:00
Compare commits
1 Commits
1271d450b1
...
codex/refa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4432cbdb90 |
@@ -15,6 +15,7 @@ use core_test_support::wait_for_event;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use codex_core::codex::compact::SUMMARIZATION_PROMPT;
|
||||
use core_test_support::responses::ResponsesRequest;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_completed_with_tokens;
|
||||
@@ -72,19 +73,19 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
body.contains("\"text\":\"hello world\"")
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
};
|
||||
mount_sse_once_match(&server, first_matcher, sse1).await;
|
||||
let first_mock = mount_sse_once_match(&server, first_matcher, sse1).await;
|
||||
|
||||
let second_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("You have exceeded the maximum number of tokens")
|
||||
};
|
||||
mount_sse_once_match(&server, second_matcher, sse2).await;
|
||||
let second_mock = mount_sse_once_match(&server, second_matcher, sse2).await;
|
||||
|
||||
let third_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(&format!("\"text\":\"{THIRD_USER_MSG}\""))
|
||||
};
|
||||
mount_sse_once_match(&server, third_matcher, sse3).await;
|
||||
let third_mock = mount_sse_once_match(&server, third_matcher, sse3).await;
|
||||
|
||||
// Build config pointing to the mock server and spawn Codex.
|
||||
let model_provider = ModelProviderInfo {
|
||||
@@ -130,16 +131,13 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Inspect the three captured requests.
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(requests.len(), 3, "expected exactly three requests");
|
||||
let req1 = first_mock.single_request();
|
||||
let req2 = second_mock.single_request();
|
||||
let req3 = third_mock.single_request();
|
||||
|
||||
let req1 = &requests[0];
|
||||
let req2 = &requests[1];
|
||||
let req3 = &requests[2];
|
||||
|
||||
let body1 = req1.body_json::<serde_json::Value>().unwrap();
|
||||
let body2 = req2.body_json::<serde_json::Value>().unwrap();
|
||||
let body3 = req3.body_json::<serde_json::Value>().unwrap();
|
||||
let body1 = req1.body_json();
|
||||
let body2 = req2.body_json();
|
||||
let body3 = req3.body_json();
|
||||
|
||||
// Manual compact should keep the baseline developer instructions.
|
||||
let instr1 = body1.get("instructions").and_then(|v| v.as_str()).unwrap();
|
||||
@@ -288,7 +286,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
&& !body.contains(SECOND_AUTO_MSG)
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
};
|
||||
mount_sse_once_match(&server, first_matcher, sse1).await;
|
||||
let first_mock = mount_sse_once_match(&server, first_matcher, sse1).await;
|
||||
|
||||
let second_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
@@ -296,13 +294,13 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
&& body.contains(FIRST_AUTO_MSG)
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
};
|
||||
mount_sse_once_match(&server, second_matcher, sse2).await;
|
||||
let second_mock = mount_sse_once_match(&server, second_matcher, sse2).await;
|
||||
|
||||
let third_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("You have exceeded the maximum number of tokens")
|
||||
};
|
||||
mount_sse_once_match(&server, third_matcher, sse3).await;
|
||||
let third_mock = mount_sse_once_match(&server, third_matcher, sse3).await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
@@ -343,36 +341,36 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
// wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
let mut requests = first_mock.requests();
|
||||
requests.extend(second_mock.requests());
|
||||
requests.extend(third_mock.requests());
|
||||
assert!(
|
||||
requests.len() >= 3,
|
||||
"auto compact should add at least a third request, got {}",
|
||||
requests.len()
|
||||
);
|
||||
let is_auto_compact = |req: &wiremock::Request| {
|
||||
std::str::from_utf8(&req.body)
|
||||
.unwrap_or("")
|
||||
let bodies: Vec<_> = requests.iter().map(ResponsesRequest::body_json).collect();
|
||||
let is_auto_compact = |body: &serde_json::Value| {
|
||||
body.to_string()
|
||||
.contains("You have exceeded the maximum number of tokens")
|
||||
};
|
||||
let auto_compact_count = requests.iter().filter(|req| is_auto_compact(req)).count();
|
||||
let auto_compact_count = bodies.iter().filter(|body| is_auto_compact(body)).count();
|
||||
assert_eq!(
|
||||
auto_compact_count, 1,
|
||||
"expected exactly one auto compact request"
|
||||
);
|
||||
let auto_compact_index = requests
|
||||
let auto_compact_index = bodies
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find_map(|(idx, req)| is_auto_compact(req).then_some(idx))
|
||||
.find_map(|(idx, body)| is_auto_compact(body).then_some(idx))
|
||||
.expect("auto compact request missing");
|
||||
assert_eq!(
|
||||
auto_compact_index, 2,
|
||||
"auto compact should add a third request"
|
||||
);
|
||||
|
||||
let body_first = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
let body3 = requests[auto_compact_index]
|
||||
.body_json::<serde_json::Value>()
|
||||
.unwrap();
|
||||
let body_first = &bodies[0];
|
||||
let body3 = &bodies[auto_compact_index];
|
||||
let instructions = body3
|
||||
.get("instructions")
|
||||
.and_then(|v| v.as_str())
|
||||
@@ -433,7 +431,7 @@ async fn auto_compact_persists_rollout_entries() {
|
||||
&& !body.contains(SECOND_AUTO_MSG)
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
};
|
||||
mount_sse_once_match(&server, first_matcher, sse1).await;
|
||||
let first_mock = mount_sse_once_match(&server, first_matcher, sse1).await;
|
||||
|
||||
let second_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
@@ -441,13 +439,13 @@ async fn auto_compact_persists_rollout_entries() {
|
||||
&& body.contains(FIRST_AUTO_MSG)
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
};
|
||||
mount_sse_once_match(&server, second_matcher, sse2).await;
|
||||
let second_mock = mount_sse_once_match(&server, second_matcher, sse2).await;
|
||||
|
||||
let third_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("You have exceeded the maximum number of tokens")
|
||||
};
|
||||
mount_sse_once_match(&server, third_matcher, sse3).await;
|
||||
let third_mock = mount_sse_once_match(&server, third_matcher, sse3).await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
@@ -595,14 +593,17 @@ async fn auto_compact_stops_after_failed_attempt() {
|
||||
);
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
let mut requests = first_mock.requests();
|
||||
requests.extend(second_mock.requests());
|
||||
requests.extend(third_mock.requests());
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
3,
|
||||
"auto compact should attempt at most one summarization before erroring"
|
||||
);
|
||||
|
||||
let last_body = requests[2].body_json::<serde_json::Value>().unwrap();
|
||||
let bodies: Vec<_> = requests.iter().map(ResponsesRequest::body_json).collect();
|
||||
let last_body = &bodies[2];
|
||||
let input = last_body
|
||||
.get("input")
|
||||
.and_then(|v| v.as_array())
|
||||
@@ -780,7 +781,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
|
||||
ev_completed_with_tokens("r6", 120),
|
||||
]);
|
||||
|
||||
mount_sse_sequence(&server, vec![sse1, sse2, sse3, sse4, sse5, sse6]).await;
|
||||
let request_log = mount_sse_sequence(&server, vec![sse1, sse2, sse3, sse4, sse5, sse6]).await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
@@ -831,12 +832,10 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
|
||||
"auto compact should not emit task lifecycle events"
|
||||
);
|
||||
|
||||
let request_bodies: Vec<String> = server
|
||||
.received_requests()
|
||||
.await
|
||||
.unwrap()
|
||||
let request_bodies: Vec<String> = request_log
|
||||
.requests()
|
||||
.into_iter()
|
||||
.map(|request| String::from_utf8(request.body).unwrap_or_default())
|
||||
.map(|request| request.body_json().to_string())
|
||||
.collect();
|
||||
assert_eq!(
|
||||
request_bodies.len(),
|
||||
|
||||
@@ -9,6 +9,7 @@ use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ResponsesRequest;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
@@ -69,11 +70,13 @@ async fn grep_files_tool_collects_matches() -> Result<()> {
|
||||
})
|
||||
.to_string();
|
||||
|
||||
mount_tool_sequence(&server, call_id, &arguments, "grep_files").await;
|
||||
let (first_mock, second_mock) =
|
||||
mount_tool_sequence(&server, call_id, &arguments, "grep_files").await;
|
||||
submit_turn(&test, "please find uses of needle").await?;
|
||||
|
||||
let bodies = recorded_bodies(&server).await?;
|
||||
let tool_output = find_tool_output(&bodies, call_id).expect("tool output present");
|
||||
let mut requests = first_mock.requests();
|
||||
requests.extend(second_mock.requests());
|
||||
let tool_output = find_tool_output(&requests, call_id).expect("tool output present");
|
||||
let payload = tool_output.get("output").expect("output field present");
|
||||
let (content_opt, success_opt) = extract_content_and_success(payload);
|
||||
let content = content_opt.expect("content present");
|
||||
@@ -118,11 +121,13 @@ async fn grep_files_tool_reports_empty_results() -> Result<()> {
|
||||
})
|
||||
.to_string();
|
||||
|
||||
mount_tool_sequence(&server, call_id, &arguments, "grep_files").await;
|
||||
let (first_mock, second_mock) =
|
||||
mount_tool_sequence(&server, call_id, &arguments, "grep_files").await;
|
||||
submit_turn(&test, "search again").await?;
|
||||
|
||||
let bodies = recorded_bodies(&server).await?;
|
||||
let tool_output = find_tool_output(&bodies, call_id).expect("tool output present");
|
||||
let mut requests = first_mock.requests();
|
||||
requests.extend(second_mock.requests());
|
||||
let tool_output = find_tool_output(&requests, call_id).expect("tool output present");
|
||||
let payload = tool_output.get("output").expect("output field present");
|
||||
let (content_opt, success_opt) = extract_content_and_success(payload);
|
||||
let content = content_opt.expect("content present");
|
||||
@@ -174,41 +179,34 @@ async fn mount_tool_sequence(
|
||||
call_id: &str,
|
||||
arguments: &str,
|
||||
tool_name: &str,
|
||||
) {
|
||||
) -> (responses::ResponseMock, responses::ResponseMock) {
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, tool_name, arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(server, any(), first_response).await;
|
||||
let first_mock = responses::mount_sse_once_match(server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
responses::mount_sse_once_match(server, any(), second_response).await;
|
||||
let second_mock = responses::mount_sse_once_match(server, any(), second_response).await;
|
||||
|
||||
(first_mock, second_mock)
|
||||
}
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn recorded_bodies(server: &wiremock::MockServer) -> Result<Vec<Value>> {
|
||||
let requests = server.received_requests().await.expect("requests recorded");
|
||||
Ok(requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn find_tool_output<'a>(requests: &'a [Value], call_id: &str) -> Option<&'a Value> {
|
||||
requests.iter().find_map(|body| {
|
||||
body.get("input")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
})
|
||||
})
|
||||
})
|
||||
fn find_tool_output(requests: &[ResponsesRequest], call_id: &str) -> Option<Value> {
|
||||
for request in requests {
|
||||
for item in request.input() {
|
||||
if item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
{
|
||||
return Some(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn collect_file_names(content: &str) -> HashSet<String> {
|
||||
|
||||
@@ -54,13 +54,13 @@ async fn list_dir_tool_returns_entries() -> anyhow::Result<()> {
|
||||
ev_function_call(call_id, "list_dir", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
let first_mock = responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
@@ -81,30 +81,15 @@ async fn list_dir_tool_returns_entries() -> anyhow::Result<()> {
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
let request_bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
assert!(
|
||||
!request_bodies.is_empty(),
|
||||
"expected at least one request body"
|
||||
);
|
||||
let mut requests = first_mock.requests();
|
||||
requests.extend(second_mock.requests());
|
||||
assert!(!requests.is_empty(), "expected at least one request body");
|
||||
|
||||
let tool_output_item = request_bodies
|
||||
.iter()
|
||||
.find_map(|body| {
|
||||
body.get("input")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
})
|
||||
})
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
panic!("function_call_output item not found in requests: {request_bodies:#?}")
|
||||
});
|
||||
let tool_output_item = requests
|
||||
.into_iter()
|
||||
.flat_map(|request| request.input())
|
||||
.find(|item| item.get("type").and_then(Value::as_str) == Some("function_call_output"))
|
||||
.unwrap_or_else(|| panic!("function_call_output item not found in requests"));
|
||||
|
||||
assert_eq!(
|
||||
tool_output_item.get("call_id").and_then(Value::as_str),
|
||||
@@ -159,13 +144,13 @@ async fn list_dir_tool_depth_one_omits_children() -> anyhow::Result<()> {
|
||||
ev_function_call(call_id, "list_dir", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
let first_mock = responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
@@ -186,30 +171,15 @@ async fn list_dir_tool_depth_one_omits_children() -> anyhow::Result<()> {
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
let request_bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
assert!(
|
||||
!request_bodies.is_empty(),
|
||||
"expected at least one request body"
|
||||
);
|
||||
let mut requests = first_mock.requests();
|
||||
requests.extend(second_mock.requests());
|
||||
assert!(!requests.is_empty(), "expected at least one request body");
|
||||
|
||||
let tool_output_item = request_bodies
|
||||
.iter()
|
||||
.find_map(|body| {
|
||||
body.get("input")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
})
|
||||
})
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
panic!("function_call_output item not found in requests: {request_bodies:#?}")
|
||||
});
|
||||
let tool_output_item = requests
|
||||
.into_iter()
|
||||
.flat_map(|request| request.input())
|
||||
.find(|item| item.get("type").and_then(Value::as_str) == Some("function_call_output"))
|
||||
.unwrap_or_else(|| panic!("function_call_output item not found in requests"));
|
||||
|
||||
assert_eq!(
|
||||
tool_output_item.get("call_id").and_then(Value::as_str),
|
||||
@@ -271,13 +241,13 @@ async fn list_dir_tool_depth_two_includes_children_only() -> anyhow::Result<()>
|
||||
ev_function_call(call_id, "list_dir", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
let first_mock = responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
@@ -298,30 +268,15 @@ async fn list_dir_tool_depth_two_includes_children_only() -> anyhow::Result<()>
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
let request_bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
assert!(
|
||||
!request_bodies.is_empty(),
|
||||
"expected at least one request body"
|
||||
);
|
||||
let mut requests = first_mock.requests();
|
||||
requests.extend(second_mock.requests());
|
||||
assert!(!requests.is_empty(), "expected at least one request body");
|
||||
|
||||
let tool_output_item = request_bodies
|
||||
.iter()
|
||||
.find_map(|body| {
|
||||
body.get("input")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
})
|
||||
})
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
panic!("function_call_output item not found in requests: {request_bodies:#?}")
|
||||
});
|
||||
let tool_output_item = requests
|
||||
.into_iter()
|
||||
.flat_map(|request| request.input())
|
||||
.find(|item| item.get("type").and_then(Value::as_str) == Some("function_call_output"))
|
||||
.unwrap_or_else(|| panic!("function_call_output item not found in requests"));
|
||||
|
||||
assert_eq!(
|
||||
tool_output_item.get("call_id").and_then(Value::as_str),
|
||||
@@ -386,13 +341,13 @@ async fn list_dir_tool_depth_three_includes_grandchildren() -> anyhow::Result<()
|
||||
ev_function_call(call_id, "list_dir", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
let first_mock = responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
@@ -413,30 +368,15 @@ async fn list_dir_tool_depth_three_includes_grandchildren() -> anyhow::Result<()
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
let request_bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
assert!(
|
||||
!request_bodies.is_empty(),
|
||||
"expected at least one request body"
|
||||
);
|
||||
let mut requests = first_mock.requests();
|
||||
requests.extend(second_mock.requests());
|
||||
assert!(!requests.is_empty(), "expected at least one request body");
|
||||
|
||||
let tool_output_item = request_bodies
|
||||
.iter()
|
||||
.find_map(|body| {
|
||||
body.get("input")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
})
|
||||
})
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
panic!("function_call_output item not found in requests: {request_bodies:#?}")
|
||||
});
|
||||
let tool_output_item = requests
|
||||
.into_iter()
|
||||
.flat_map(|request| request.input())
|
||||
.find(|item| item.get("type").and_then(Value::as_str) == Some("function_call_output"))
|
||||
.unwrap_or_else(|| panic!("function_call_output item not found in requests"));
|
||||
|
||||
assert_eq!(
|
||||
tool_output_item.get("call_id").and_then(Value::as_str),
|
||||
|
||||
@@ -9,6 +9,7 @@ use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses::ResponsesRequest;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
@@ -49,22 +50,13 @@ async fn submit_turn(test: &TestCodex, prompt: &str, sandbox_policy: SandboxPoli
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn request_bodies(requests: &[wiremock::Request]) -> Result<Vec<Value>> {
|
||||
requests
|
||||
.iter()
|
||||
.map(|req| Ok(serde_json::from_slice::<Value>(&req.body)?))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn find_function_call_output<'a>(bodies: &'a [Value], call_id: &str) -> Option<&'a Value> {
|
||||
for body in bodies {
|
||||
if let Some(items) = body.get("input").and_then(Value::as_array) {
|
||||
for item in items {
|
||||
if item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
{
|
||||
return Some(item);
|
||||
}
|
||||
fn find_function_call_output(requests: &[ResponsesRequest], call_id: &str) -> Option<Value> {
|
||||
for request in requests {
|
||||
for item in request.input() {
|
||||
if item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
{
|
||||
return Some(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -99,7 +91,7 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
let response_mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
@@ -108,12 +100,8 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item = find_function_call_output(&bodies, call_id).expect("shell output present");
|
||||
let requests = response_mock.requests();
|
||||
let output_item = find_function_call_output(&requests, call_id).expect("shell output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
@@ -163,7 +151,7 @@ async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> {
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
let response_mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
@@ -172,13 +160,9 @@ async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> {
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let requests = response_mock.requests();
|
||||
let output_item =
|
||||
find_function_call_output(&bodies, call_id).expect("structured output present");
|
||||
find_function_call_output(&requests, call_id).expect("structured output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
@@ -226,7 +210,7 @@ async fn shell_output_reserializes_truncated_content() -> Result<()> {
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
let response_mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
@@ -235,13 +219,9 @@ async fn shell_output_reserializes_truncated_content() -> Result<()> {
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let requests = response_mock.requests();
|
||||
let output_item =
|
||||
find_function_call_output(&bodies, call_id).expect("truncated output present");
|
||||
find_function_call_output(&requests, call_id).expect("truncated output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
|
||||
@@ -9,6 +9,7 @@ use codex_core::protocol::InputItem;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use core_test_support::responses::ResponsesRequest;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
@@ -31,20 +32,18 @@ fn extract_output_text(item: &Value) -> Option<&str> {
|
||||
})
|
||||
}
|
||||
|
||||
fn collect_tool_outputs(bodies: &[Value]) -> Result<HashMap<String, Value>> {
|
||||
fn collect_tool_outputs(requests: &[ResponsesRequest]) -> Result<HashMap<String, Value>> {
|
||||
let mut outputs = HashMap::new();
|
||||
for body in bodies {
|
||||
if let Some(items) = body.get("input").and_then(Value::as_array) {
|
||||
for item in items {
|
||||
if item.get("type").and_then(Value::as_str) != Some("function_call_output") {
|
||||
continue;
|
||||
}
|
||||
if let Some(call_id) = item.get("call_id").and_then(Value::as_str) {
|
||||
let content = extract_output_text(item)
|
||||
.ok_or_else(|| anyhow::anyhow!("missing tool output content"))?;
|
||||
let parsed: Value = serde_json::from_str(content)?;
|
||||
outputs.insert(call_id.to_string(), parsed);
|
||||
}
|
||||
for request in requests {
|
||||
for item in request.input() {
|
||||
if item.get("type").and_then(Value::as_str) != Some("function_call_output") {
|
||||
continue;
|
||||
}
|
||||
if let Some(call_id) = item.get("call_id").and_then(Value::as_str) {
|
||||
let content = extract_output_text(&item)
|
||||
.ok_or_else(|| anyhow::anyhow!("missing tool output content"))?;
|
||||
let parsed: Value = serde_json::from_str(content)?;
|
||||
outputs.insert(call_id.to_string(), parsed);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -105,7 +104,7 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
let response_mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
@@ -126,15 +125,10 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
let requests = response_mock.requests();
|
||||
assert!(!requests.is_empty(), "expected at least one POST request");
|
||||
|
||||
let bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let outputs = collect_tool_outputs(&bodies)?;
|
||||
let outputs = collect_tool_outputs(&requests)?;
|
||||
|
||||
let start_output = outputs
|
||||
.get(first_call_id)
|
||||
@@ -240,7 +234,7 @@ PY
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
let response_mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
@@ -261,15 +255,10 @@ PY
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
let requests = response_mock.requests();
|
||||
assert!(!requests.is_empty(), "expected at least one POST request");
|
||||
|
||||
let bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let outputs = collect_tool_outputs(&bodies)?;
|
||||
let outputs = collect_tool_outputs(&requests)?;
|
||||
|
||||
let start_output = outputs
|
||||
.get(first_call_id)
|
||||
@@ -346,7 +335,7 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
let response_mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
@@ -372,15 +361,10 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
let requests = response_mock.requests();
|
||||
assert!(!requests.is_empty(), "expected at least one POST request");
|
||||
|
||||
let bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let outputs = collect_tool_outputs(&bodies)?;
|
||||
let outputs = collect_tool_outputs(&requests)?;
|
||||
|
||||
let first_output = outputs.get(first_call_id).expect("missing timeout output");
|
||||
assert_eq!(first_output["session_id"], "0");
|
||||
|
||||
Reference in New Issue
Block a user