mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
Merge branch 'jif/tool_refactors-1' into jif/parallel-tool-calls
This commit is contained in:
@@ -1,11 +1,15 @@
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::tools::TELEMETRY_PREVIEW_MAX_BYTES;
|
||||
use crate::tools::TELEMETRY_PREVIEW_MAX_LINES;
|
||||
use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ShellToolCallParams;
|
||||
use codex_protocol::protocol::FileChange;
|
||||
use codex_utils_string::take_bytes_at_char_boundary;
|
||||
use mcp_types::CallToolResult;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
@@ -68,7 +72,7 @@ pub enum ToolOutput {
|
||||
impl ToolOutput {
|
||||
pub fn log_preview(&self) -> String {
|
||||
match self {
|
||||
ToolOutput::Function { content, .. } => content.clone(),
|
||||
ToolOutput::Function { content, .. } => telemetry_preview(content),
|
||||
ToolOutput::Mcp { result } => format!("{result:?}"),
|
||||
}
|
||||
}
|
||||
@@ -103,6 +107,46 @@ impl ToolOutput {
|
||||
}
|
||||
}
|
||||
|
||||
fn telemetry_preview(content: &str) -> String {
|
||||
let truncated_slice = take_bytes_at_char_boundary(content, TELEMETRY_PREVIEW_MAX_BYTES);
|
||||
let truncated_by_bytes = truncated_slice.len() < content.len();
|
||||
|
||||
let mut preview = String::new();
|
||||
let mut lines_iter = truncated_slice.lines();
|
||||
for idx in 0..TELEMETRY_PREVIEW_MAX_LINES {
|
||||
match lines_iter.next() {
|
||||
Some(line) => {
|
||||
if idx > 0 {
|
||||
preview.push('\n');
|
||||
}
|
||||
preview.push_str(line);
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
let truncated_by_lines = lines_iter.next().is_some();
|
||||
|
||||
if !truncated_by_bytes && !truncated_by_lines {
|
||||
return content.to_string();
|
||||
}
|
||||
|
||||
if preview.len() < truncated_slice.len()
|
||||
&& truncated_slice
|
||||
.as_bytes()
|
||||
.get(preview.len())
|
||||
.is_some_and(|byte| *byte == b'\n')
|
||||
{
|
||||
preview.push('\n');
|
||||
}
|
||||
|
||||
if !preview.is_empty() && !preview.ends_with('\n') {
|
||||
preview.push('\n');
|
||||
}
|
||||
preview.push_str(TELEMETRY_PREVIEW_TRUNCATION_NOTICE);
|
||||
|
||||
preview
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -148,6 +192,38 @@ mod tests {
|
||||
other => panic!("expected FunctionCallOutput, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_returns_original_within_limits() {
|
||||
let content = "short output";
|
||||
assert_eq!(telemetry_preview(content), content);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_truncates_by_bytes() {
|
||||
let content = "x".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 8);
|
||||
let preview = telemetry_preview(&content);
|
||||
|
||||
assert!(preview.contains(TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
|
||||
assert!(
|
||||
preview.len()
|
||||
<= TELEMETRY_PREVIEW_MAX_BYTES + TELEMETRY_PREVIEW_TRUNCATION_NOTICE.len() + 1
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_truncates_by_lines() {
|
||||
let content = (0..(TELEMETRY_PREVIEW_MAX_LINES + 5))
|
||||
.map(|idx| format!("line {idx}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let preview = telemetry_preview(&content);
|
||||
let lines: Vec<&str> = preview.lines().collect();
|
||||
|
||||
assert!(lines.len() <= TELEMETRY_PREVIEW_MAX_LINES + 1);
|
||||
assert_eq!(lines.last(), Some(&TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
|
||||
@@ -17,7 +17,7 @@ use crate::tools::registry::ToolKind;
|
||||
|
||||
pub struct ReadFileHandler;
|
||||
|
||||
const MAX_LINE_LENGTH: usize = 200;
|
||||
const MAX_LINE_LENGTH: usize = 500;
|
||||
|
||||
fn default_offset() -> usize {
|
||||
1
|
||||
@@ -105,15 +105,28 @@ async fn read_file_slice(
|
||||
.await
|
||||
.map_err(|err| FunctionCallError::RespondToModel(format!("failed to read file: {err}")))?;
|
||||
|
||||
let mut reader = BufReader::new(file).lines();
|
||||
let mut reader = BufReader::new(file);
|
||||
let mut collected = Vec::new();
|
||||
let mut seen = 0usize;
|
||||
let mut buffer = Vec::new();
|
||||
|
||||
loop {
|
||||
buffer.clear();
|
||||
let bytes_read = reader.read_until(b'\n', &mut buffer).await.map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!("failed to read file: {err}"))
|
||||
})?;
|
||||
|
||||
if bytes_read == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
if buffer.last() == Some(&b'\n') {
|
||||
buffer.pop();
|
||||
if buffer.last() == Some(&b'\r') {
|
||||
buffer.pop();
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(line) = reader
|
||||
.next_line()
|
||||
.await
|
||||
.map_err(|err| FunctionCallError::RespondToModel(format!("failed to read file: {err}")))?
|
||||
{
|
||||
seen += 1;
|
||||
|
||||
if seen < offset {
|
||||
@@ -124,12 +137,7 @@ async fn read_file_slice(
|
||||
break;
|
||||
}
|
||||
|
||||
let formatted = if line.len() > MAX_LINE_LENGTH {
|
||||
take_bytes_at_char_boundary(&line, MAX_LINE_LENGTH).to_string()
|
||||
} else {
|
||||
line
|
||||
};
|
||||
|
||||
let formatted = format_line(&buffer);
|
||||
collected.push(format!("L{seen}: {formatted}"));
|
||||
|
||||
if collected.len() == limit {
|
||||
@@ -146,6 +154,15 @@ async fn read_file_slice(
|
||||
Ok(collected)
|
||||
}
|
||||
|
||||
fn format_line(bytes: &[u8]) -> String {
|
||||
let decoded = String::from_utf8_lossy(bytes);
|
||||
if decoded.len() > MAX_LINE_LENGTH {
|
||||
take_bytes_at_char_boundary(&decoded, MAX_LINE_LENGTH).to_string()
|
||||
} else {
|
||||
decoded.into_owned()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -179,4 +196,17 @@ mod tests {
|
||||
FunctionCallError::RespondToModel("offset exceeds file length".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reads_non_utf8_lines() {
|
||||
let mut temp = NamedTempFile::new().expect("create temp file");
|
||||
use std::io::Write as _;
|
||||
temp.as_file_mut().write_all(b"\xff\xfe\nplain\n").unwrap();
|
||||
|
||||
let lines = read_file_slice(temp.path(), 1, 2)
|
||||
.await
|
||||
.expect("read slice");
|
||||
let expected_first = format!("L1: {}{}", '\u{FFFD}', '\u{FFFD}');
|
||||
assert_eq!(lines, vec![expected_first, "L2: plain".to_string()]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,6 +39,12 @@ pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
|
||||
pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
|
||||
pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
|
||||
|
||||
// Telemetry preview limits: keep log events smaller than model budgets.
|
||||
pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
|
||||
pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
|
||||
pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
|
||||
"[... telemetry preview truncated ...]";
|
||||
|
||||
pub(crate) async fn handle_container_exec_with_params(
|
||||
tool_name: &str,
|
||||
params: ExecParams,
|
||||
@@ -74,7 +80,7 @@ pub(crate) async fn handle_container_exec_with_params(
|
||||
// could not resolve it into a patch that would apply
|
||||
// cleanly. Return to model for resample.
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"error: {parse_error:#?}"
|
||||
"apply_patch verification failed: {parse_error}"
|
||||
)));
|
||||
}
|
||||
MaybeApplyPatchVerified::ShellParseError(error) => {
|
||||
@@ -219,22 +225,26 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
|
||||
return s.to_string();
|
||||
}
|
||||
|
||||
let lines: Vec<&str> = s.lines().collect();
|
||||
let head_take = MODEL_FORMAT_HEAD_LINES.min(lines.len());
|
||||
let tail_take = MODEL_FORMAT_TAIL_LINES.min(lines.len().saturating_sub(head_take));
|
||||
let omitted = lines.len().saturating_sub(head_take + tail_take);
|
||||
let segments: Vec<&str> = s.split_inclusive('\n').collect();
|
||||
let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
|
||||
let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
|
||||
let omitted = segments.len().saturating_sub(head_take + tail_take);
|
||||
|
||||
// Join head and tail blocks (lines() strips newlines; reinsert them)
|
||||
let head_block = lines
|
||||
let head_slice_end: usize = segments
|
||||
.iter()
|
||||
.take(head_take)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
let tail_block = if tail_take > 0 {
|
||||
lines[lines.len() - tail_take..].join("\n")
|
||||
.map(|segment| segment.len())
|
||||
.sum();
|
||||
let tail_slice_start: usize = if tail_take == 0 {
|
||||
s.len()
|
||||
} else {
|
||||
String::new()
|
||||
s.len()
|
||||
- segments
|
||||
.iter()
|
||||
.rev()
|
||||
.take(tail_take)
|
||||
.map(|segment| segment.len())
|
||||
.sum::<usize>()
|
||||
};
|
||||
let marker = format!("\n[... omitted {omitted} of {total_lines} lines ...]\n\n");
|
||||
|
||||
@@ -250,19 +260,20 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
|
||||
head_budget = MODEL_FORMAT_MAX_BYTES.saturating_sub(marker.len());
|
||||
}
|
||||
|
||||
// Enforce line-count cap by trimming head/tail lines
|
||||
let head_lines_text = head_block;
|
||||
let tail_lines_text = tail_block;
|
||||
// Build final string respecting byte budgets
|
||||
let head_part = take_bytes_at_char_boundary(&head_lines_text, head_budget);
|
||||
let head_slice = &s[..head_slice_end];
|
||||
let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
|
||||
let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(s.len()));
|
||||
|
||||
result.push_str(head_part);
|
||||
result.push_str(&marker);
|
||||
|
||||
let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len());
|
||||
let tail_budget_final = remaining;
|
||||
let tail_part = take_last_bytes_at_char_boundary(&tail_lines_text, tail_budget_final);
|
||||
if remaining == 0 {
|
||||
return result;
|
||||
}
|
||||
|
||||
let tail_slice = &s[tail_slice_start..];
|
||||
let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
|
||||
result.push_str(tail_part);
|
||||
|
||||
result
|
||||
|
||||
@@ -47,7 +47,6 @@ pub trait ToolHandler: Send + Sync {
|
||||
matches!(
|
||||
(self.kind(), payload),
|
||||
(ToolKind::Function, ToolPayload::Function { .. })
|
||||
| (ToolKind::Function, ToolPayload::UnifiedExec { .. })
|
||||
| (ToolKind::UnifiedExec, ToolPayload::UnifiedExec { .. })
|
||||
| (ToolKind::Mcp, ToolPayload::Mcp { .. })
|
||||
)
|
||||
@@ -89,7 +88,7 @@ impl ToolRegistry {
|
||||
let call_id_owned = invocation.call_id.clone();
|
||||
let otel = invocation.turn.client.get_otel_event_manager();
|
||||
let payload_for_response = invocation.payload.clone();
|
||||
let log_payload = payload_for_response.log_payload().into_owned();
|
||||
let log_payload = payload_for_response.log_payload();
|
||||
|
||||
let entry = match self.handlers.get(tool_name.as_str()) {
|
||||
Some(entry) => entry,
|
||||
@@ -99,7 +98,7 @@ impl ToolRegistry {
|
||||
otel.tool_result(
|
||||
tool_name.as_ref(),
|
||||
&call_id_owned,
|
||||
&log_payload,
|
||||
log_payload.as_ref(),
|
||||
Duration::ZERO,
|
||||
false,
|
||||
&message,
|
||||
@@ -115,7 +114,7 @@ impl ToolRegistry {
|
||||
otel.tool_result(
|
||||
tool_name.as_ref(),
|
||||
&call_id_owned,
|
||||
&log_payload,
|
||||
log_payload.as_ref(),
|
||||
Duration::ZERO,
|
||||
false,
|
||||
&message,
|
||||
@@ -123,35 +122,36 @@ impl ToolRegistry {
|
||||
return Err(FunctionCallError::RespondToModel(message));
|
||||
}
|
||||
|
||||
let output_cell = std::sync::Mutex::new(None);
|
||||
let output_cell = tokio::sync::Mutex::new(None);
|
||||
|
||||
let result = otel
|
||||
.log_tool_result(tool_name.as_ref(), &call_id_owned, &log_payload, || {
|
||||
let handler = handler.clone();
|
||||
let output_cell = &output_cell;
|
||||
let invocation = invocation;
|
||||
async move {
|
||||
match handler.handle(invocation).await {
|
||||
Ok(output) => {
|
||||
let preview = output.log_preview();
|
||||
let success = output.success_for_logging();
|
||||
let mut guard = output_cell
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
*guard = Some(output);
|
||||
Ok((preview, success))
|
||||
.log_tool_result(
|
||||
tool_name.as_ref(),
|
||||
&call_id_owned,
|
||||
log_payload.as_ref(),
|
||||
|| {
|
||||
let handler = handler.clone();
|
||||
let output_cell = &output_cell;
|
||||
let invocation = invocation;
|
||||
async move {
|
||||
match handler.handle(invocation).await {
|
||||
Ok(output) => {
|
||||
let preview = output.log_preview();
|
||||
let success = output.success_for_logging();
|
||||
let mut guard = output_cell.lock().await;
|
||||
*guard = Some(output);
|
||||
Ok((preview, success))
|
||||
}
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
})
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(_) => {
|
||||
let mut guard = output_cell
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let mut guard = output_cell.lock().await;
|
||||
let output = guard.take().ok_or_else(|| {
|
||||
FunctionCallError::RespondToModel("tool produced no output".to_string())
|
||||
})?;
|
||||
|
||||
@@ -145,7 +145,7 @@ impl Router {
|
||||
sub_id: &str,
|
||||
call: ToolCall,
|
||||
) -> ResponseInputItem {
|
||||
let payload_clone = call.payload.clone();
|
||||
let payload_outputs_custom = matches!(call.payload, ToolPayload::Custom { .. });
|
||||
let ToolCall {
|
||||
tool_name,
|
||||
call_id,
|
||||
@@ -165,28 +165,29 @@ impl Router {
|
||||
|
||||
match self.registry.dispatch(invocation).await {
|
||||
Ok(response) => response,
|
||||
Err(err) => Self::failure_response(call_id, payload_clone, err),
|
||||
Err(err) => Self::failure_response(call_id, payload_outputs_custom, err),
|
||||
}
|
||||
}
|
||||
|
||||
fn failure_response(
|
||||
call_id: String,
|
||||
payload: ToolPayload,
|
||||
payload_outputs_custom: bool,
|
||||
err: FunctionCallError,
|
||||
) -> ResponseInputItem {
|
||||
let message = err.to_string();
|
||||
match payload {
|
||||
ToolPayload::Custom { .. } => ResponseInputItem::CustomToolCallOutput {
|
||||
if payload_outputs_custom {
|
||||
ResponseInputItem::CustomToolCallOutput {
|
||||
call_id,
|
||||
output: message,
|
||||
},
|
||||
_ => ResponseInputItem::FunctionCallOutput {
|
||||
}
|
||||
} else {
|
||||
ResponseInputItem::FunctionCallOutput {
|
||||
call_id,
|
||||
output: codex_protocol::models::FunctionCallOutputPayload {
|
||||
content: message,
|
||||
success: Some(false),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,6 +7,9 @@ use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::config::ConfigToml;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
use assert_cmd::cargo::cargo_bin;
|
||||
|
||||
pub mod responses;
|
||||
pub mod test_codex;
|
||||
pub mod test_codex_exec;
|
||||
@@ -17,12 +20,25 @@ pub mod test_codex_exec;
|
||||
pub fn load_default_config_for_test(codex_home: &TempDir) -> Config {
|
||||
Config::load_from_base_config_with_overrides(
|
||||
ConfigToml::default(),
|
||||
ConfigOverrides::default(),
|
||||
default_test_overrides(),
|
||||
codex_home.path().to_path_buf(),
|
||||
)
|
||||
.expect("defaults for test should always succeed")
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn default_test_overrides() -> ConfigOverrides {
|
||||
ConfigOverrides {
|
||||
codex_linux_sandbox_exe: Some(cargo_bin("codex-linux-sandbox")),
|
||||
..ConfigOverrides::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
fn default_test_overrides() -> ConfigOverrides {
|
||||
ConfigOverrides::default()
|
||||
}
|
||||
|
||||
/// Builds an SSE stream body from a JSON fixture.
|
||||
///
|
||||
/// The fixture must contain an array of objects where each object represents a
|
||||
|
||||
@@ -23,5 +23,6 @@ mod seatbelt;
|
||||
mod stream_error_allows_next_turn;
|
||||
mod stream_no_completed;
|
||||
mod tool_harness;
|
||||
mod unified_exec;
|
||||
mod user_notification;
|
||||
mod view_image;
|
||||
|
||||
@@ -464,3 +464,105 @@ async fn apply_patch_tool_executes_and_emits_patch_events() -> anyhow::Result<()
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_reports_parse_diagnostics() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let call_id = "apply-patch-parse-error";
|
||||
let patch_content = r"*** Begin Patch
|
||||
*** Update File: broken.txt
|
||||
*** End Patch";
|
||||
|
||||
let first_response = sse(vec![
|
||||
serde_json::json!({
|
||||
"type": "response.created",
|
||||
"response": {"id": "resp-1"}
|
||||
}),
|
||||
ev_apply_patch_function_call(call_id, patch_content),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "failed"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![InputItem::Text {
|
||||
text: "please apply a patch".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
loop {
|
||||
let event = codex.next_event().await.expect("event");
|
||||
if matches!(event.msg, EventMsg::TaskComplete(_)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
assert!(!requests.is_empty(), "expected at least one POST request");
|
||||
|
||||
let request_bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let body_with_tool_output = find_request_with_function_call_output(&request_bodies)
|
||||
.expect("function_call_output item not found in requests");
|
||||
let output_item = function_call_output(body_with_tool_output).expect("tool output item");
|
||||
assert_eq!(
|
||||
output_item.get("call_id").and_then(Value::as_str),
|
||||
Some(call_id)
|
||||
);
|
||||
let output_text = extract_output_text(output_item).expect("output text present");
|
||||
|
||||
assert!(
|
||||
output_text.contains("apply_patch verification failed"),
|
||||
"expected apply_patch verification failure message, got {output_text:?}"
|
||||
);
|
||||
assert!(
|
||||
output_text.contains("invalid hunk"),
|
||||
"expected parse diagnostics in output text, got {output_text:?}"
|
||||
);
|
||||
|
||||
if let Some(success_flag) = output_item
|
||||
.get("output")
|
||||
.and_then(|value| value.as_object())
|
||||
.and_then(|obj| obj.get("success"))
|
||||
.and_then(serde_json::Value::as_bool)
|
||||
{
|
||||
assert!(
|
||||
!success_flag,
|
||||
"expected tool output to mark success=false for parse failures"
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
280
codex-rs/core/tests/suite/unified_exec.rs
Normal file
280
codex-rs/core/tests/suite/unified_exec.rs
Normal file
@@ -0,0 +1,280 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::InputItem;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::skip_if_sandbox;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use serde_json::Value;
|
||||
|
||||
fn extract_output_text(item: &Value) -> Option<&str> {
|
||||
item.get("output").and_then(|value| match value {
|
||||
Value::String(text) => Some(text.as_str()),
|
||||
Value::Object(obj) => obj.get("content").and_then(Value::as_str),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
fn collect_tool_outputs(bodies: &[Value]) -> Result<HashMap<String, Value>> {
|
||||
let mut outputs = HashMap::new();
|
||||
for body in bodies {
|
||||
if let Some(items) = body.get("input").and_then(Value::as_array) {
|
||||
for item in items {
|
||||
if item.get("type").and_then(Value::as_str) != Some("function_call_output") {
|
||||
continue;
|
||||
}
|
||||
if let Some(call_id) = item.get("call_id").and_then(Value::as_str) {
|
||||
let content = extract_output_text(item)
|
||||
.ok_or_else(|| anyhow::anyhow!("missing tool output content"))?;
|
||||
let parsed: Value = serde_json::from_str(content)?;
|
||||
outputs.insert(call_id.to_string(), parsed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(outputs)
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let first_call_id = "uexec-start";
|
||||
let first_args = serde_json::json!({
|
||||
"input": ["/bin/cat"],
|
||||
"timeout_ms": 200,
|
||||
});
|
||||
|
||||
let second_call_id = "uexec-stdin";
|
||||
let second_args = serde_json::json!({
|
||||
"input": ["hello unified exec\n"],
|
||||
"session_id": "0",
|
||||
"timeout_ms": 500,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
serde_json::json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call(
|
||||
first_call_id,
|
||||
"unified_exec",
|
||||
&serde_json::to_string(&first_args)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
serde_json::json!({"type": "response.created", "response": {"id": "resp-2"}}),
|
||||
ev_function_call(
|
||||
second_call_id,
|
||||
"unified_exec",
|
||||
&serde_json::to_string(&second_args)?,
|
||||
),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "all done"),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![InputItem::Text {
|
||||
text: "run unified exec".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
loop {
|
||||
let event = codex.next_event().await.expect("event");
|
||||
if matches!(event.msg, EventMsg::TaskComplete(_)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
assert!(!requests.is_empty(), "expected at least one POST request");
|
||||
|
||||
let bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let outputs = collect_tool_outputs(&bodies)?;
|
||||
|
||||
let start_output = outputs
|
||||
.get(first_call_id)
|
||||
.expect("missing first unified_exec output");
|
||||
let session_id = start_output["session_id"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
!session_id.is_empty(),
|
||||
"expected session id in first unified_exec response"
|
||||
);
|
||||
assert!(
|
||||
start_output["output"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.is_empty()
|
||||
);
|
||||
|
||||
let reuse_output = outputs
|
||||
.get(second_call_id)
|
||||
.expect("missing reused unified_exec output");
|
||||
assert_eq!(
|
||||
reuse_output["session_id"].as_str().unwrap_or_default(),
|
||||
session_id
|
||||
);
|
||||
let echoed = reuse_output["output"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
echoed.contains("hello unified exec"),
|
||||
"expected echoed output, got {echoed:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let first_call_id = "uexec-timeout";
|
||||
let first_args = serde_json::json!({
|
||||
"input": ["/bin/sh", "-c", "sleep 0.1; echo ready"],
|
||||
"timeout_ms": 10,
|
||||
});
|
||||
|
||||
let second_call_id = "uexec-poll";
|
||||
let second_args = serde_json::json!({
|
||||
"input": Vec::<String>::new(),
|
||||
"session_id": "0",
|
||||
"timeout_ms": 800,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
serde_json::json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call(
|
||||
first_call_id,
|
||||
"unified_exec",
|
||||
&serde_json::to_string(&first_args)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
serde_json::json!({"type": "response.created", "response": {"id": "resp-2"}}),
|
||||
ev_function_call(
|
||||
second_call_id,
|
||||
"unified_exec",
|
||||
&serde_json::to_string(&second_args)?,
|
||||
),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![InputItem::Text {
|
||||
text: "check timeout".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
loop {
|
||||
let event = codex.next_event().await.expect("event");
|
||||
if matches!(event.msg, EventMsg::TaskComplete(_)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
assert!(!requests.is_empty(), "expected at least one POST request");
|
||||
|
||||
let bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let outputs = collect_tool_outputs(&bodies)?;
|
||||
|
||||
let first_output = outputs.get(first_call_id).expect("missing timeout output");
|
||||
assert_eq!(first_output["session_id"], "0");
|
||||
assert!(
|
||||
first_output["output"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.is_empty()
|
||||
);
|
||||
|
||||
let poll_output = outputs.get(second_call_id).expect("missing poll output");
|
||||
let output_text = poll_output["output"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
output_text.contains("ready"),
|
||||
"expected ready output, got {output_text:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -14,6 +14,7 @@ use eventsource_stream::EventStreamError as StreamError;
|
||||
use reqwest::Error;
|
||||
use reqwest::Response;
|
||||
use serde::Serialize;
|
||||
use std::borrow::Cow;
|
||||
use std::fmt::Display;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
@@ -377,8 +378,8 @@ impl OtelEventManager {
|
||||
let duration = start.elapsed();
|
||||
|
||||
let (output, success) = match &result {
|
||||
Ok((preview, success)) => (preview.clone(), *success),
|
||||
Err(error) => (error.to_string(), false),
|
||||
Ok((preview, success)) => (Cow::Borrowed(preview.as_str()), *success),
|
||||
Err(error) => (Cow::Owned(error.to_string()), false),
|
||||
};
|
||||
|
||||
let success_str = if success { "true" } else { "false" };
|
||||
@@ -399,6 +400,7 @@ impl OtelEventManager {
|
||||
arguments = %arguments,
|
||||
duration_ms = %duration.as_millis(),
|
||||
success = %success_str,
|
||||
// `output` is truncated by the tool layer before reaching telemetry.
|
||||
output = %output,
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user