Merge branch 'jif/tool_refactors-1' into jif/parallel-tool-calls

2026-04-24 14:45:27 +00:00 · 2025-10-02 13:35:01 +01:00
parent a300bb3cd7 0d05d03b94
commit e0850b71f8
10 changed files with 589 additions and 70 deletions
--- a/codex-rs/core/src/tools/context.rs
+++ b/codex-rs/core/src/tools/context.rs
@@ -1,11 +1,15 @@
 use crate::codex::Session;
 use crate::codex::TurnContext;
+use crate::tools::TELEMETRY_PREVIEW_MAX_BYTES;
+use crate::tools::TELEMETRY_PREVIEW_MAX_LINES;
+use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
 use crate::turn_diff_tracker::TurnDiffTracker;
 use codex_otel::otel_event_manager::OtelEventManager;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseInputItem;
 use codex_protocol::models::ShellToolCallParams;
 use codex_protocol::protocol::FileChange;
+use codex_utils_string::take_bytes_at_char_boundary;
 use mcp_types::CallToolResult;
 use std::borrow::Cow;
 use std::collections::HashMap;
@@ -68,7 +72,7 @@ pub enum ToolOutput {
 impl ToolOutput {
    pub fn log_preview(&self) -> String {
        match self {
-            ToolOutput::Function { content, .. } => content.clone(),
+            ToolOutput::Function { content, .. } => telemetry_preview(content),
            ToolOutput::Mcp { result } => format!("{result:?}"),
        }
    }
@@ -103,6 +107,46 @@ impl ToolOutput {
    }
 }

+fn telemetry_preview(content: &str) -> String {
+    let truncated_slice = take_bytes_at_char_boundary(content, TELEMETRY_PREVIEW_MAX_BYTES);
+    let truncated_by_bytes = truncated_slice.len() < content.len();
+
+    let mut preview = String::new();
+    let mut lines_iter = truncated_slice.lines();
+    for idx in 0..TELEMETRY_PREVIEW_MAX_LINES {
+        match lines_iter.next() {
+            Some(line) => {
+                if idx > 0 {
+                    preview.push('\n');
+                }
+                preview.push_str(line);
+            }
+            None => break,
+        }
+    }
+    let truncated_by_lines = lines_iter.next().is_some();
+
+    if !truncated_by_bytes && !truncated_by_lines {
+        return content.to_string();
+    }
+
+    if preview.len() < truncated_slice.len()
+        && truncated_slice
+            .as_bytes()
+            .get(preview.len())
+            .is_some_and(|byte| *byte == b'\n')
+    {
+        preview.push('\n');
+    }
+
+    if !preview.is_empty() && !preview.ends_with('\n') {
+        preview.push('\n');
+    }
+    preview.push_str(TELEMETRY_PREVIEW_TRUNCATION_NOTICE);
+
+    preview
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -148,6 +192,38 @@ mod tests {
            other => panic!("expected FunctionCallOutput, got {other:?}"),
        }
    }
+
+    #[test]
+    fn telemetry_preview_returns_original_within_limits() {
+        let content = "short output";
+        assert_eq!(telemetry_preview(content), content);
+    }
+
+    #[test]
+    fn telemetry_preview_truncates_by_bytes() {
+        let content = "x".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 8);
+        let preview = telemetry_preview(&content);
+
+        assert!(preview.contains(TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
+        assert!(
+            preview.len()
+                <= TELEMETRY_PREVIEW_MAX_BYTES + TELEMETRY_PREVIEW_TRUNCATION_NOTICE.len() + 1
+        );
+    }
+
+    #[test]
+    fn telemetry_preview_truncates_by_lines() {
+        let content = (0..(TELEMETRY_PREVIEW_MAX_LINES + 5))
+            .map(|idx| format!("line {idx}"))
+            .collect::<Vec<_>>()
+            .join("\n");
+
+        let preview = telemetry_preview(&content);
+        let lines: Vec<&str> = preview.lines().collect();
+
+        assert!(lines.len() <= TELEMETRY_PREVIEW_MAX_LINES + 1);
+        assert_eq!(lines.last(), Some(&TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
+    }
 }

 #[derive(Clone, Debug)]
--- a/codex-rs/core/src/tools/handlers/read_file.rs
+++ b/codex-rs/core/src/tools/handlers/read_file.rs
@@ -17,7 +17,7 @@ use crate::tools::registry::ToolKind;

 pub struct ReadFileHandler;

-const MAX_LINE_LENGTH: usize = 200;
+const MAX_LINE_LENGTH: usize = 500;

 fn default_offset() -> usize {
    1
@@ -105,15 +105,28 @@ async fn read_file_slice(
        .await
        .map_err(|err| FunctionCallError::RespondToModel(format!("failed to read file: {err}")))?;

-    let mut reader = BufReader::new(file).lines();
+    let mut reader = BufReader::new(file);
    let mut collected = Vec::new();
    let mut seen = 0usize;
+    let mut buffer = Vec::new();
+
+    loop {
+        buffer.clear();
+        let bytes_read = reader.read_until(b'\n', &mut buffer).await.map_err(|err| {
+            FunctionCallError::RespondToModel(format!("failed to read file: {err}"))
+        })?;
+
+        if bytes_read == 0 {
+            break;
+        }
+
+        if buffer.last() == Some(&b'\n') {
+            buffer.pop();
+            if buffer.last() == Some(&b'\r') {
+                buffer.pop();
+            }
+        }

-    while let Some(line) = reader
-        .next_line()
-        .await
-        .map_err(|err| FunctionCallError::RespondToModel(format!("failed to read file: {err}")))?
-    {
        seen += 1;

        if seen < offset {
@@ -124,12 +137,7 @@ async fn read_file_slice(
            break;
        }

-        let formatted = if line.len() > MAX_LINE_LENGTH {
-            take_bytes_at_char_boundary(&line, MAX_LINE_LENGTH).to_string()
-        } else {
-            line
-        };
-
+        let formatted = format_line(&buffer);
        collected.push(format!("L{seen}: {formatted}"));

        if collected.len() == limit {
@@ -146,6 +154,15 @@ async fn read_file_slice(
    Ok(collected)
 }

+fn format_line(bytes: &[u8]) -> String {
+    let decoded = String::from_utf8_lossy(bytes);
+    if decoded.len() > MAX_LINE_LENGTH {
+        take_bytes_at_char_boundary(&decoded, MAX_LINE_LENGTH).to_string()
+    } else {
+        decoded.into_owned()
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -179,4 +196,17 @@ mod tests {
            FunctionCallError::RespondToModel("offset exceeds file length".to_string())
        );
    }
+
+    #[tokio::test]
+    async fn reads_non_utf8_lines() {
+        let mut temp = NamedTempFile::new().expect("create temp file");
+        use std::io::Write as _;
+        temp.as_file_mut().write_all(b"\xff\xfe\nplain\n").unwrap();
+
+        let lines = read_file_slice(temp.path(), 1, 2)
+            .await
+            .expect("read slice");
+        let expected_first = format!("L1: {}{}", '\u{FFFD}', '\u{FFFD}');
+        assert_eq!(lines, vec![expected_first, "L2: plain".to_string()]);
+    }
 }
--- a/codex-rs/core/src/tools/mod.rs
+++ b/codex-rs/core/src/tools/mod.rs
@@ -39,6 +39,12 @@ pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
 pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
 pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;

+// Telemetry preview limits: keep log events smaller than model budgets.
+pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
+pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
+pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
+    "[... telemetry preview truncated ...]";
+
 pub(crate) async fn handle_container_exec_with_params(
    tool_name: &str,
    params: ExecParams,
@@ -74,7 +80,7 @@ pub(crate) async fn handle_container_exec_with_params(
            // could not resolve it into a patch that would apply
            // cleanly. Return to model for resample.
            return Err(FunctionCallError::RespondToModel(format!(
-                "error: {parse_error:#?}"
+                "apply_patch verification failed: {parse_error}"
            )));
        }
        MaybeApplyPatchVerified::ShellParseError(error) => {
@@ -219,22 +225,26 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
        return s.to_string();
    }

-    let lines: Vec<&str> = s.lines().collect();
-    let head_take = MODEL_FORMAT_HEAD_LINES.min(lines.len());
-    let tail_take = MODEL_FORMAT_TAIL_LINES.min(lines.len().saturating_sub(head_take));
-    let omitted = lines.len().saturating_sub(head_take + tail_take);
+    let segments: Vec<&str> = s.split_inclusive('\n').collect();
+    let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
+    let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
+    let omitted = segments.len().saturating_sub(head_take + tail_take);

-    // Join head and tail blocks (lines() strips newlines; reinsert them)
-    let head_block = lines
+    let head_slice_end: usize = segments
        .iter()
        .take(head_take)
-        .cloned()
-        .collect::<Vec<_>>()
-        .join("\n");
-    let tail_block = if tail_take > 0 {
-        lines[lines.len() - tail_take..].join("\n")
+        .map(|segment| segment.len())
+        .sum();
+    let tail_slice_start: usize = if tail_take == 0 {
+        s.len()
    } else {
-        String::new()
+        s.len()
+            - segments
+                .iter()
+                .rev()
+                .take(tail_take)
+                .map(|segment| segment.len())
+                .sum::<usize>()
    };
    let marker = format!("\n[... omitted {omitted} of {total_lines} lines ...]\n\n");

@@ -250,19 +260,20 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
        head_budget = MODEL_FORMAT_MAX_BYTES.saturating_sub(marker.len());
    }

-    // Enforce line-count cap by trimming head/tail lines
-    let head_lines_text = head_block;
-    let tail_lines_text = tail_block;
-    // Build final string respecting byte budgets
-    let head_part = take_bytes_at_char_boundary(&head_lines_text, head_budget);
+    let head_slice = &s[..head_slice_end];
+    let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
    let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(s.len()));

    result.push_str(head_part);
    result.push_str(&marker);

    let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len());
-    let tail_budget_final = remaining;
-    let tail_part = take_last_bytes_at_char_boundary(&tail_lines_text, tail_budget_final);
+    if remaining == 0 {
+        return result;
+    }
+
+    let tail_slice = &s[tail_slice_start..];
+    let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
    result.push_str(tail_part);

    result
--- a/codex-rs/core/src/tools/registry.rs
+++ b/codex-rs/core/src/tools/registry.rs
@@ -47,7 +47,6 @@ pub trait ToolHandler: Send + Sync {
        matches!(
            (self.kind(), payload),
            (ToolKind::Function, ToolPayload::Function { .. })
-                | (ToolKind::Function, ToolPayload::UnifiedExec { .. })
                | (ToolKind::UnifiedExec, ToolPayload::UnifiedExec { .. })
                | (ToolKind::Mcp, ToolPayload::Mcp { .. })
        )
@@ -89,7 +88,7 @@ impl ToolRegistry {
        let call_id_owned = invocation.call_id.clone();
        let otel = invocation.turn.client.get_otel_event_manager();
        let payload_for_response = invocation.payload.clone();
-        let log_payload = payload_for_response.log_payload().into_owned();
+        let log_payload = payload_for_response.log_payload();

        let entry = match self.handlers.get(tool_name.as_str()) {
            Some(entry) => entry,
@@ -99,7 +98,7 @@ impl ToolRegistry {
                otel.tool_result(
                    tool_name.as_ref(),
                    &call_id_owned,
-                    &log_payload,
+                    log_payload.as_ref(),
                    Duration::ZERO,
                    false,
                    &message,
@@ -115,7 +114,7 @@ impl ToolRegistry {
            otel.tool_result(
                tool_name.as_ref(),
                &call_id_owned,
-                &log_payload,
+                log_payload.as_ref(),
                Duration::ZERO,
                false,
                &message,
@@ -123,35 +122,36 @@ impl ToolRegistry {
            return Err(FunctionCallError::RespondToModel(message));
        }

-        let output_cell = std::sync::Mutex::new(None);
+        let output_cell = tokio::sync::Mutex::new(None);

        let result = otel
-            .log_tool_result(tool_name.as_ref(), &call_id_owned, &log_payload, || {
-                let handler = handler.clone();
-                let output_cell = &output_cell;
-                let invocation = invocation;
-                async move {
-                    match handler.handle(invocation).await {
-                        Ok(output) => {
-                            let preview = output.log_preview();
-                            let success = output.success_for_logging();
-                            let mut guard = output_cell
-                                .lock()
-                                .unwrap_or_else(std::sync::PoisonError::into_inner);
-                            *guard = Some(output);
-                            Ok((preview, success))
+            .log_tool_result(
+                tool_name.as_ref(),
+                &call_id_owned,
+                log_payload.as_ref(),
+                || {
+                    let handler = handler.clone();
+                    let output_cell = &output_cell;
+                    let invocation = invocation;
+                    async move {
+                        match handler.handle(invocation).await {
+                            Ok(output) => {
+                                let preview = output.log_preview();
+                                let success = output.success_for_logging();
+                                let mut guard = output_cell.lock().await;
+                                *guard = Some(output);
+                                Ok((preview, success))
+                            }
+                            Err(err) => Err(err),
                        }
-                        Err(err) => Err(err),
                    }
-                }
-            })
+                },
+            )
            .await;

        match result {
            Ok(_) => {
-                let mut guard = output_cell
-                    .lock()
-                    .unwrap_or_else(std::sync::PoisonError::into_inner);
+                let mut guard = output_cell.lock().await;
                let output = guard.take().ok_or_else(|| {
                    FunctionCallError::RespondToModel("tool produced no output".to_string())
                })?;
--- a/codex-rs/core/src/tools/router.rs
+++ b/codex-rs/core/src/tools/router.rs
@@ -145,7 +145,7 @@ impl Router {
        sub_id: &str,
        call: ToolCall,
    ) -> ResponseInputItem {
-        let payload_clone = call.payload.clone();
+        let payload_outputs_custom = matches!(call.payload, ToolPayload::Custom { .. });
        let ToolCall {
            tool_name,
            call_id,
@@ -165,28 +165,29 @@ impl Router {

        match self.registry.dispatch(invocation).await {
            Ok(response) => response,
-            Err(err) => Self::failure_response(call_id, payload_clone, err),
+            Err(err) => Self::failure_response(call_id, payload_outputs_custom, err),
        }
    }

    fn failure_response(
        call_id: String,
-        payload: ToolPayload,
+        payload_outputs_custom: bool,
        err: FunctionCallError,
    ) -> ResponseInputItem {
        let message = err.to_string();
-        match payload {
-            ToolPayload::Custom { .. } => ResponseInputItem::CustomToolCallOutput {
+        if payload_outputs_custom {
+            ResponseInputItem::CustomToolCallOutput {
                call_id,
                output: message,
-            },
-            _ => ResponseInputItem::FunctionCallOutput {
+            }
+        } else {
+            ResponseInputItem::FunctionCallOutput {
                call_id,
                output: codex_protocol::models::FunctionCallOutputPayload {
                    content: message,
                    success: Some(false),
                },
-            },
+            }
        }
    }
 }
--- a/codex-rs/core/tests/common/lib.rs
+++ b/codex-rs/core/tests/common/lib.rs
@@ -7,6 +7,9 @@ use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
 use codex_core::config::ConfigToml;

+#[cfg(target_os = "linux")]
+use assert_cmd::cargo::cargo_bin;
+
 pub mod responses;
 pub mod test_codex;
 pub mod test_codex_exec;
@@ -17,12 +20,25 @@ pub mod test_codex_exec;
 pub fn load_default_config_for_test(codex_home: &TempDir) -> Config {
    Config::load_from_base_config_with_overrides(
        ConfigToml::default(),
-        ConfigOverrides::default(),
+        default_test_overrides(),
        codex_home.path().to_path_buf(),
    )
    .expect("defaults for test should always succeed")
 }

+#[cfg(target_os = "linux")]
+fn default_test_overrides() -> ConfigOverrides {
+    ConfigOverrides {
+        codex_linux_sandbox_exe: Some(cargo_bin("codex-linux-sandbox")),
+        ..ConfigOverrides::default()
+    }
+}
+
+#[cfg(not(target_os = "linux"))]
+fn default_test_overrides() -> ConfigOverrides {
+    ConfigOverrides::default()
+}
+
 /// Builds an SSE stream body from a JSON fixture.
 ///
 /// The fixture must contain an array of objects where each object represents a
--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -23,5 +23,6 @@ mod seatbelt;
 mod stream_error_allows_next_turn;
 mod stream_no_completed;
 mod tool_harness;
+mod unified_exec;
 mod user_notification;
 mod view_image;
--- a/codex-rs/core/tests/suite/tool_harness.rs
+++ b/codex-rs/core/tests/suite/tool_harness.rs
@@ -464,3 +464,105 @@ async fn apply_patch_tool_executes_and_emits_patch_events() -> anyhow::Result<()

    Ok(())
 }
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn apply_patch_reports_parse_diagnostics() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let mut builder = test_codex().with_config(|config| {
+        config.include_apply_patch_tool = true;
+    });
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = builder.build(&server).await?;
+
+    let call_id = "apply-patch-parse-error";
+    let patch_content = r"*** Begin Patch
+*** Update File: broken.txt
+*** End Patch";
+
+    let first_response = sse(vec![
+        serde_json::json!({
+            "type": "response.created",
+            "response": {"id": "resp-1"}
+        }),
+        ev_apply_patch_function_call(call_id, patch_content),
+        ev_completed("resp-1"),
+    ]);
+    responses::mount_sse_once_match(&server, any(), first_response).await;
+
+    let second_response = sse(vec![
+        ev_assistant_message("msg-1", "failed"),
+        ev_completed("resp-2"),
+    ]);
+    responses::mount_sse_once_match(&server, any(), second_response).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![InputItem::Text {
+                text: "please apply a patch".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    loop {
+        let event = codex.next_event().await.expect("event");
+        if matches!(event.msg, EventMsg::TaskComplete(_)) {
+            break;
+        }
+    }
+
+    let requests = server.received_requests().await.expect("recorded requests");
+    assert!(!requests.is_empty(), "expected at least one POST request");
+
+    let request_bodies = requests
+        .iter()
+        .map(|req| req.body_json::<Value>().expect("request json"))
+        .collect::<Vec<_>>();
+
+    let body_with_tool_output = find_request_with_function_call_output(&request_bodies)
+        .expect("function_call_output item not found in requests");
+    let output_item = function_call_output(body_with_tool_output).expect("tool output item");
+    assert_eq!(
+        output_item.get("call_id").and_then(Value::as_str),
+        Some(call_id)
+    );
+    let output_text = extract_output_text(output_item).expect("output text present");
+
+    assert!(
+        output_text.contains("apply_patch verification failed"),
+        "expected apply_patch verification failure message, got {output_text:?}"
+    );
+    assert!(
+        output_text.contains("invalid hunk"),
+        "expected parse diagnostics in output text, got {output_text:?}"
+    );
+
+    if let Some(success_flag) = output_item
+        .get("output")
+        .and_then(|value| value.as_object())
+        .and_then(|obj| obj.get("success"))
+        .and_then(serde_json::Value::as_bool)
+    {
+        assert!(
+            !success_flag,
+            "expected tool output to mark success=false for parse failures"
+        );
+    }
+
+    Ok(())
+}
--- a/codex-rs/core/tests/suite/unified_exec.rs
+++ b/codex-rs/core/tests/suite/unified_exec.rs
@@ -0,0 +1,280 @@
+#![cfg(not(target_os = "windows"))]
+
+use std::collections::HashMap;
+
+use anyhow::Result;
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use codex_core::protocol::SandboxPolicy;
+use codex_protocol::config_types::ReasoningSummary;
+use core_test_support::responses::ev_assistant_message;
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::ev_function_call;
+use core_test_support::responses::mount_sse_sequence;
+use core_test_support::responses::sse;
+use core_test_support::responses::start_mock_server;
+use core_test_support::skip_if_no_network;
+use core_test_support::skip_if_sandbox;
+use core_test_support::test_codex::TestCodex;
+use core_test_support::test_codex::test_codex;
+use serde_json::Value;
+
+fn extract_output_text(item: &Value) -> Option<&str> {
+    item.get("output").and_then(|value| match value {
+        Value::String(text) => Some(text.as_str()),
+        Value::Object(obj) => obj.get("content").and_then(Value::as_str),
+        _ => None,
+    })
+}
+
+fn collect_tool_outputs(bodies: &[Value]) -> Result<HashMap<String, Value>> {
+    let mut outputs = HashMap::new();
+    for body in bodies {
+        if let Some(items) = body.get("input").and_then(Value::as_array) {
+            for item in items {
+                if item.get("type").and_then(Value::as_str) != Some("function_call_output") {
+                    continue;
+                }
+                if let Some(call_id) = item.get("call_id").and_then(Value::as_str) {
+                    let content = extract_output_text(item)
+                        .ok_or_else(|| anyhow::anyhow!("missing tool output content"))?;
+                    let parsed: Value = serde_json::from_str(content)?;
+                    outputs.insert(call_id.to_string(), parsed);
+                }
+            }
+        }
+    }
+    Ok(outputs)
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+    skip_if_sandbox!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let mut builder = test_codex().with_config(|config| {
+        config.use_experimental_unified_exec_tool = true;
+    });
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = builder.build(&server).await?;
+
+    let first_call_id = "uexec-start";
+    let first_args = serde_json::json!({
+        "input": ["/bin/cat"],
+        "timeout_ms": 200,
+    });
+
+    let second_call_id = "uexec-stdin";
+    let second_args = serde_json::json!({
+        "input": ["hello unified exec\n"],
+        "session_id": "0",
+        "timeout_ms": 500,
+    });
+
+    let responses = vec![
+        sse(vec![
+            serde_json::json!({"type": "response.created", "response": {"id": "resp-1"}}),
+            ev_function_call(
+                first_call_id,
+                "unified_exec",
+                &serde_json::to_string(&first_args)?,
+            ),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            serde_json::json!({"type": "response.created", "response": {"id": "resp-2"}}),
+            ev_function_call(
+                second_call_id,
+                "unified_exec",
+                &serde_json::to_string(&second_args)?,
+            ),
+            ev_completed("resp-2"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "all done"),
+            ev_completed("resp-3"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![InputItem::Text {
+                text: "run unified exec".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    loop {
+        let event = codex.next_event().await.expect("event");
+        if matches!(event.msg, EventMsg::TaskComplete(_)) {
+            break;
+        }
+    }
+
+    let requests = server.received_requests().await.expect("recorded requests");
+    assert!(!requests.is_empty(), "expected at least one POST request");
+
+    let bodies = requests
+        .iter()
+        .map(|req| req.body_json::<Value>().expect("request json"))
+        .collect::<Vec<_>>();
+
+    let outputs = collect_tool_outputs(&bodies)?;
+
+    let start_output = outputs
+        .get(first_call_id)
+        .expect("missing first unified_exec output");
+    let session_id = start_output["session_id"].as_str().unwrap_or_default();
+    assert!(
+        !session_id.is_empty(),
+        "expected session id in first unified_exec response"
+    );
+    assert!(
+        start_output["output"]
+            .as_str()
+            .unwrap_or_default()
+            .is_empty()
+    );
+
+    let reuse_output = outputs
+        .get(second_call_id)
+        .expect("missing reused unified_exec output");
+    assert_eq!(
+        reuse_output["session_id"].as_str().unwrap_or_default(),
+        session_id
+    );
+    let echoed = reuse_output["output"].as_str().unwrap_or_default();
+    assert!(
+        echoed.contains("hello unified exec"),
+        "expected echoed output, got {echoed:?}"
+    );
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+    skip_if_sandbox!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let mut builder = test_codex().with_config(|config| {
+        config.use_experimental_unified_exec_tool = true;
+    });
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = builder.build(&server).await?;
+
+    let first_call_id = "uexec-timeout";
+    let first_args = serde_json::json!({
+        "input": ["/bin/sh", "-c", "sleep 0.1; echo ready"],
+        "timeout_ms": 10,
+    });
+
+    let second_call_id = "uexec-poll";
+    let second_args = serde_json::json!({
+        "input": Vec::<String>::new(),
+        "session_id": "0",
+        "timeout_ms": 800,
+    });
+
+    let responses = vec![
+        sse(vec![
+            serde_json::json!({"type": "response.created", "response": {"id": "resp-1"}}),
+            ev_function_call(
+                first_call_id,
+                "unified_exec",
+                &serde_json::to_string(&first_args)?,
+            ),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            serde_json::json!({"type": "response.created", "response": {"id": "resp-2"}}),
+            ev_function_call(
+                second_call_id,
+                "unified_exec",
+                &serde_json::to_string(&second_args)?,
+            ),
+            ev_completed("resp-2"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-3"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![InputItem::Text {
+                text: "check timeout".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    loop {
+        let event = codex.next_event().await.expect("event");
+        if matches!(event.msg, EventMsg::TaskComplete(_)) {
+            break;
+        }
+    }
+
+    let requests = server.received_requests().await.expect("recorded requests");
+    assert!(!requests.is_empty(), "expected at least one POST request");
+
+    let bodies = requests
+        .iter()
+        .map(|req| req.body_json::<Value>().expect("request json"))
+        .collect::<Vec<_>>();
+
+    let outputs = collect_tool_outputs(&bodies)?;
+
+    let first_output = outputs.get(first_call_id).expect("missing timeout output");
+    assert_eq!(first_output["session_id"], "0");
+    assert!(
+        first_output["output"]
+            .as_str()
+            .unwrap_or_default()
+            .is_empty()
+    );
+
+    let poll_output = outputs.get(second_call_id).expect("missing poll output");
+    let output_text = poll_output["output"].as_str().unwrap_or_default();
+    assert!(
+        output_text.contains("ready"),
+        "expected ready output, got {output_text:?}"
+    );
+
+    Ok(())
+}
--- a/codex-rs/otel/src/otel_event_manager.rs
+++ b/codex-rs/otel/src/otel_event_manager.rs
@@ -14,6 +14,7 @@ use eventsource_stream::EventStreamError as StreamError;
 use reqwest::Error;
 use reqwest::Response;
 use serde::Serialize;
+use std::borrow::Cow;
 use std::fmt::Display;
 use std::time::Duration;
 use std::time::Instant;
@@ -377,8 +378,8 @@ impl OtelEventManager {
        let duration = start.elapsed();

        let (output, success) = match &result {
-            Ok((preview, success)) => (preview.clone(), *success),
-            Err(error) => (error.to_string(), false),
+            Ok((preview, success)) => (Cow::Borrowed(preview.as_str()), *success),
+            Err(error) => (Cow::Owned(error.to_string()), false),
        };

        let success_str = if success { "true" } else { "false" };
@@ -399,6 +400,7 @@ impl OtelEventManager {
            arguments = %arguments,
            duration_ms = %duration.as_millis(),
            success = %success_str,
+            // `output` is truncated by the tool layer before reaching telemetry.
            output = %output,
        );