Fix: cap aggregated exec output consistently (#9759)

## WHAT?
- Bias aggregated output toward stderr under contention (2/3 stderr, 1/3
stdout) while keeping the 1 MiB cap.
- Rebalance unused stderr share back to stdout when stderr is tiny to
avoid underfilling.
- Add tests for contention, small-stderr rebalance, and under-cap
ordering (stdout then stderr).

## WHY?
- Review feedback requested stderr priority under contention.
- Avoid underfilled aggregated output when stderr is small while
preserving a consistent cap across exec paths.

## HOW?
- Update `aggregate_output` to compute stdout/stderr shares, then
reassign unused capacity to the other stream.
- Use the helper in both Windows and async exec paths.
- Add regression tests for contention/rebalance and under-cap ordering.

## BEFORE
```rust
// Best-effort aggregate: stdout then stderr (capped).
let mut aggregated = Vec::with_capacity(
    stdout
        .text
        .len()
        .saturating_add(stderr.text.len())
        .min(EXEC_OUTPUT_MAX_BYTES),
);
append_capped(&mut aggregated, &stdout.text, EXEC_OUTPUT_MAX_BYTES);
append_capped(&mut aggregated, &stderr.text, EXEC_OUTPUT_MAX_BYTES);
let aggregated_output = StreamOutput {
    text: aggregated,
    truncated_after_lines: None,
};
```

## AFTER
```rust
fn aggregate_output(
    stdout: &StreamOutput<Vec<u8>>,
    stderr: &StreamOutput<Vec<u8>>,
) -> StreamOutput<Vec<u8>> {
    let total_len = stdout.text.len().saturating_add(stderr.text.len());
    let max_bytes = EXEC_OUTPUT_MAX_BYTES;
    let mut aggregated = Vec::with_capacity(total_len.min(max_bytes));

    if total_len <= max_bytes {
        aggregated.extend_from_slice(&stdout.text);
        aggregated.extend_from_slice(&stderr.text);
        return StreamOutput {
            text: aggregated,
            truncated_after_lines: None,
        };
    }

    // Under contention, reserve 1/3 for stdout and 2/3 for stderr; rebalance unused stderr to stdout.
    let want_stdout = stdout.text.len().min(max_bytes / 3);
    let want_stderr = stderr.text.len();
    let stderr_take = want_stderr.min(max_bytes.saturating_sub(want_stdout));
    let remaining = max_bytes.saturating_sub(want_stdout + stderr_take);
    let stdout_take = want_stdout + remaining.min(stdout.text.len().saturating_sub(want_stdout));

    aggregated.extend_from_slice(&stdout.text[..stdout_take]);
    aggregated.extend_from_slice(&stderr.text[..stderr_take]);

    StreamOutput {
        text: aggregated,
        truncated_after_lines: None,
    }
}
```

## TESTS
- [x] `just fmt`
- [x] `just fix -p codex-core`
- [x] `cargo test -p codex-core aggregate_output_`
- [x] `cargo test -p codex-core`
- [x] `cargo test --all-features`

## FIXES
Fixes #9758
This commit is contained in:
K Bediako
2026-01-27 20:29:12 +11:00
committed by GitHub
parent 509ff1c643
commit ab99df0694

View File

@@ -312,20 +312,7 @@ async fn exec_windows_sandbox(
text: stderr_text,
truncated_after_lines: None,
};
// Best-effort aggregate: stdout then stderr (capped).
let mut aggregated = Vec::with_capacity(
stdout
.text
.len()
.saturating_add(stderr.text.len())
.min(EXEC_OUTPUT_MAX_BYTES),
);
append_capped(&mut aggregated, &stdout.text, EXEC_OUTPUT_MAX_BYTES);
append_capped(&mut aggregated, &stderr.text, EXEC_OUTPUT_MAX_BYTES);
let aggregated_output = StreamOutput {
text: aggregated,
truncated_after_lines: None,
};
let aggregated_output = aggregate_output(&stdout, &stderr);
Ok(RawExecToolCallOutput {
exit_status,
@@ -519,6 +506,39 @@ fn append_capped(dst: &mut Vec<u8>, src: &[u8], max_bytes: usize) {
dst.extend_from_slice(&src[..take]);
}
fn aggregate_output(
stdout: &StreamOutput<Vec<u8>>,
stderr: &StreamOutput<Vec<u8>>,
) -> StreamOutput<Vec<u8>> {
let total_len = stdout.text.len().saturating_add(stderr.text.len());
let max_bytes = EXEC_OUTPUT_MAX_BYTES;
let mut aggregated = Vec::with_capacity(total_len.min(max_bytes));
if total_len <= max_bytes {
aggregated.extend_from_slice(&stdout.text);
aggregated.extend_from_slice(&stderr.text);
return StreamOutput {
text: aggregated,
truncated_after_lines: None,
};
}
// Under contention, reserve 1/3 for stdout and 2/3 for stderr; rebalance unused stderr to stdout.
let want_stdout = stdout.text.len().min(max_bytes / 3);
let want_stderr = stderr.text.len();
let stderr_take = want_stderr.min(max_bytes.saturating_sub(want_stdout));
let remaining = max_bytes.saturating_sub(want_stdout + stderr_take);
let stdout_take = want_stdout + remaining.min(stdout.text.len().saturating_sub(want_stdout));
aggregated.extend_from_slice(&stdout.text[..stdout_take]);
aggregated.extend_from_slice(&stderr.text[..stderr_take]);
StreamOutput {
text: aggregated,
truncated_after_lines: None,
}
}
#[derive(Clone, Debug)]
pub struct ExecToolCallOutput {
pub exit_code: i32,
@@ -683,20 +703,7 @@ async fn consume_truncated_output(
Duration::from_millis(IO_DRAIN_TIMEOUT_MS),
)
.await?;
// Best-effort aggregate: stdout then stderr (capped).
let mut aggregated = Vec::with_capacity(
stdout
.text
.len()
.saturating_add(stderr.text.len())
.min(EXEC_OUTPUT_MAX_BYTES),
);
append_capped(&mut aggregated, &stdout.text, EXEC_OUTPUT_MAX_BYTES);
append_capped(&mut aggregated, &stderr.text, EXEC_OUTPUT_MAX_BYTES * 2);
let aggregated_output = StreamOutput {
text: aggregated,
truncated_after_lines: None,
};
let aggregated_output = aggregate_output(&stdout, &stderr);
Ok(RawExecToolCallOutput {
exit_status,
@@ -771,6 +778,7 @@ fn synthetic_exit_status(code: i32) -> ExitStatus {
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
use std::time::Duration;
use tokio::io::AsyncWriteExt;
@@ -846,6 +854,85 @@ mod tests {
assert_eq!(out.text.len(), EXEC_OUTPUT_MAX_BYTES);
}
#[test]
fn aggregate_output_prefers_stderr_on_contention() {
let stdout = StreamOutput {
text: vec![b'a'; EXEC_OUTPUT_MAX_BYTES],
truncated_after_lines: None,
};
let stderr = StreamOutput {
text: vec![b'b'; EXEC_OUTPUT_MAX_BYTES],
truncated_after_lines: None,
};
let aggregated = aggregate_output(&stdout, &stderr);
let stdout_cap = EXEC_OUTPUT_MAX_BYTES / 3;
let stderr_cap = EXEC_OUTPUT_MAX_BYTES.saturating_sub(stdout_cap);
assert_eq!(aggregated.text.len(), EXEC_OUTPUT_MAX_BYTES);
assert_eq!(aggregated.text[..stdout_cap], vec![b'a'; stdout_cap]);
assert_eq!(aggregated.text[stdout_cap..], vec![b'b'; stderr_cap]);
}
#[test]
fn aggregate_output_fills_remaining_capacity_with_stderr() {
let stdout_len = EXEC_OUTPUT_MAX_BYTES / 10;
let stdout = StreamOutput {
text: vec![b'a'; stdout_len],
truncated_after_lines: None,
};
let stderr = StreamOutput {
text: vec![b'b'; EXEC_OUTPUT_MAX_BYTES],
truncated_after_lines: None,
};
let aggregated = aggregate_output(&stdout, &stderr);
let stderr_cap = EXEC_OUTPUT_MAX_BYTES.saturating_sub(stdout_len);
assert_eq!(aggregated.text.len(), EXEC_OUTPUT_MAX_BYTES);
assert_eq!(aggregated.text[..stdout_len], vec![b'a'; stdout_len]);
assert_eq!(aggregated.text[stdout_len..], vec![b'b'; stderr_cap]);
}
#[test]
fn aggregate_output_rebalances_when_stderr_is_small() {
let stdout = StreamOutput {
text: vec![b'a'; EXEC_OUTPUT_MAX_BYTES],
truncated_after_lines: None,
};
let stderr = StreamOutput {
text: vec![b'b'; 1],
truncated_after_lines: None,
};
let aggregated = aggregate_output(&stdout, &stderr);
let stdout_len = EXEC_OUTPUT_MAX_BYTES.saturating_sub(1);
assert_eq!(aggregated.text.len(), EXEC_OUTPUT_MAX_BYTES);
assert_eq!(aggregated.text[..stdout_len], vec![b'a'; stdout_len]);
assert_eq!(aggregated.text[stdout_len..], vec![b'b'; 1]);
}
#[test]
fn aggregate_output_keeps_stdout_then_stderr_when_under_cap() {
let stdout = StreamOutput {
text: vec![b'a'; 4],
truncated_after_lines: None,
};
let stderr = StreamOutput {
text: vec![b'b'; 3],
truncated_after_lines: None,
};
let aggregated = aggregate_output(&stdout, &stderr);
let mut expected = Vec::new();
expected.extend_from_slice(&stdout.text);
expected.extend_from_slice(&stderr.text);
assert_eq!(aggregated.text, expected);
assert_eq!(aggregated.truncated_after_lines, None);
}
#[cfg(unix)]
#[test]
fn sandbox_detection_flags_sigsys_exit_code() {