mirror of
https://github.com/openai/codex.git
synced 2026-05-21 19:45:26 +00:00
Compare commits
5 Commits
rust-v0.13
...
starr/rust
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c101891009 | ||
|
|
19e75b3299 | ||
|
|
842483a85e | ||
|
|
5a4202ad90 | ||
|
|
e43a2e297f |
@@ -47,6 +47,18 @@ on:
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
nextest_filterset:
|
||||
required: false
|
||||
default: "all()"
|
||||
type: string
|
||||
selected_shard:
|
||||
required: false
|
||||
default: 0
|
||||
type: number
|
||||
repeat_count:
|
||||
required: false
|
||||
default: 1
|
||||
type: number
|
||||
|
||||
# Caller workflow-level env does not flow through workflow_call, so keep the
|
||||
# Cargo git transport hardening on the archive and shard jobs directly here.
|
||||
@@ -76,6 +88,19 @@ jobs:
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Validate targeted test inputs
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if (( ${{ inputs.selected_shard }} < 0 || ${{ inputs.selected_shard }} > 4 )); then
|
||||
echo "selected_shard must be between 0 and 4" >&2
|
||||
exit 1
|
||||
fi
|
||||
if (( ${{ inputs.repeat_count }} < 1 )); then
|
||||
echo "repeat_count must be at least 1" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Configure Dev Drive (Windows)
|
||||
if: ${{ runner.os == 'Windows' }}
|
||||
shell: pwsh
|
||||
@@ -287,7 +312,7 @@ jobs:
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
shard:
|
||||
name: Tests shard ${{ matrix.shard }}/4
|
||||
name: Tests ${{ matrix.shard == 0 && 'all selected tests' || format('shard {0}/4', matrix.shard) }}
|
||||
needs: archive
|
||||
runs-on: ${{ inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
|
||||
timeout-minutes: 60
|
||||
@@ -300,7 +325,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
shard: [1, 2, 3, 4]
|
||||
shard: ${{ inputs.selected_shard == 0 && fromJSON('[0]') || fromJSON(format('[{0}]', inputs.selected_shard)) }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
@@ -391,8 +416,11 @@ jobs:
|
||||
--no-fail-fast
|
||||
--archive-file "${archive_file}"
|
||||
--workspace-remap "${workspace_root}"
|
||||
--partition "hash:${{ matrix.shard }}/4"
|
||||
--filterset "${NEXTEST_FILTERSET}"
|
||||
)
|
||||
if [[ "${{ matrix.shard }}" != "0" ]]; then
|
||||
nextest_args+=(--partition "hash:${{ matrix.shard }}/4")
|
||||
fi
|
||||
if [[ "${{ inputs.test_threads }}" != "0" ]]; then
|
||||
nextest_args+=(--test-threads "${{ inputs.test_threads }}")
|
||||
fi
|
||||
@@ -417,8 +445,12 @@ jobs:
|
||||
)
|
||||
fi
|
||||
|
||||
"${test_command[@]}"
|
||||
for attempt in $(seq 1 "${{ inputs.repeat_count }}"); do
|
||||
echo "nextest attempt ${attempt}/${{ inputs.repeat_count }}"
|
||||
"${test_command[@]}"
|
||||
done
|
||||
env:
|
||||
NEXTEST_FILTERSET: ${{ inputs.nextest_filterset }}
|
||||
RUST_BACKTRACE: 1
|
||||
RUST_MIN_STACK: "8388608" # 8 MiB
|
||||
NEXTEST_STATUS_LEVEL: leak
|
||||
|
||||
86
.github/workflows/rust-ci-full.yml
vendored
86
.github/workflows/rust-ci-full.yml
vendored
@@ -5,6 +5,34 @@ on:
|
||||
- main
|
||||
- "**full-ci**"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
repro_platform:
|
||||
description: Platform lane to run. Use all for the normal full workflow.
|
||||
required: true
|
||||
default: all
|
||||
type: choice
|
||||
options:
|
||||
- all
|
||||
- macos-aarch64
|
||||
- linux-x64-remote
|
||||
- linux-arm64
|
||||
- windows-x64
|
||||
- windows-arm64
|
||||
nextest_filterset:
|
||||
description: cargo-nextest filterset selecting the tests to run.
|
||||
required: true
|
||||
default: all()
|
||||
type: string
|
||||
shard:
|
||||
description: Full-CI shard to reproduce. Use 0 to run selected tests without sharding.
|
||||
required: true
|
||||
default: 0
|
||||
type: number
|
||||
repeat_count:
|
||||
description: Number of times to rerun the selected shard/filterset in one job.
|
||||
required: true
|
||||
default: 1
|
||||
type: number
|
||||
|
||||
# CI builds in debug (dev) for faster signal.
|
||||
env:
|
||||
@@ -16,6 +44,7 @@ env:
|
||||
jobs:
|
||||
# --- CI that doesn't need specific targets ---------------------------------
|
||||
general:
|
||||
if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
|
||||
name: Format / etc
|
||||
runs-on: ubuntu-24.04
|
||||
defaults:
|
||||
@@ -32,6 +61,7 @@ jobs:
|
||||
run: cargo fmt -- --config imports_granularity=Item --check
|
||||
|
||||
cargo_shear:
|
||||
if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
|
||||
name: cargo shear
|
||||
runs-on: ubuntu-24.04
|
||||
defaults:
|
||||
@@ -49,6 +79,7 @@ jobs:
|
||||
run: cargo shear --deny-warnings
|
||||
|
||||
argument_comment_lint_package:
|
||||
if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
|
||||
name: Argument comment lint package
|
||||
runs-on: ubuntu-24.04
|
||||
env:
|
||||
@@ -90,6 +121,7 @@ jobs:
|
||||
RUST_MIN_STACK: "8388608" # 8 MiB
|
||||
|
||||
argument_comment_lint_prebuilt:
|
||||
if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
|
||||
name: Argument comment lint - ${{ matrix.name }}
|
||||
runs-on: ${{ matrix.runs_on || matrix.runner }}
|
||||
timeout-minutes: 30
|
||||
@@ -149,6 +181,7 @@ jobs:
|
||||
|
||||
# --- CI to validate on different os/targets --------------------------------
|
||||
lint_build:
|
||||
if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
|
||||
name: Lint/Build — ${{ matrix.runner }} - ${{ matrix.target }}${{ matrix.profile == 'release' && ' (release)' || '' }}
|
||||
runs-on: ${{ matrix.runs_on || matrix.runner }}
|
||||
timeout-minutes: 30
|
||||
@@ -522,6 +555,7 @@ jobs:
|
||||
key: apt-${{ matrix.runner }}-${{ matrix.target }}-v1
|
||||
|
||||
tests_macos_aarch64:
|
||||
if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'macos-aarch64' }}
|
||||
name: Tests — macos-15-xlarge - aarch64-apple-darwin
|
||||
uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
|
||||
with:
|
||||
@@ -530,9 +564,13 @@ jobs:
|
||||
profile: ci-test
|
||||
artifact_id: macos-aarch64
|
||||
use_sccache: true
|
||||
nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
|
||||
selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
|
||||
repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
|
||||
secrets: inherit
|
||||
|
||||
tests_linux_x64_remote:
|
||||
if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'linux-x64-remote' }}
|
||||
name: Tests — ubuntu-24.04 - x86_64-unknown-linux-gnu (remote)
|
||||
uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
|
||||
with:
|
||||
@@ -544,9 +582,13 @@ jobs:
|
||||
artifact_id: linux-x64-remote
|
||||
remote_env: true
|
||||
use_sccache: true
|
||||
nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
|
||||
selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
|
||||
repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
|
||||
secrets: inherit
|
||||
|
||||
tests_linux_arm64:
|
||||
if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'linux-arm64' }}
|
||||
name: Tests — ubuntu-24.04-arm - aarch64-unknown-linux-gnu
|
||||
uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
|
||||
with:
|
||||
@@ -557,9 +599,13 @@ jobs:
|
||||
profile: ci-test
|
||||
artifact_id: linux-arm64
|
||||
use_sccache: true
|
||||
nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
|
||||
selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
|
||||
repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
|
||||
secrets: inherit
|
||||
|
||||
tests_windows_x64:
|
||||
if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'windows-x64' }}
|
||||
name: Tests — windows-x64 - x86_64-pc-windows-msvc
|
||||
uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
|
||||
with:
|
||||
@@ -570,9 +616,13 @@ jobs:
|
||||
profile: ci-test
|
||||
artifact_id: windows-x64
|
||||
test_threads: 8
|
||||
nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
|
||||
selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
|
||||
repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
|
||||
secrets: inherit
|
||||
|
||||
tests_windows_arm64:
|
||||
if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'windows-arm64' }}
|
||||
name: Tests — windows-arm64 - aarch64-pc-windows-msvc
|
||||
uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
|
||||
with:
|
||||
@@ -587,6 +637,9 @@ jobs:
|
||||
artifact_id: windows-arm64
|
||||
test_threads: 8
|
||||
use_sccache: true
|
||||
nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
|
||||
selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
|
||||
repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
|
||||
secrets: inherit
|
||||
|
||||
# --- Gatherer job for the full post-merge workflow --------------------------
|
||||
@@ -621,16 +674,29 @@ jobs:
|
||||
echo "test arm64 : ${{ needs.tests_linux_arm64.result }}"
|
||||
echo "test winx64: ${{ needs.tests_windows_x64.result }}"
|
||||
echo "test winarm: ${{ needs.tests_windows_arm64.result }}"
|
||||
[[ '${{ needs.argument_comment_lint_package.result }}' == 'success' ]] || { echo 'argument_comment_lint_package failed'; exit 1; }
|
||||
[[ '${{ needs.argument_comment_lint_prebuilt.result }}' == 'success' ]] || { echo 'argument_comment_lint_prebuilt failed'; exit 1; }
|
||||
[[ '${{ needs.general.result }}' == 'success' ]] || { echo 'general failed'; exit 1; }
|
||||
[[ '${{ needs.cargo_shear.result }}' == 'success' ]] || { echo 'cargo_shear failed'; exit 1; }
|
||||
[[ '${{ needs.lint_build.result }}' == 'success' ]] || { echo 'lint_build failed'; exit 1; }
|
||||
[[ '${{ needs.tests_macos_aarch64.result }}' == 'success' ]] || { echo 'tests_macos_aarch64 failed'; exit 1; }
|
||||
[[ '${{ needs.tests_linux_x64_remote.result }}' == 'success' ]] || { echo 'tests_linux_x64_remote failed'; exit 1; }
|
||||
[[ '${{ needs.tests_linux_arm64.result }}' == 'success' ]] || { echo 'tests_linux_arm64 failed'; exit 1; }
|
||||
[[ '${{ needs.tests_windows_x64.result }}' == 'success' ]] || { echo 'tests_windows_x64 failed'; exit 1; }
|
||||
[[ '${{ needs.tests_windows_arm64.result }}' == 'success' ]] || { echo 'tests_windows_arm64 failed'; exit 1; }
|
||||
if [[ '${{ github.event_name }}' == 'workflow_dispatch' && '${{ github.event.inputs.repro_platform }}' != 'all' ]]; then
|
||||
selected_result=''
|
||||
case '${{ github.event.inputs.repro_platform }}' in
|
||||
macos-aarch64) selected_result='${{ needs.tests_macos_aarch64.result }}' ;;
|
||||
linux-x64-remote) selected_result='${{ needs.tests_linux_x64_remote.result }}' ;;
|
||||
linux-arm64) selected_result='${{ needs.tests_linux_arm64.result }}' ;;
|
||||
windows-x64) selected_result='${{ needs.tests_windows_x64.result }}' ;;
|
||||
windows-arm64) selected_result='${{ needs.tests_windows_arm64.result }}' ;;
|
||||
*) echo 'unknown repro_platform'; exit 1 ;;
|
||||
esac
|
||||
[[ "${selected_result}" == 'success' ]] || { echo "selected repro platform failed: ${selected_result}"; exit 1; }
|
||||
else
|
||||
[[ '${{ needs.argument_comment_lint_package.result }}' == 'success' ]] || { echo 'argument_comment_lint_package failed'; exit 1; }
|
||||
[[ '${{ needs.argument_comment_lint_prebuilt.result }}' == 'success' ]] || { echo 'argument_comment_lint_prebuilt failed'; exit 1; }
|
||||
[[ '${{ needs.general.result }}' == 'success' ]] || { echo 'general failed'; exit 1; }
|
||||
[[ '${{ needs.cargo_shear.result }}' == 'success' ]] || { echo 'cargo_shear failed'; exit 1; }
|
||||
[[ '${{ needs.lint_build.result }}' == 'success' ]] || { echo 'lint_build failed'; exit 1; }
|
||||
[[ '${{ needs.tests_macos_aarch64.result }}' == 'success' ]] || { echo 'tests_macos_aarch64 failed'; exit 1; }
|
||||
[[ '${{ needs.tests_linux_x64_remote.result }}' == 'success' ]] || { echo 'tests_linux_x64_remote failed'; exit 1; }
|
||||
[[ '${{ needs.tests_linux_arm64.result }}' == 'success' ]] || { echo 'tests_linux_arm64 failed'; exit 1; }
|
||||
[[ '${{ needs.tests_windows_x64.result }}' == 'success' ]] || { echo 'tests_windows_x64 failed'; exit 1; }
|
||||
[[ '${{ needs.tests_windows_arm64.result }}' == 'success' ]] || { echo 'tests_windows_arm64 failed'; exit 1; }
|
||||
fi
|
||||
|
||||
- name: sccache summary note
|
||||
if: always()
|
||||
|
||||
@@ -311,6 +311,7 @@ pub struct ExecCommandToolOutput {
|
||||
pub wall_time: Duration,
|
||||
/// Raw bytes returned for this unified exec call before any truncation.
|
||||
pub raw_output: Vec<u8>,
|
||||
pub truncation_policy: TruncationPolicy,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub process_id: Option<i32>,
|
||||
pub exit_code: Option<i32>,
|
||||
@@ -357,7 +358,9 @@ impl ToolOutput for ExecCommandToolOutput {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(JsonValue::String(self.truncated_output()))
|
||||
Some(JsonValue::String(
|
||||
self.truncated_output(self.model_output_max_tokens()),
|
||||
))
|
||||
}
|
||||
|
||||
fn code_mode_result(&self, _payload: &ToolPayload) -> JsonValue {
|
||||
@@ -381,7 +384,10 @@ impl ToolOutput for ExecCommandToolOutput {
|
||||
exit_code: self.exit_code,
|
||||
session_id: self.process_id,
|
||||
original_token_count: self.original_token_count,
|
||||
output: self.truncated_output(),
|
||||
output: match self.max_output_tokens {
|
||||
Some(max_tokens) => self.truncated_output(max_tokens),
|
||||
None => String::from_utf8_lossy(&self.raw_output).to_string(),
|
||||
},
|
||||
};
|
||||
|
||||
serde_json::to_value(result).unwrap_or_else(|err| {
|
||||
@@ -391,9 +397,12 @@ impl ToolOutput for ExecCommandToolOutput {
|
||||
}
|
||||
|
||||
impl ExecCommandToolOutput {
|
||||
pub(crate) fn truncated_output(&self) -> String {
|
||||
fn model_output_max_tokens(&self) -> usize {
|
||||
resolve_max_tokens(self.max_output_tokens).min(self.truncation_policy.token_budget())
|
||||
}
|
||||
|
||||
pub(crate) fn truncated_output(&self, max_tokens: usize) -> String {
|
||||
let text = String::from_utf8_lossy(&self.raw_output).to_string();
|
||||
let max_tokens = resolve_max_tokens(self.max_output_tokens);
|
||||
formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens))
|
||||
}
|
||||
|
||||
@@ -420,7 +429,7 @@ impl ExecCommandToolOutput {
|
||||
}
|
||||
|
||||
sections.push("Output:".to_string());
|
||||
sections.push(self.truncated_output());
|
||||
sections.push(self.truncated_output(self.model_output_max_tokens()));
|
||||
|
||||
sections.join("\n")
|
||||
}
|
||||
|
||||
@@ -429,6 +429,7 @@ fn exec_command_tool_output_formats_truncated_response() {
|
||||
chunk_id: "abc123".to_string(),
|
||||
wall_time: std::time::Duration::from_millis(1250),
|
||||
raw_output: b"token one token two token three token four token five".to_vec(),
|
||||
truncation_policy: TruncationPolicy::Tokens(10_000),
|
||||
max_output_tokens: Some(4),
|
||||
process_id: None,
|
||||
exit_code: Some(0),
|
||||
|
||||
@@ -7,10 +7,8 @@ use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::hook_names::HookToolName;
|
||||
use crate::tools::registry::PostToolUsePayload;
|
||||
use crate::unified_exec::resolve_max_tokens;
|
||||
use codex_protocol::models::AdditionalPermissionProfile;
|
||||
use codex_tools::UnifiedExecShellMode;
|
||||
use codex_utils_output_truncation::TruncationPolicy;
|
||||
use serde::Deserialize;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
@@ -72,13 +70,6 @@ fn default_tty() -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn effective_max_output_tokens(
|
||||
max_output_tokens: Option<usize>,
|
||||
truncation_policy: TruncationPolicy,
|
||||
) -> usize {
|
||||
resolve_max_tokens(max_output_tokens).min(truncation_policy.token_budget())
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ResolvedCommand {
|
||||
pub(crate) command: Vec<String>,
|
||||
|
||||
@@ -36,7 +36,6 @@ use super::super::shell_spec::CommandToolOptions;
|
||||
use super::super::shell_spec::create_exec_command_tool_with_environment_id;
|
||||
use super::ExecCommandArgs;
|
||||
use super::ExecCommandEnvironmentArgs;
|
||||
use super::effective_max_output_tokens;
|
||||
use super::get_command;
|
||||
use super::post_unified_exec_tool_use_payload;
|
||||
|
||||
@@ -162,8 +161,6 @@ impl ToolExecutor<ToolInvocation> for ExecCommandHandler {
|
||||
prefix_rule,
|
||||
..
|
||||
} = args;
|
||||
let max_output_tokens =
|
||||
effective_max_output_tokens(max_output_tokens, turn.truncation_policy);
|
||||
|
||||
let exec_permission_approvals_enabled =
|
||||
session.features().enabled(Feature::ExecPermissionApprovals);
|
||||
@@ -241,7 +238,8 @@ impl ToolExecutor<ToolInvocation> for ExecCommandHandler {
|
||||
chunk_id: String::new(),
|
||||
wall_time: std::time::Duration::ZERO,
|
||||
raw_output: output.into_text().into_bytes(),
|
||||
max_output_tokens: Some(max_output_tokens),
|
||||
truncation_policy: turn.truncation_policy,
|
||||
max_output_tokens,
|
||||
process_id: None,
|
||||
exit_code: None,
|
||||
original_token_count: None,
|
||||
@@ -258,7 +256,7 @@ impl ToolExecutor<ToolInvocation> for ExecCommandHandler {
|
||||
hook_command: hook_command.clone(),
|
||||
process_id,
|
||||
yield_time_ms,
|
||||
max_output_tokens: Some(max_output_tokens),
|
||||
max_output_tokens,
|
||||
cwd,
|
||||
sandbox_cwd: turn_environment.cwd.clone(),
|
||||
environment,
|
||||
@@ -284,7 +282,8 @@ impl ToolExecutor<ToolInvocation> for ExecCommandHandler {
|
||||
chunk_id: generate_chunk_id(),
|
||||
wall_time: output.duration,
|
||||
raw_output: output_text.into_bytes(),
|
||||
max_output_tokens: Some(max_output_tokens),
|
||||
truncation_policy: turn.truncation_policy,
|
||||
max_output_tokens,
|
||||
// Sandbox denial is terminal, so there is no live
|
||||
// process for write_stdin to resume.
|
||||
process_id: None,
|
||||
|
||||
@@ -14,7 +14,6 @@ use codex_tools::ToolSpec;
|
||||
use serde::Deserialize;
|
||||
|
||||
use super::super::shell_spec::create_write_stdin_tool;
|
||||
use super::effective_max_output_tokens;
|
||||
use super::post_unified_exec_tool_use_payload;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
@@ -62,8 +61,6 @@ impl ToolExecutor<ToolInvocation> for WriteStdinHandler {
|
||||
};
|
||||
|
||||
let args: WriteStdinArgs = parse_arguments(&arguments)?;
|
||||
let max_output_tokens =
|
||||
effective_max_output_tokens(args.max_output_tokens, turn.truncation_policy);
|
||||
let response = session
|
||||
.services
|
||||
.unified_exec_manager
|
||||
@@ -71,21 +68,29 @@ impl ToolExecutor<ToolInvocation> for WriteStdinHandler {
|
||||
process_id: args.session_id,
|
||||
input: &args.chars,
|
||||
yield_time_ms: args.yield_time_ms,
|
||||
max_output_tokens: Some(max_output_tokens),
|
||||
max_output_tokens: args.max_output_tokens,
|
||||
truncation_policy: turn.truncation_policy,
|
||||
})
|
||||
.await
|
||||
.map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!("write_stdin failed: {err}"))
|
||||
})?;
|
||||
|
||||
let interaction = TerminalInteractionEvent {
|
||||
call_id: response.event_call_id.clone(),
|
||||
process_id: args.session_id.to_string(),
|
||||
stdin: args.chars.clone(),
|
||||
};
|
||||
session
|
||||
.send_event(turn.as_ref(), EventMsg::TerminalInteraction(interaction))
|
||||
.await;
|
||||
// Empty stdin is a background poll, so emit it only while there is
|
||||
// still a live process for the UI to wait on. Non-empty stdin is a real
|
||||
// terminal interaction and should remain visible even if it completes
|
||||
// the process before the response returns.
|
||||
if !args.chars.is_empty() || response.process_id.is_some() {
|
||||
let process_id = response.process_id.unwrap_or(args.session_id);
|
||||
let interaction = TerminalInteractionEvent {
|
||||
call_id: response.event_call_id.clone(),
|
||||
process_id: process_id.to_string(),
|
||||
stdin: args.chars.clone(),
|
||||
};
|
||||
session
|
||||
.send_event(turn.as_ref(), EventMsg::TerminalInteraction(interaction))
|
||||
.await;
|
||||
}
|
||||
|
||||
Ok(boxed_tool_output(response))
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ use crate::shell::default_user_shell;
|
||||
use codex_tools::UnifiedExecShellMode;
|
||||
use codex_tools::ZshForkConfig;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use codex_utils_output_truncation::TruncationPolicy;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -17,6 +18,8 @@ use crate::tools::registry::CoreToolRuntime;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
const TEST_TRUNCATION_POLICY: TruncationPolicy = TruncationPolicy::Tokens(10_000);
|
||||
|
||||
async fn invocation_for_payload(
|
||||
tool_name: &str,
|
||||
call_id: &str,
|
||||
@@ -258,6 +261,7 @@ async fn exec_command_post_tool_use_payload_uses_output_for_noninteractive_one_s
|
||||
chunk_id: "chunk-1".to_string(),
|
||||
wall_time: std::time::Duration::from_millis(498),
|
||||
raw_output: b"three".to_vec(),
|
||||
truncation_policy: TEST_TRUNCATION_POLICY,
|
||||
max_output_tokens: None,
|
||||
process_id: None,
|
||||
exit_code: Some(0),
|
||||
@@ -287,6 +291,7 @@ async fn exec_command_post_tool_use_payload_uses_output_for_interactive_completi
|
||||
chunk_id: "chunk-1".to_string(),
|
||||
wall_time: std::time::Duration::from_millis(498),
|
||||
raw_output: b"three".to_vec(),
|
||||
truncation_policy: TEST_TRUNCATION_POLICY,
|
||||
max_output_tokens: None,
|
||||
process_id: None,
|
||||
exit_code: Some(0),
|
||||
@@ -317,6 +322,7 @@ async fn exec_command_post_tool_use_payload_skips_running_sessions() {
|
||||
chunk_id: "chunk-1".to_string(),
|
||||
wall_time: std::time::Duration::from_millis(498),
|
||||
raw_output: b"three".to_vec(),
|
||||
truncation_policy: TEST_TRUNCATION_POLICY,
|
||||
max_output_tokens: None,
|
||||
process_id: Some(45),
|
||||
exit_code: None,
|
||||
@@ -342,6 +348,7 @@ async fn write_stdin_post_tool_use_payload_uses_original_exec_call_id_and_comman
|
||||
chunk_id: "chunk-2".to_string(),
|
||||
wall_time: std::time::Duration::from_millis(498),
|
||||
raw_output: b"finished\n".to_vec(),
|
||||
truncation_policy: TEST_TRUNCATION_POLICY,
|
||||
max_output_tokens: None,
|
||||
process_id: None,
|
||||
exit_code: Some(0),
|
||||
@@ -372,6 +379,7 @@ async fn write_stdin_post_tool_use_payload_keeps_parallel_session_metadata_separ
|
||||
chunk_id: "chunk-a".to_string(),
|
||||
wall_time: std::time::Duration::from_millis(498),
|
||||
raw_output: b"alpha\n".to_vec(),
|
||||
truncation_policy: TEST_TRUNCATION_POLICY,
|
||||
max_output_tokens: None,
|
||||
process_id: None,
|
||||
exit_code: Some(0),
|
||||
@@ -383,6 +391,7 @@ async fn write_stdin_post_tool_use_payload_keeps_parallel_session_metadata_separ
|
||||
chunk_id: "chunk-b".to_string(),
|
||||
wall_time: std::time::Duration::from_millis(498),
|
||||
raw_output: b"beta\n".to_vec(),
|
||||
truncation_policy: TEST_TRUNCATION_POLICY,
|
||||
max_output_tokens: None,
|
||||
process_id: None,
|
||||
exit_code: Some(0),
|
||||
|
||||
@@ -31,6 +31,7 @@ use codex_exec_server::Environment;
|
||||
use codex_network_proxy::NetworkProxy;
|
||||
use codex_protocol::models::AdditionalPermissionProfile;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use codex_utils_output_truncation::TruncationPolicy;
|
||||
use rand::Rng;
|
||||
use rand::rng;
|
||||
use tokio::sync::Mutex;
|
||||
@@ -111,6 +112,7 @@ pub(crate) struct WriteStdinRequest<'a> {
|
||||
pub input: &'a str,
|
||||
pub yield_time_ms: u64,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub truncation_policy: TruncationPolicy,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
|
||||
@@ -10,6 +10,7 @@ use crate::tools::context::ExecCommandToolOutput;
|
||||
use crate::unified_exec::WriteStdinRequest;
|
||||
use crate::unified_exec::process::OutputHandles;
|
||||
use codex_sandboxing::SandboxType;
|
||||
use codex_utils_output_truncation::TruncationPolicy;
|
||||
use codex_utils_output_truncation::approx_token_count;
|
||||
use core_test_support::get_remote_test_env;
|
||||
use core_test_support::skip_if_sandbox;
|
||||
@@ -162,6 +163,7 @@ async fn exec_command_with_tty(
|
||||
chunk_id: generate_chunk_id(),
|
||||
wall_time,
|
||||
raw_output: collected,
|
||||
truncation_policy: turn.truncation_policy,
|
||||
max_output_tokens: None,
|
||||
process_id: response_process_id,
|
||||
exit_code,
|
||||
@@ -195,6 +197,7 @@ async fn write_stdin(
|
||||
input,
|
||||
yield_time_ms,
|
||||
max_output_tokens: None,
|
||||
truncation_policy: TruncationPolicy::Tokens(10_000),
|
||||
})
|
||||
.await
|
||||
}
|
||||
@@ -260,7 +263,9 @@ async fn unified_exec_persists_across_requests() -> anyhow::Result<()> {
|
||||
)
|
||||
.await?;
|
||||
assert!(
|
||||
out_2.truncated_output().contains("codex"),
|
||||
out_2
|
||||
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
|
||||
.contains("codex"),
|
||||
"expected environment variable output"
|
||||
);
|
||||
|
||||
@@ -301,7 +306,9 @@ async fn multi_unified_exec_sessions() -> anyhow::Result<()> {
|
||||
"short command should not report a process id if it exits quickly"
|
||||
);
|
||||
assert!(
|
||||
!out_2.truncated_output().contains("codex"),
|
||||
!out_2
|
||||
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
|
||||
.contains("codex"),
|
||||
"short command should run in a fresh shell"
|
||||
);
|
||||
|
||||
@@ -313,7 +320,9 @@ async fn multi_unified_exec_sessions() -> anyhow::Result<()> {
|
||||
)
|
||||
.await?;
|
||||
assert!(
|
||||
out_3.truncated_output().contains("codex"),
|
||||
out_3
|
||||
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
|
||||
.contains("codex"),
|
||||
"session should preserve state"
|
||||
);
|
||||
|
||||
@@ -350,7 +359,9 @@ async fn unified_exec_timeouts() -> anyhow::Result<()> {
|
||||
)
|
||||
.await?;
|
||||
assert!(
|
||||
!out_2.truncated_output().contains(TEST_VAR_VALUE),
|
||||
!out_2
|
||||
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
|
||||
.contains(TEST_VAR_VALUE),
|
||||
"timeout too short should yield incomplete output"
|
||||
);
|
||||
|
||||
@@ -359,7 +370,9 @@ async fn unified_exec_timeouts() -> anyhow::Result<()> {
|
||||
let out_3 = write_stdin(&session, process_id, "", /*yield_time_ms*/ 100).await?;
|
||||
|
||||
assert!(
|
||||
out_3.truncated_output().contains(TEST_VAR_VALUE),
|
||||
out_3
|
||||
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
|
||||
.contains(TEST_VAR_VALUE),
|
||||
"subsequent poll should retrieve output"
|
||||
);
|
||||
|
||||
@@ -394,7 +407,9 @@ async fn unified_exec_pause_blocks_yield_timeout() -> anyhow::Result<()> {
|
||||
"pause should block the unified exec yield timeout"
|
||||
);
|
||||
assert!(
|
||||
response.truncated_output().contains("unified-exec-done"),
|
||||
response
|
||||
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
|
||||
.contains("unified-exec-done"),
|
||||
"exec_command should wait for output after the pause lifts"
|
||||
);
|
||||
assert!(
|
||||
@@ -420,7 +435,11 @@ async fn requests_with_large_timeout_are_capped() -> anyhow::Result<()> {
|
||||
.await?;
|
||||
|
||||
assert!(result.process_id.is_some());
|
||||
assert!(result.truncated_output().contains("codex"));
|
||||
assert!(
|
||||
result
|
||||
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
|
||||
.contains("codex")
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -442,7 +461,11 @@ async fn completed_commands_do_not_persist_sessions() -> anyhow::Result<()> {
|
||||
result.process_id.is_some(),
|
||||
"completed command should report a process id"
|
||||
);
|
||||
assert!(result.truncated_output().contains("codex"));
|
||||
assert!(
|
||||
result
|
||||
.truncated_output(DEFAULT_MAX_OUTPUT_TOKENS)
|
||||
.contains("codex")
|
||||
);
|
||||
|
||||
assert!(
|
||||
session
|
||||
|
||||
@@ -581,6 +581,7 @@ impl UnifiedExecProcessManager {
|
||||
chunk_id,
|
||||
wall_time,
|
||||
raw_output: collected,
|
||||
truncation_policy: context.turn.truncation_policy,
|
||||
max_output_tokens: request.max_output_tokens,
|
||||
process_id: response_process_id,
|
||||
exit_code,
|
||||
@@ -691,8 +692,8 @@ impl UnifiedExecProcessManager {
|
||||
|
||||
// After polling, refresh_process_state tells us whether the PTY is
|
||||
// still alive or has exited and been removed from the store; we thread
|
||||
// that through so the handler can tag TerminalInteraction with an
|
||||
// appropriate process_id and exit_code.
|
||||
// that through so the handler can tag or suppress TerminalInteraction
|
||||
// with an appropriate process_id and exit_code.
|
||||
let status = if let Some(status) = status_after_write {
|
||||
status
|
||||
} else {
|
||||
@@ -725,6 +726,7 @@ impl UnifiedExecProcessManager {
|
||||
chunk_id,
|
||||
wall_time,
|
||||
raw_output: collected,
|
||||
truncation_policy: request.truncation_policy,
|
||||
max_output_tokens: request.max_output_tokens,
|
||||
process_id,
|
||||
exit_code,
|
||||
|
||||
@@ -5,6 +5,7 @@ use base64::Engine;
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||
use codex_config::types::McpServerConfig;
|
||||
use codex_config::types::McpServerTransportConfig;
|
||||
use codex_core::config::Config;
|
||||
use codex_features::Feature;
|
||||
use codex_login::CodexAuth;
|
||||
use codex_models_manager::bundled_models_response;
|
||||
@@ -144,11 +145,21 @@ async fn run_code_mode_turn(
|
||||
server: &MockServer,
|
||||
prompt: &str,
|
||||
code: &str,
|
||||
) -> Result<(TestCodex, ResponseMock)> {
|
||||
run_code_mode_turn_with_config(server, prompt, code, |_| {}).await
|
||||
}
|
||||
|
||||
async fn run_code_mode_turn_with_config(
|
||||
server: &MockServer,
|
||||
prompt: &str,
|
||||
code: &str,
|
||||
configure: impl FnOnce(&mut Config) + Send + 'static,
|
||||
) -> Result<(TestCodex, ResponseMock)> {
|
||||
let mut builder = test_codex()
|
||||
.with_model("test-gpt-5.1-codex")
|
||||
.with_config(move |config| {
|
||||
let _ = config.features.enable(Feature::CodeMode);
|
||||
configure(config);
|
||||
});
|
||||
let test = builder.build(server).await?;
|
||||
|
||||
@@ -292,8 +303,7 @@ text(JSON.stringify(await tools.exec_command({ cmd: "printf code_mode_exec_marke
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
let items = custom_tool_output_items(&second_mock.single_request(), "call-1");
|
||||
assert_eq!(items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
@@ -645,40 +655,217 @@ text(JSON.stringify(results));
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<()> {
|
||||
async fn code_mode_exec_command_explicit_max_output_tokens_truncates() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use exec to truncate the final result",
|
||||
r#"// @exec: {"max_output_tokens": 6}
|
||||
text(JSON.stringify(await tools.exec_command({
|
||||
cmd: "printf 'token one token two token three token four token five token six token seven'",
|
||||
max_output_tokens: 100
|
||||
})));
|
||||
"use exec_command from code mode",
|
||||
r#"
|
||||
const result = await tools.exec_command({
|
||||
cmd: "printf '0123456789012345678901234567890123456789'",
|
||||
max_output_tokens: 5
|
||||
});
|
||||
text(result.output);
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let items = custom_tool_output_items(&req, "call-1");
|
||||
assert_eq!(items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
assert_eq!(
|
||||
text_item(
|
||||
&custom_tool_output_items(&second_mock.single_request(), "call-1"),
|
||||
/*index*/ 1
|
||||
),
|
||||
text_item(&items, /*index*/ 0),
|
||||
"Total output lines: 1\n\n0123456789…5 tokens truncated…0123456789"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exec_explicit_max_above_default_preserves_output() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use exec_command from code mode",
|
||||
r#"// @exec: {"max_output_tokens": 20000}
|
||||
const result = await tools.exec_command({
|
||||
cmd: "python3 -c \"import sys; sys.stdout.write('x' * 50000)\"",
|
||||
max_output_tokens: 20000
|
||||
});
|
||||
text(result.output);
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(
|
||||
text_item(
|
||||
&custom_tool_output_items(&second_mock.single_request(), "call-1"),
|
||||
/*index*/ 1
|
||||
),
|
||||
"x".repeat(50_000)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exec_explicit_max_above_default_truncates_larger_output() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use exec_command from code mode",
|
||||
r#"// @exec: {"max_output_tokens": 25000}
|
||||
const result = await tools.exec_command({
|
||||
cmd: "python3 -c \"import sys; sys.stdout.write('A' * 90000)\"",
|
||||
max_output_tokens: 20000
|
||||
});
|
||||
text(result.output);
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(
|
||||
text_item(
|
||||
&custom_tool_output_items(&second_mock.single_request(), "call-1"),
|
||||
/*index*/ 1
|
||||
),
|
||||
format!(
|
||||
"Total output lines: 1\n\n{}…2500 tokens truncated…{}",
|
||||
"A".repeat(40_000),
|
||||
"A".repeat(40_000)
|
||||
)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exec_explicit_max_above_truncation_policy_preserves_output() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn_with_config(
|
||||
&server,
|
||||
"use exec_command from code mode",
|
||||
r#"// @exec: {"max_output_tokens": 20000}
|
||||
const result = await tools.exec_command({
|
||||
cmd: "python3 -c \"import sys; sys.stdout.write('x' * 50000)\"",
|
||||
max_output_tokens: 20000
|
||||
});
|
||||
text(result.output);
|
||||
"#,
|
||||
|config| {
|
||||
config.tool_output_token_limit = Some(50);
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(
|
||||
text_item(
|
||||
&custom_tool_output_items(&second_mock.single_request(), "call-1"),
|
||||
/*index*/ 1
|
||||
),
|
||||
"x".repeat(50_000)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exec_without_max_preserves_output_beyond_default() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use exec_command from code mode",
|
||||
r#"// @exec: {"max_output_tokens": 20000}
|
||||
const result = await tools.exec_command({
|
||||
cmd: "python3 -c \"import sys; sys.stdout.write('x' * 50000)\""
|
||||
});
|
||||
text(result.output);
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(
|
||||
text_item(
|
||||
&custom_tool_output_items(&second_mock.single_request(), "call-1"),
|
||||
/*index*/ 1
|
||||
),
|
||||
"x".repeat(50_000)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exec_without_max_preserves_output_beyond_truncation_policy() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn_with_config(
|
||||
&server,
|
||||
"use exec_command from code mode",
|
||||
r#"// @exec: {"max_output_tokens": 20000}
|
||||
const result = await tools.exec_command({
|
||||
cmd: "python3 -c \"import sys; sys.stdout.write('x' * 50000)\""
|
||||
});
|
||||
text(result.output);
|
||||
"#,
|
||||
|config| {
|
||||
config.tool_output_token_limit = Some(50);
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(
|
||||
text_item(
|
||||
&custom_tool_output_items(&second_mock.single_request(), "call-1"),
|
||||
/*index*/ 1
|
||||
),
|
||||
"x".repeat(50_000)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exec_explicit_max_output_tokens_truncates() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use exec_command from code mode",
|
||||
r#"// @exec: {"max_output_tokens": 5}
|
||||
const result = await tools.exec_command({
|
||||
cmd: "printf '0123456789012345678901234567890123456789'"
|
||||
});
|
||||
text(result.output);
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(
|
||||
text_item(
|
||||
&custom_tool_output_items(&second_mock.single_request(), "call-1"),
|
||||
/*index*/ 1
|
||||
),
|
||||
"Total output lines: 1\n\n0123456789…5 tokens truncated…0123456789"
|
||||
);
|
||||
let expected_pattern = r#"(?sx)
|
||||
\A
|
||||
Total\ output\ lines:\ 1\n
|
||||
\n
|
||||
.*…\d+\ tokens\ truncated….*
|
||||
\z
|
||||
"#;
|
||||
assert_regex_match(expected_pattern, text_item(&items, /*index*/ 1));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1322,6 +1322,7 @@ async fn unified_exec_emits_one_begin_and_one_end_event() -> Result<()> {
|
||||
|
||||
let mut begin_events = Vec::new();
|
||||
let mut end_events = Vec::new();
|
||||
let mut terminal_interactions = Vec::new();
|
||||
let mut task_completed = false;
|
||||
loop {
|
||||
let event_msg = wait_for_event(&test.codex, |_| true).await;
|
||||
@@ -1332,6 +1333,9 @@ async fn unified_exec_emits_one_begin_and_one_end_event() -> Result<()> {
|
||||
EventMsg::ExecCommandEnd(event) if event.call_id == open_call_id => {
|
||||
end_events.push(event);
|
||||
}
|
||||
EventMsg::TerminalInteraction(event) if event.call_id == open_call_id => {
|
||||
terminal_interactions.push(event);
|
||||
}
|
||||
EventMsg::TurnComplete(_) => {
|
||||
task_completed = true;
|
||||
}
|
||||
@@ -1353,6 +1357,10 @@ async fn unified_exec_emits_one_begin_and_one_end_event() -> Result<()> {
|
||||
1,
|
||||
"expected end event for the write_stdin call"
|
||||
);
|
||||
assert!(
|
||||
terminal_interactions.is_empty(),
|
||||
"completed empty polls should not emit terminal interactions: {terminal_interactions:?}"
|
||||
);
|
||||
|
||||
let open_event = &begin_events[0];
|
||||
|
||||
|
||||
@@ -76,12 +76,16 @@ impl ChatWidget {
|
||||
if !self.bottom_pane.is_task_running() {
|
||||
return;
|
||||
}
|
||||
self.flush_answer_stream_with_separator();
|
||||
let command_display = self
|
||||
.unified_exec_processes
|
||||
.iter()
|
||||
.find(|process| process.key == process_id)
|
||||
.map(|process| process.command_display.clone());
|
||||
if stdin.is_empty() && command_display.is_none() {
|
||||
return;
|
||||
}
|
||||
|
||||
self.flush_answer_stream_with_separator();
|
||||
if stdin.is_empty() {
|
||||
// Empty stdin means we are polling for background output.
|
||||
// Surface this in the status indicator (single "waiting" surface) instead of
|
||||
|
||||
@@ -719,6 +719,22 @@ async fn unified_exec_wait_status_header_updates_on_late_command_display() {
|
||||
assert_eq!(status.details(), Some("sleep 5"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn unified_exec_empty_poll_for_finished_process_does_not_show_waiting_status() {
|
||||
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
|
||||
chat.on_task_started();
|
||||
|
||||
terminal_interaction(&mut chat, "call-finished", "proc-finished", "");
|
||||
|
||||
assert_eq!(chat.status_state.current_status.header, "Working");
|
||||
let status = chat
|
||||
.bottom_pane
|
||||
.status_widget()
|
||||
.expect("task status indicator should remain visible");
|
||||
assert_eq!(status.header(), "Working");
|
||||
assert!(chat.unified_exec_wait_streak.is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn unified_exec_waiting_multiple_empty_snapshots() {
|
||||
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
|
||||
|
||||
Reference in New Issue
Block a user