mirror of
https://github.com/openai/codex.git
synced 2026-02-03 07:23:39 +00:00
Compare commits
2 Commits
prototype
...
shijie/lin
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0e1a8703e5 | ||
|
|
8c0d728461 |
151
.github/workflows/rust-release.yml
vendored
151
.github/workflows/rust-release.yml
vendored
@@ -10,6 +10,10 @@ on:
|
||||
push:
|
||||
tags:
|
||||
- "rust-v*.*.*"
|
||||
pull_request:
|
||||
paths:
|
||||
- ".github/workflows/rust-release.yml"
|
||||
- "codex-rs/**"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}
|
||||
@@ -27,6 +31,11 @@ jobs:
|
||||
set -euo pipefail
|
||||
echo "::group::Tag validation"
|
||||
|
||||
if [[ "${GITHUB_EVENT_NAME}" != "push" || "${GITHUB_REF_TYPE}" != "tag" ]]; then
|
||||
echo "ℹ️ Skipping tag validation for ${GITHUB_EVENT_NAME} event"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# 1. Must be a tag and match the regex
|
||||
[[ "${GITHUB_REF_TYPE}" == "tag" ]] \
|
||||
|| { echo "❌ Not a tag push"; exit 1; }
|
||||
@@ -49,6 +58,9 @@ jobs:
|
||||
needs: tag-check
|
||||
name: Build - ${{ matrix.runner }} - ${{ matrix.target }}
|
||||
runs-on: ${{ matrix.runner }}
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write
|
||||
timeout-minutes: 30
|
||||
defaults:
|
||||
run:
|
||||
@@ -58,10 +70,10 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- runner: macos-15-xlarge
|
||||
target: aarch64-apple-darwin
|
||||
- runner: macos-15-xlarge
|
||||
target: x86_64-apple-darwin
|
||||
# - runner: macos-15-xlarge
|
||||
# target: aarch64-apple-darwin
|
||||
# - runner: macos-15-xlarge
|
||||
# target: x86_64-apple-darwin
|
||||
- runner: ubuntu-24.04
|
||||
target: x86_64-unknown-linux-musl
|
||||
- runner: ubuntu-24.04
|
||||
@@ -70,10 +82,10 @@ jobs:
|
||||
target: aarch64-unknown-linux-musl
|
||||
- runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-gnu
|
||||
- runner: windows-latest
|
||||
target: x86_64-pc-windows-msvc
|
||||
- runner: windows-11-arm
|
||||
target: aarch64-pc-windows-msvc
|
||||
# - runner: windows-latest
|
||||
# target: x86_64-pc-windows-msvc
|
||||
# - runner: windows-11-arm
|
||||
# target: aarch64-pc-windows-msvc
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
@@ -100,7 +112,7 @@ jobs:
|
||||
- name: Cargo build
|
||||
run: cargo build --target ${{ matrix.target }} --release --bin codex --bin codex-responses-api-proxy
|
||||
|
||||
- if: ${{ matrix.runner == 'macos-15-xlarge' }}
|
||||
- if: ${{ matrix.runner == 'macos-15-xlarge' && github.event_name == 'push' }}
|
||||
name: Configure Apple code signing
|
||||
shell: bash
|
||||
env:
|
||||
@@ -185,7 +197,7 @@ jobs:
|
||||
echo "APPLE_CODESIGN_KEYCHAIN=$keychain_path" >> "$GITHUB_ENV"
|
||||
echo "::add-mask::$APPLE_CODESIGN_IDENTITY"
|
||||
|
||||
- if: ${{ matrix.runner == 'macos-15-xlarge' }}
|
||||
- if: ${{ matrix.runner == 'macos-15-xlarge' && github.event_name == 'push' }}
|
||||
name: Sign macOS binaries
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -206,7 +218,7 @@ jobs:
|
||||
codesign --force --options runtime --timestamp --sign "$APPLE_CODESIGN_IDENTITY" "${keychain_args[@]}" "$path"
|
||||
done
|
||||
|
||||
- if: ${{ matrix.runner == 'macos-15-xlarge' }}
|
||||
- if: ${{ matrix.runner == 'macos-15-xlarge' && github.event_name == 'push' }}
|
||||
name: Notarize macOS binaries
|
||||
shell: bash
|
||||
env:
|
||||
@@ -269,6 +281,78 @@ jobs:
|
||||
notarize_binary "codex"
|
||||
notarize_binary "codex-responses-api-proxy"
|
||||
|
||||
- if: ${{ startsWith(matrix.runner, 'windows') && github.event_name == 'push' }}
|
||||
name: Configure Windows code signing
|
||||
shell: pwsh
|
||||
env:
|
||||
WINDOWS_CODESIGN_CERTIFICATE_PFX: ${{ secrets.WINDOWS_CODESIGN_CERTIFICATE_PFX }}
|
||||
WINDOWS_CODESIGN_CERTIFICATE_PASSWORD: ${{ secrets.WINDOWS_CODESIGN_CERTIFICATE_PASSWORD }}
|
||||
run: |
|
||||
Set-StrictMode -Version Latest
|
||||
|
||||
if (-not $env:WINDOWS_CODESIGN_CERTIFICATE_PFX) {
|
||||
Write-Error "WINDOWS_CODESIGN_CERTIFICATE_PFX is required for Windows signing"
|
||||
}
|
||||
|
||||
if (-not $env:WINDOWS_CODESIGN_CERTIFICATE_PASSWORD) {
|
||||
Write-Error "WINDOWS_CODESIGN_CERTIFICATE_PASSWORD is required for Windows signing"
|
||||
}
|
||||
|
||||
$certPath = Join-Path $env:RUNNER_TEMP 'windows_signing_certificate.pfx'
|
||||
[System.IO.File]::WriteAllBytes($certPath, [System.Convert]::FromBase64String($env:WINDOWS_CODESIGN_CERTIFICATE_PFX))
|
||||
|
||||
"WINDOWS_CODESIGN_CERTIFICATE_PATH=$certPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
|
||||
|
||||
# Sign the Windows release binaries with signtool.exe, using the certificate
# file whose path is read from WINDOWS_CODESIGN_CERTIFICATE_PATH.
# Only runs for push events on Windows runners.
- if: ${{ startsWith(matrix.runner, 'windows') && github.event_name == 'push' }}
  name: Sign Windows binaries
  shell: pwsh
  env:
    WINDOWS_CODESIGN_CERTIFICATE_PASSWORD: ${{ secrets.WINDOWS_CODESIGN_CERTIFICATE_PASSWORD }}
    MATRIX_TARGET: ${{ matrix.target }}
  run: |
    Set-StrictMode -Version Latest

    $certPath = $env:WINDOWS_CODESIGN_CERTIFICATE_PATH
    if (-not $certPath) {
      Write-Error "WINDOWS_CODESIGN_CERTIFICATE_PATH is required for Windows signing"
    }

    if (-not (Test-Path $certPath)) {
      Write-Error "Certificate file not found at $certPath"
    }

    # Prefer a signtool.exe whose path contains the target architecture; fall
    # back to any signtool.exe found under the Windows Kits bin directory.
    $arch = if ($env:MATRIX_TARGET -eq 'aarch64-pc-windows-msvc') { 'arm64' } else { 'x64' }
    $signtoolSearchRoot = "${env:ProgramFiles(x86)}\Windows Kits\10\bin"
    $signtool = Get-ChildItem -Path $signtoolSearchRoot -Recurse -Filter signtool.exe | Where-Object { $_.FullName -match "\\$arch\\" } | Sort-Object FullName -Descending | Select-Object -First 1
    if (-not $signtool) {
      $signtool = Get-ChildItem -Path $signtoolSearchRoot -Recurse -Filter signtool.exe | Sort-Object FullName -Descending | Select-Object -First 1
    }

    if (-not $signtool) {
      Write-Error "signtool.exe not found"
    }

    $binaries = @(
      "target/${{ matrix.target }}/release/codex.exe",
      "target/${{ matrix.target }}/release/codex-responses-api-proxy.exe"
    )

    # Verify every binary exists before signing any of them.
    foreach ($binary in $binaries) {
      if (-not (Test-Path $binary)) {
        Write-Error "Binary $binary not found"
      }
    }

    foreach ($binary in $binaries) {
      & $signtool.FullName sign `
        /fd SHA256 `
        /td SHA256 `
        /tr http://timestamp.digicert.com `
        /f $certPath `
        /p $env:WINDOWS_CODESIGN_CERTIFICATE_PASSWORD `
        $binary

      # signtool is a native executable: a non-zero exit code does not raise a
      # PowerShell error, and the runner only checks the exit code of the last
      # command in the script. Fail here so that a signing failure on an
      # earlier binary cannot be masked by a later success.
      if ($LASTEXITCODE -ne 0) {
        throw "signtool failed for $binary (exit code $LASTEXITCODE)"
      }
    }
|
||||
|
||||
- name: Stage artifacts
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -327,8 +411,40 @@ jobs:
|
||||
zstd -T0 -19 --rm "$dest/$base"
|
||||
done
|
||||
|
||||
- if: ${{ contains(matrix.target, 'unknown-linux')}}
|
||||
name: Install cosign
|
||||
uses: sigstore/cosign-installer@v3.7.0
|
||||
|
||||
- if: ${{ contains(matrix.target, 'unknown-linux')}}
|
||||
name: Cosign Linux artifacts
|
||||
shell: bash
|
||||
env:
|
||||
COSIGN_EXPERIMENTAL: "1"
|
||||
COSIGN_YES: "true"
|
||||
COSIGN_OIDC_CLIENT_ID: "sigstore"
|
||||
COSIGN_OIDC_ISSUER: "https://oauth2.sigstore.dev/auth"
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
dest="dist/${{ matrix.target }}"
|
||||
if [[ ! -d "$dest" ]]; then
|
||||
echo "Destination $dest does not exist"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
shopt -s nullglob
|
||||
for artifact in "$dest"/*; do
|
||||
if [[ -f "$artifact" ]]; then
|
||||
cosign sign-blob \
|
||||
--yes \
|
||||
--output-signature "${artifact}.sig" \
|
||||
--output-certificate "${artifact}.pem" \
|
||||
"$artifact"
|
||||
fi
|
||||
done
|
||||
|
||||
- name: Remove signing keychain
|
||||
if: ${{ always() && matrix.runner == 'macos-15-xlarge' }}
|
||||
if: ${{ always() && matrix.runner == 'macos-15-xlarge' && github.event_name == 'push' }}
|
||||
shell: bash
|
||||
env:
|
||||
APPLE_CODESIGN_KEYCHAIN: ${{ env.APPLE_CODESIGN_KEYCHAIN }}
|
||||
@@ -350,6 +466,13 @@ jobs:
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Remove Windows signing certificate
|
||||
if: ${{ always() && startsWith(matrix.runner, 'windows') && github.event_name == 'push' }}
|
||||
shell: pwsh
|
||||
run: |
|
||||
if ($env:WINDOWS_CODESIGN_CERTIFICATE_PATH -and (Test-Path $env:WINDOWS_CODESIGN_CERTIFICATE_PATH)) {
|
||||
Remove-Item -Force $env:WINDOWS_CODESIGN_CERTIFICATE_PATH
|
||||
}
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.target }}
|
||||
@@ -359,6 +482,7 @@ jobs:
|
||||
codex-rs/dist/${{ matrix.target }}/*
|
||||
|
||||
release:
|
||||
if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/rust-v') }}
|
||||
needs: build
|
||||
name: release
|
||||
runs-on: ubuntu-latest
|
||||
@@ -455,8 +579,8 @@ jobs:
|
||||
# July 31, 2025: https://github.blog/changelog/2025-07-31-npm-trusted-publishing-with-oidc-is-generally-available/
|
||||
# npm docs: https://docs.npmjs.com/trusted-publishers
|
||||
publish-npm:
|
||||
# Publish to npm for stable releases and alpha pre-releases with numeric suffixes.
|
||||
if: ${{ needs.release.outputs.should_publish_npm == 'true' }}
|
||||
# Publish to npm for stable releases and alpha pre-releases with numeric suffixes.
|
||||
name: publish-npm
|
||||
needs: release
|
||||
runs-on: ubuntu-latest
|
||||
@@ -520,6 +644,7 @@ jobs:
|
||||
done
|
||||
|
||||
update-branch:
|
||||
if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/rust-v') }}
|
||||
name: Update latest-alpha-cli branch
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Debug;
|
||||
use std::path::PathBuf;
|
||||
@@ -10,7 +11,7 @@ use crate::function_tool::FunctionCallError;
|
||||
use crate::mcp::auth::McpAuthStatusEntry;
|
||||
use crate::parse_command::parse_command;
|
||||
use crate::parse_turn_item;
|
||||
use crate::response_processing::process_items;
|
||||
use crate::review_format::format_review_findings_block;
|
||||
use crate::terminal;
|
||||
use crate::user_notification::UserNotifier;
|
||||
use async_channel::Receiver;
|
||||
@@ -19,6 +20,7 @@ use codex_apply_patch::ApplyPatchAction;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::protocol::ConversationPathResponseEvent;
|
||||
use codex_protocol::protocol::ExitedReviewModeEvent;
|
||||
use codex_protocol::protocol::ItemCompletedEvent;
|
||||
use codex_protocol::protocol::ItemStartedEvent;
|
||||
use codex_protocol::protocol::ReviewRequest;
|
||||
@@ -45,6 +47,7 @@ use tokio_util::sync::CancellationToken;
|
||||
use tracing::debug;
|
||||
use tracing::error;
|
||||
use tracing::info;
|
||||
use tracing::trace;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::ModelProviderInfo;
|
||||
@@ -83,6 +86,7 @@ use crate::protocol::ListCustomPromptsResponseEvent;
|
||||
use crate::protocol::Op;
|
||||
use crate::protocol::RateLimitSnapshot;
|
||||
use crate::protocol::ReviewDecision;
|
||||
use crate::protocol::ReviewOutputEvent;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::protocol::SessionConfiguredEvent;
|
||||
use crate::protocol::StreamErrorEvent;
|
||||
@@ -263,6 +267,7 @@ pub(crate) struct TurnContext {
|
||||
pub(crate) sandbox_policy: SandboxPolicy,
|
||||
pub(crate) shell_environment_policy: ShellEnvironmentPolicy,
|
||||
pub(crate) tools_config: ToolsConfig,
|
||||
pub(crate) is_review_mode: bool,
|
||||
pub(crate) final_output_json_schema: Option<Value>,
|
||||
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
}
|
||||
@@ -397,6 +402,7 @@ impl Session {
|
||||
sandbox_policy: session_configuration.sandbox_policy.clone(),
|
||||
shell_environment_policy: config.shell_environment_policy.clone(),
|
||||
tools_config,
|
||||
is_review_mode: false,
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
||||
}
|
||||
@@ -628,14 +634,6 @@ impl Session {
|
||||
state.session_configuration = state.session_configuration.apply(&updates);
|
||||
}
|
||||
|
||||
/// Returns a shared handle to the `original_config_do_not_use` value stored
/// in the session configuration, read while holding the session state lock.
pub(crate) async fn base_config(&self) -> Arc<Config> {
    let guard = self.state.lock().await;
    let session_configuration = &guard.session_configuration;
    Arc::clone(&session_configuration.original_config_do_not_use)
}
|
||||
|
||||
pub(crate) async fn new_turn(&self, updates: SessionSettingsUpdate) -> Arc<TurnContext> {
|
||||
let sub_id = self.next_internal_sub_id();
|
||||
self.new_turn_with_sub_id(sub_id, updates).await
|
||||
@@ -858,7 +856,7 @@ impl Session {
|
||||
|
||||
/// Records input items: always append to conversation history and
|
||||
/// persist these response items to rollout.
|
||||
pub(crate) async fn record_conversation_items(&self, items: &[ResponseItem]) {
|
||||
async fn record_conversation_items(&self, items: &[ResponseItem]) {
|
||||
self.record_into_history(items).await;
|
||||
self.persist_rollout_response_items(items).await;
|
||||
}
|
||||
@@ -875,7 +873,7 @@ impl Session {
|
||||
history.record_items(std::iter::once(response_item));
|
||||
}
|
||||
RolloutItem::Compacted(compacted) => {
|
||||
let snapshot = history.get_history();
|
||||
let snapshot = history.contents();
|
||||
let user_messages = collect_user_messages(&snapshot);
|
||||
let rebuilt = build_compacted_history(
|
||||
self.build_initial_context(turn_context),
|
||||
@@ -887,7 +885,7 @@ impl Session {
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
history.get_history()
|
||||
history.contents()
|
||||
}
|
||||
|
||||
/// Append ResponseItems to the in-memory conversation history only.
|
||||
@@ -936,15 +934,9 @@ impl Session {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(aibrahim): get rid of this method. We shouldn't deal with Vec<ResponseItem> directly; use ConversationHistory instead.
|
||||
pub(crate) async fn history_snapshot(&self) -> Vec<ResponseItem> {
|
||||
let mut state = self.state.lock().await;
|
||||
state.history_snapshot()
|
||||
}
|
||||
|
||||
pub(crate) async fn clone_history(&self) -> ConversationHistory {
|
||||
let state = self.state.lock().await;
|
||||
state.clone_history()
|
||||
state.history_snapshot()
|
||||
}
|
||||
|
||||
async fn update_token_usage_info(
|
||||
@@ -1038,6 +1030,16 @@ impl Session {
|
||||
self.send_event(turn_context, event).await;
|
||||
}
|
||||
|
||||
/// Build the full turn input by concatenating the current conversation
|
||||
/// history with additional items for this turn.
|
||||
pub async fn turn_input_with_history(&self, extra: Vec<ResponseItem>) -> Vec<ResponseItem> {
|
||||
let history = {
|
||||
let state = self.state.lock().await;
|
||||
state.history_snapshot()
|
||||
};
|
||||
[history, extra].concat()
|
||||
}
|
||||
|
||||
/// Returns the input if there was no task running to inject into
|
||||
pub async fn inject_input(&self, input: Vec<UserInput>) -> Result<(), Vec<UserInput>> {
|
||||
let mut active = self.active_turn.lock().await;
|
||||
@@ -1470,6 +1472,7 @@ async fn spawn_review_thread(
|
||||
sandbox_policy: parent_turn_context.sandbox_policy.clone(),
|
||||
shell_environment_policy: parent_turn_context.shell_environment_policy.clone(),
|
||||
cwd: parent_turn_context.cwd.clone(),
|
||||
is_review_mode: true,
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
|
||||
};
|
||||
@@ -1519,8 +1522,19 @@ pub(crate) async fn run_task(
|
||||
sess.send_event(&turn_context, event).await;
|
||||
|
||||
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
|
||||
sess.record_input_and_rollout_usermsg(turn_context.as_ref(), &initial_input_for_turn)
|
||||
.await;
|
||||
// For review threads, keep an isolated in-memory history so the
|
||||
// model sees a fresh conversation without the parent session's history.
|
||||
// For normal turns, continue recording to the session history as before.
|
||||
let is_review_mode = turn_context.is_review_mode;
|
||||
let mut review_thread_history: Vec<ResponseItem> = Vec::new();
|
||||
if is_review_mode {
|
||||
// Seed review threads with environment context so the model knows the working directory.
|
||||
review_thread_history.extend(sess.build_initial_context(turn_context.as_ref()));
|
||||
review_thread_history.push(initial_input_for_turn.into());
|
||||
} else {
|
||||
sess.record_input_and_rollout_usermsg(turn_context.as_ref(), &initial_input_for_turn)
|
||||
.await;
|
||||
}
|
||||
|
||||
let mut last_agent_message: Option<String> = None;
|
||||
// Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
|
||||
@@ -1549,9 +1563,14 @@ pub(crate) async fn run_task(
|
||||
// conversation history on each turn. The rollout file, however, should
|
||||
// only record the new items that originated in this turn so that it
|
||||
// represents an append-only log without duplicates.
|
||||
let turn_input: Vec<ResponseItem> = {
|
||||
let turn_input: Vec<ResponseItem> = if is_review_mode {
|
||||
if !pending_input.is_empty() {
|
||||
review_thread_history.extend(pending_input);
|
||||
}
|
||||
review_thread_history.clone()
|
||||
} else {
|
||||
sess.record_conversation_items(&pending_input).await;
|
||||
sess.history_snapshot().await
|
||||
sess.turn_input_with_history(pending_input).await
|
||||
};
|
||||
|
||||
let turn_input_messages: Vec<String> = turn_input
|
||||
@@ -1592,8 +1611,109 @@ pub(crate) async fn run_task(
|
||||
let token_limit_reached = total_usage_tokens
|
||||
.map(|tokens| tokens >= limit)
|
||||
.unwrap_or(false);
|
||||
let (responses, items_to_record_in_conversation_history) =
|
||||
process_items(processed_items, &sess).await;
|
||||
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
|
||||
let mut responses = Vec::<ResponseInputItem>::new();
|
||||
for processed_response_item in processed_items {
|
||||
let ProcessedResponseItem { item, response } = processed_response_item;
|
||||
match (&item, &response) {
|
||||
(ResponseItem::Message { role, .. }, None) if role == "assistant" => {
|
||||
// If the model returned a message, we need to record it.
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
}
|
||||
(
|
||||
ResponseItem::LocalShellCall { .. },
|
||||
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
(
|
||||
ResponseItem::FunctionCall { .. },
|
||||
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
(
|
||||
ResponseItem::CustomToolCall { .. },
|
||||
Some(ResponseInputItem::CustomToolCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
(
|
||||
ResponseItem::FunctionCall { .. },
|
||||
Some(ResponseInputItem::McpToolCallOutput { call_id, result }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
let output = match result {
|
||||
Ok(call_tool_result) => {
|
||||
convert_call_tool_result_to_function_call_output_payload(
|
||||
call_tool_result,
|
||||
)
|
||||
}
|
||||
Err(err) => FunctionCallOutputPayload {
|
||||
content: err.clone(),
|
||||
success: Some(false),
|
||||
},
|
||||
};
|
||||
items_to_record_in_conversation_history.push(
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output,
|
||||
},
|
||||
);
|
||||
}
|
||||
(
|
||||
ResponseItem::Reasoning {
|
||||
id,
|
||||
summary,
|
||||
content,
|
||||
encrypted_content,
|
||||
},
|
||||
None,
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(ResponseItem::Reasoning {
|
||||
id: id.clone(),
|
||||
summary: summary.clone(),
|
||||
content: content.clone(),
|
||||
encrypted_content: encrypted_content.clone(),
|
||||
});
|
||||
}
|
||||
_ => {
|
||||
warn!("Unexpected response item: {item:?} with response: {response:?}");
|
||||
}
|
||||
};
|
||||
if let Some(response) = response {
|
||||
responses.push(response);
|
||||
}
|
||||
}
|
||||
|
||||
// Only attempt to take the lock if there is something to record.
|
||||
if !items_to_record_in_conversation_history.is_empty() {
|
||||
if is_review_mode {
|
||||
review_thread_history
|
||||
.extend(items_to_record_in_conversation_history.clone());
|
||||
} else {
|
||||
sess.record_conversation_items(&items_to_record_in_conversation_history)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
if token_limit_reached {
|
||||
if auto_compact_recently_attempted {
|
||||
@@ -1632,10 +1752,7 @@ pub(crate) async fn run_task(
|
||||
}
|
||||
continue;
|
||||
}
|
||||
Err(CodexErr::TurnAborted {
|
||||
dangling_artifacts: processed_items,
|
||||
}) => {
|
||||
let _ = process_items(processed_items, &sess).await;
|
||||
Err(CodexErr::TurnAborted) => {
|
||||
// Aborted turn is reported via a different event.
|
||||
break;
|
||||
}
|
||||
@@ -1651,9 +1768,50 @@ pub(crate) async fn run_task(
|
||||
}
|
||||
}
|
||||
|
||||
// If this was a review thread and we have a final assistant message,
|
||||
// try to parse it as a ReviewOutput.
|
||||
//
|
||||
// If parsing fails, construct a minimal ReviewOutputEvent using the plain
|
||||
// text as the overall explanation. Else, just exit review mode with None.
|
||||
//
|
||||
// Emits an ExitedReviewMode event with the parsed review output.
|
||||
if turn_context.is_review_mode {
|
||||
exit_review_mode(
|
||||
sess.clone(),
|
||||
Arc::clone(&turn_context),
|
||||
last_agent_message.as_deref().map(parse_review_output_event),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
last_agent_message
|
||||
}
|
||||
|
||||
/// Parse the review output; when not valid JSON, build a structured
|
||||
/// fallback that carries the plain text as the overall explanation.
|
||||
///
|
||||
/// Returns: a ReviewOutputEvent parsed from JSON or a fallback populated from text.
|
||||
fn parse_review_output_event(text: &str) -> ReviewOutputEvent {
|
||||
// Try direct parse first
|
||||
if let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(text) {
|
||||
return ev;
|
||||
}
|
||||
// If wrapped in markdown fences or extra prose, attempt to extract the first JSON object
|
||||
if let (Some(start), Some(end)) = (text.find('{'), text.rfind('}'))
|
||||
&& start < end
|
||||
&& let Some(slice) = text.get(start..=end)
|
||||
&& let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(slice)
|
||||
{
|
||||
return ev;
|
||||
}
|
||||
// Not JSON – return a structured ReviewOutputEvent that carries
|
||||
// the plain text as the overall explanation.
|
||||
ReviewOutputEvent {
|
||||
overall_explanation: text.to_string(),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
async fn run_turn(
|
||||
sess: Arc<Session>,
|
||||
turn_context: Arc<TurnContext>,
|
||||
@@ -1695,13 +1853,7 @@ async fn run_turn(
|
||||
.await
|
||||
{
|
||||
Ok(output) => return Ok(output),
|
||||
Err(CodexErr::TurnAborted {
|
||||
dangling_artifacts: processed_items,
|
||||
}) => {
|
||||
return Err(CodexErr::TurnAborted {
|
||||
dangling_artifacts: processed_items,
|
||||
});
|
||||
}
|
||||
Err(CodexErr::TurnAborted) => return Err(CodexErr::TurnAborted),
|
||||
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
|
||||
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
|
||||
Err(e @ CodexErr::Fatal(_)) => return Err(e),
|
||||
@@ -1754,9 +1906,9 @@ async fn run_turn(
|
||||
/// "handled" such that it produces a `ResponseInputItem` that needs to be
|
||||
/// sent back to the model on the next turn.
|
||||
#[derive(Debug)]
|
||||
pub struct ProcessedResponseItem {
|
||||
pub item: ResponseItem,
|
||||
pub response: Option<ResponseInputItem>,
|
||||
pub(crate) struct ProcessedResponseItem {
|
||||
pub(crate) item: ResponseItem,
|
||||
pub(crate) response: Option<ResponseInputItem>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -1775,6 +1927,61 @@ async fn try_run_turn(
|
||||
task_kind: TaskKind,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> CodexResult<TurnRunResult> {
|
||||
// call_ids that are part of this response.
|
||||
let completed_call_ids = prompt
|
||||
.input
|
||||
.iter()
|
||||
.filter_map(|ri| match ri {
|
||||
ResponseItem::FunctionCallOutput { call_id, .. } => Some(call_id),
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(call_id),
|
||||
..
|
||||
} => Some(call_id),
|
||||
ResponseItem::CustomToolCallOutput { call_id, .. } => Some(call_id),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// call_ids that were pending but are not part of this response.
|
||||
// This usually happens because the user interrupted the model before we responded to one of its tool calls
|
||||
// and then the user sent a follow-up message.
|
||||
let missing_calls = {
|
||||
prompt
|
||||
.input
|
||||
.iter()
|
||||
.filter_map(|ri| match ri {
|
||||
ResponseItem::FunctionCall { call_id, .. } => Some(call_id),
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(call_id),
|
||||
..
|
||||
} => Some(call_id),
|
||||
ResponseItem::CustomToolCall { call_id, .. } => Some(call_id),
|
||||
_ => None,
|
||||
})
|
||||
.filter_map(|call_id| {
|
||||
if completed_call_ids.contains(&call_id) {
|
||||
None
|
||||
} else {
|
||||
Some(call_id.clone())
|
||||
}
|
||||
})
|
||||
.map(|call_id| ResponseItem::CustomToolCallOutput {
|
||||
call_id,
|
||||
output: "aborted".to_string(),
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
let prompt: Cow<Prompt> = if missing_calls.is_empty() {
|
||||
Cow::Borrowed(prompt)
|
||||
} else {
|
||||
// Add the synthetic aborted missing calls to the beginning of the input to ensure all call ids have responses.
|
||||
let input = [missing_calls, prompt.input.clone()].concat();
|
||||
Cow::Owned(Prompt {
|
||||
input,
|
||||
..prompt.clone()
|
||||
})
|
||||
};
|
||||
|
||||
let rollout_item = RolloutItem::TurnContext(TurnContextItem {
|
||||
cwd: turn_context.cwd.clone(),
|
||||
approval_policy: turn_context.approval_policy,
|
||||
@@ -1783,12 +1990,11 @@ async fn try_run_turn(
|
||||
effort: turn_context.client.get_reasoning_effort(),
|
||||
summary: turn_context.client.get_reasoning_summary(),
|
||||
});
|
||||
|
||||
sess.persist_rollout_items(&[rollout_item]).await;
|
||||
let mut stream = turn_context
|
||||
.client
|
||||
.clone()
|
||||
.stream_with_task_kind(prompt, task_kind)
|
||||
.stream_with_task_kind(prompt.as_ref(), task_kind)
|
||||
.or_cancel(&cancellation_token)
|
||||
.await??;
|
||||
|
||||
@@ -1805,15 +2011,7 @@ async fn try_run_turn(
|
||||
// Poll the next item from the model stream. We must inspect *both* Ok and Err
|
||||
// cases so that transient stream failures (e.g., dropped SSE connection before
|
||||
// `response.completed`) bubble up and trigger the caller's retry logic.
|
||||
let event = match stream.next().or_cancel(&cancellation_token).await {
|
||||
Ok(event) => event,
|
||||
Err(codex_async_utils::CancelErr::Cancelled) => {
|
||||
let processed_items = output.try_collect().await?;
|
||||
return Err(CodexErr::TurnAborted {
|
||||
dangling_artifacts: processed_items,
|
||||
});
|
||||
}
|
||||
};
|
||||
let event = stream.next().or_cancel(&cancellation_token).await?;
|
||||
|
||||
let event = match event {
|
||||
Some(res) => res?,
|
||||
@@ -1837,8 +2035,7 @@ async fn try_run_turn(
|
||||
let payload_preview = call.payload.log_payload().into_owned();
|
||||
tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview);
|
||||
|
||||
let response =
|
||||
tool_runtime.handle_tool_call(call, cancellation_token.child_token());
|
||||
let response = tool_runtime.handle_tool_call(call);
|
||||
|
||||
output.push_back(
|
||||
async move {
|
||||
@@ -1920,7 +2117,12 @@ async fn try_run_turn(
|
||||
} => {
|
||||
sess.update_token_usage_info(turn_context.as_ref(), token_usage.as_ref())
|
||||
.await;
|
||||
let processed_items = output.try_collect().await?;
|
||||
|
||||
let processed_items = output
|
||||
.try_collect()
|
||||
.or_cancel(&cancellation_token)
|
||||
.await??;
|
||||
|
||||
let unified_diff = {
|
||||
let mut tracker = turn_diff_tracker.lock().await;
|
||||
tracker.get_unified_diff()
|
||||
@@ -1940,8 +2142,12 @@ async fn try_run_turn(
|
||||
ResponseEvent::OutputTextDelta(delta) => {
|
||||
// In review child threads, suppress assistant text deltas; the
|
||||
// UI will show a selection popup from the final ReviewOutput.
|
||||
let event = EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta });
|
||||
sess.send_event(&turn_context, event).await;
|
||||
if !turn_context.is_review_mode {
|
||||
let event = EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta });
|
||||
sess.send_event(&turn_context, event).await;
|
||||
} else {
|
||||
trace!("suppressing OutputTextDelta in review mode");
|
||||
}
|
||||
}
|
||||
ResponseEvent::ReasoningSummaryDelta(delta) => {
|
||||
let event = EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta });
|
||||
@@ -1976,7 +2182,13 @@ async fn handle_non_tool_response_item(
|
||||
ResponseItem::Message { .. }
|
||||
| ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::WebSearchCall { .. } => {
|
||||
let turn_item = parse_turn_item(&item);
|
||||
let turn_item = match &item {
|
||||
ResponseItem::Message { .. } if turn_context.is_review_mode => {
|
||||
trace!("suppressing assistant Message in review mode");
|
||||
None
|
||||
}
|
||||
_ => parse_turn_item(&item),
|
||||
};
|
||||
if let Some(turn_item) = turn_item {
|
||||
sess.emit_turn_item_started_completed(
|
||||
turn_context.as_ref(),
|
||||
@@ -2014,7 +2226,7 @@ pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -
|
||||
}
|
||||
})
|
||||
}
|
||||
pub(crate) fn convert_call_tool_result_to_function_call_output_payload(
|
||||
fn convert_call_tool_result_to_function_call_output_payload(
|
||||
call_tool_result: &CallToolResult,
|
||||
) -> FunctionCallOutputPayload {
|
||||
let CallToolResult {
|
||||
@@ -2049,6 +2261,58 @@ pub(crate) fn convert_call_tool_result_to_function_call_output_payload(
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits an `ExitedReviewMode` event carrying the optional review output,
|
||||
/// then records a `user`-role message describing that output in the conversation.
///
/// With `Some(output)`, the recorded message holds the trimmed
/// `overall_explanation` followed, when there are findings, by a newline and
/// the block produced by `format_review_findings_block`. With `None`, a fixed
/// "interrupted" notice with `None.` as the results is recorded instead.
|
||||
pub(crate) async fn exit_review_mode(
|
||||
session: Arc<Session>,
|
||||
turn_context: Arc<TurnContext>,
|
||||
review_output: Option<ReviewOutputEvent>,
|
||||
) {
|
||||
// The event gets its own clone because `review_output` is consumed below.
let event = EventMsg::ExitedReviewMode(ExitedReviewModeEvent {
|
||||
review_output: review_output.clone(),
|
||||
});
|
||||
session.send_event(turn_context.as_ref(), event).await;
|
||||
|
||||
// Text of the message that is recorded into the conversation afterwards.
let mut user_message = String::new();
|
||||
if let Some(out) = review_output {
|
||||
// Explanation first (if non-empty), then the formatted findings block.
let mut findings_str = String::new();
|
||||
let text = out.overall_explanation.trim();
|
||||
if !text.is_empty() {
|
||||
findings_str.push_str(text);
|
||||
}
|
||||
if !out.findings.is_empty() {
|
||||
let block = format_review_findings_block(&out.findings, None);
|
||||
findings_str.push_str(&format!("\n{block}"));
|
||||
}
|
||||
user_message.push_str(&format!(
|
||||
r#"<user_action>
|
||||
<context>User initiated a review task. Here's the full review output from reviewer model. User may select one or more comments to resolve.</context>
|
||||
<action>review</action>
|
||||
<results>
|
||||
{findings_str}
|
||||
</results>
|
||||
</user_action>
|
||||
"#));
|
||||
} else {
|
||||
// No review output was provided: record the fixed "interrupted" notice.
user_message.push_str(r#"<user_action>
|
||||
<context>User initiated a review task, but was interrupted. If user asks about this, tell them to re-initiate a review with `/review` and wait for it to complete.</context>
|
||||
<action>review</action>
|
||||
<results>
|
||||
None.
|
||||
</results>
|
||||
</user_action>
|
||||
"#);
|
||||
}
|
||||
|
||||
// Recorded as a single message with role "user" and one InputText item.
session
|
||||
.record_conversation_items(&[ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText { text: user_message }],
|
||||
}])
|
||||
.await;
|
||||
}
|
||||
|
||||
fn mcp_init_error_display(
|
||||
server_name: &str,
|
||||
entry: Option<&McpAuthStatusEntry>,
|
||||
@@ -2528,6 +2792,12 @@ mod tests {
|
||||
sleep(Duration::from_secs(60)).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn abort(&self, session: Arc<SessionTaskContext>, ctx: Arc<TurnContext>) {
|
||||
if let TaskKind::Review = self.kind {
|
||||
exit_review_mode(session.clone_session(), ctx, None).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
@@ -2712,7 +2982,7 @@ mod tests {
|
||||
rollout_items.push(RolloutItem::ResponseItem(assistant1.clone()));
|
||||
|
||||
let summary1 = "summary one";
|
||||
let snapshot1 = live_history.get_history();
|
||||
let snapshot1 = live_history.contents();
|
||||
let user_messages1 = collect_user_messages(&snapshot1);
|
||||
let rebuilt1 = build_compacted_history(
|
||||
session.build_initial_context(turn_context),
|
||||
@@ -2745,7 +3015,7 @@ mod tests {
|
||||
rollout_items.push(RolloutItem::ResponseItem(assistant2.clone()));
|
||||
|
||||
let summary2 = "summary two";
|
||||
let snapshot2 = live_history.get_history();
|
||||
let snapshot2 = live_history.contents();
|
||||
let user_messages2 = collect_user_messages(&snapshot2);
|
||||
let rebuilt2 = build_compacted_history(
|
||||
session.build_initial_context(turn_context),
|
||||
@@ -2777,7 +3047,7 @@ mod tests {
|
||||
live_history.record_items(std::iter::once(&assistant3));
|
||||
rollout_items.push(RolloutItem::ResponseItem(assistant3.clone()));
|
||||
|
||||
(rollout_items, live_history.get_history())
|
||||
(rollout_items, live_history.contents())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -24,7 +24,6 @@ use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use futures::prelude::*;
|
||||
use tracing::error;
|
||||
|
||||
pub const SUMMARIZATION_PROMPT: &str = include_str!("../../templates/compact/prompt.md");
|
||||
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;
|
||||
@@ -65,10 +64,9 @@ async fn run_compact_task_inner(
|
||||
input: Vec<UserInput>,
|
||||
) {
|
||||
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
|
||||
|
||||
let mut history = sess.clone_history().await;
|
||||
history.record_items(&[initial_input_for_turn.into()]);
|
||||
|
||||
let mut turn_input = sess
|
||||
.turn_input_with_history(vec![initial_input_for_turn.clone().into()])
|
||||
.await;
|
||||
let mut truncated_count = 0usize;
|
||||
|
||||
let max_retries = turn_context.client.get_provider().stream_max_retries();
|
||||
@@ -85,7 +83,6 @@ async fn run_compact_task_inner(
|
||||
sess.persist_rollout_items(&[rollout_item]).await;
|
||||
|
||||
loop {
|
||||
let turn_input = history.get_history();
|
||||
let prompt = Prompt {
|
||||
input: turn_input.clone(),
|
||||
..Default::default()
|
||||
@@ -110,11 +107,7 @@ async fn run_compact_task_inner(
|
||||
}
|
||||
Err(e @ CodexErr::ContextWindowExceeded) => {
|
||||
if turn_input.len() > 1 {
|
||||
// Trim from the beginning to preserve cache (prefix-based) and keep recent messages intact.
|
||||
error!(
|
||||
"Context window exceeded while compacting; removing oldest history item. Error: {e}"
|
||||
);
|
||||
history.remove_first_item();
|
||||
turn_input.remove(0);
|
||||
truncated_count += 1;
|
||||
retries = 0;
|
||||
continue;
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use tracing::error;
|
||||
|
||||
/// Transcript of conversation history
|
||||
#[derive(Debug, Clone, Default)]
|
||||
@@ -14,6 +12,11 @@ impl ConversationHistory {
|
||||
Self { items: Vec::new() }
|
||||
}
|
||||
|
||||
/// Returns a clone of the contents in the transcript.
|
||||
pub(crate) fn contents(&self) -> Vec<ResponseItem> {
|
||||
self.items.clone()
|
||||
}
|
||||
|
||||
/// `items` is ordered from oldest to newest.
|
||||
pub(crate) fn record_items<I>(&mut self, items: I)
|
||||
where
|
||||
@@ -29,287 +32,9 @@ impl ConversationHistory {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_history(&mut self) -> Vec<ResponseItem> {
|
||||
self.normalize_history();
|
||||
self.contents()
|
||||
}
|
||||
|
||||
pub(crate) fn remove_first_item(&mut self) {
|
||||
if !self.items.is_empty() {
|
||||
// Remove the oldest item (front of the list). Items are ordered from
|
||||
// oldest → newest, so index 0 is the first entry recorded.
|
||||
let removed = self.items.remove(0);
|
||||
// If the removed item participates in a call/output pair, also remove
|
||||
// its corresponding counterpart to keep the invariants intact without
|
||||
// running a full normalization pass.
|
||||
self.remove_corresponding_for(&removed);
|
||||
}
|
||||
}
|
||||
|
||||
/// This function enforces a couple of invariants on the in-memory history:
|
||||
/// 1. every call (function/custom) has a corresponding output entry
|
||||
/// 2. every output has a corresponding call entry
|
||||
fn normalize_history(&mut self) {
|
||||
// all function/tool calls must have a corresponding output
|
||||
self.ensure_call_outputs_present();
|
||||
|
||||
// all outputs must have a corresponding function/tool call
|
||||
self.remove_orphan_outputs();
|
||||
}
|
||||
|
||||
/// Returns a clone of the contents in the transcript.
|
||||
fn contents(&self) -> Vec<ResponseItem> {
|
||||
self.items.clone()
|
||||
}
|
||||
|
||||
fn ensure_call_outputs_present(&mut self) {
|
||||
// Collect synthetic outputs to insert immediately after their calls.
|
||||
// Store the insertion position (index of call) alongside the item so
|
||||
// we can insert in reverse order and avoid index shifting.
|
||||
let mut missing_outputs_to_insert: Vec<(usize, ResponseItem)> = Vec::new();
|
||||
|
||||
for (idx, item) in self.items.iter().enumerate() {
|
||||
match item {
|
||||
ResponseItem::FunctionCall { call_id, .. } => {
|
||||
let has_output = self.items.iter().any(|i| match i {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
|
||||
if !has_output {
|
||||
error_or_panic(format!(
|
||||
"Function call output is missing for call id: {call_id}"
|
||||
));
|
||||
missing_outputs_to_insert.push((
|
||||
idx,
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
ResponseItem::CustomToolCall { call_id, .. } => {
|
||||
let has_output = self.items.iter().any(|i| match i {
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
|
||||
if !has_output {
|
||||
error_or_panic(format!(
|
||||
"Custom tool call output is missing for call id: {call_id}"
|
||||
));
|
||||
missing_outputs_to_insert.push((
|
||||
idx,
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: "aborted".to_string(),
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
// LocalShellCall is represented in upstream streams by a FunctionCallOutput
|
||||
ResponseItem::LocalShellCall { call_id, .. } => {
|
||||
if let Some(call_id) = call_id.as_ref() {
|
||||
let has_output = self.items.iter().any(|i| match i {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
|
||||
if !has_output {
|
||||
error_or_panic(format!(
|
||||
"Local shell call output is missing for call id: {call_id}"
|
||||
));
|
||||
missing_outputs_to_insert.push((
|
||||
idx,
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::WebSearchCall { .. }
|
||||
| ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::CustomToolCallOutput { .. }
|
||||
| ResponseItem::Other
|
||||
| ResponseItem::Message { .. } => {
|
||||
// nothing to do for these variants
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !missing_outputs_to_insert.is_empty() {
|
||||
// Insert from the end to avoid shifting subsequent indices.
|
||||
missing_outputs_to_insert.sort_by_key(|(i, _)| *i);
|
||||
for (idx, item) in missing_outputs_to_insert.into_iter().rev() {
|
||||
let insert_pos = idx + 1; // place immediately after the call
|
||||
if insert_pos <= self.items.len() {
|
||||
self.items.insert(insert_pos, item);
|
||||
} else {
|
||||
self.items.push(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn remove_orphan_outputs(&mut self) {
|
||||
// Work on a snapshot to avoid borrowing `self.items` while mutating it.
|
||||
let snapshot = self.items.clone();
|
||||
let mut orphan_output_call_ids: std::collections::HashSet<String> =
|
||||
std::collections::HashSet::new();
|
||||
|
||||
for item in &snapshot {
|
||||
match item {
|
||||
ResponseItem::FunctionCallOutput { call_id, .. } => {
|
||||
let has_call = snapshot.iter().any(|i| match i {
|
||||
ResponseItem::FunctionCall {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(existing),
|
||||
..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
|
||||
if !has_call {
|
||||
error_or_panic(format!("Function call is missing for call id: {call_id}"));
|
||||
orphan_output_call_ids.insert(call_id.clone());
|
||||
}
|
||||
}
|
||||
ResponseItem::CustomToolCallOutput { call_id, .. } => {
|
||||
let has_call = snapshot.iter().any(|i| match i {
|
||||
ResponseItem::CustomToolCall {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
|
||||
if !has_call {
|
||||
error_or_panic(format!(
|
||||
"Custom tool call is missing for call id: {call_id}"
|
||||
));
|
||||
orphan_output_call_ids.insert(call_id.clone());
|
||||
}
|
||||
}
|
||||
ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::CustomToolCall { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::WebSearchCall { .. }
|
||||
| ResponseItem::Other
|
||||
| ResponseItem::Message { .. } => {
|
||||
// nothing to do for these variants
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !orphan_output_call_ids.is_empty() {
|
||||
let ids = orphan_output_call_ids;
|
||||
self.items.retain(|i| match i {
|
||||
ResponseItem::FunctionCallOutput { call_id, .. }
|
||||
| ResponseItem::CustomToolCallOutput { call_id, .. } => !ids.contains(call_id),
|
||||
_ => true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
|
||||
self.items = items;
|
||||
}
|
||||
|
||||
/// Removes the corresponding paired item for the provided `item`, if any.
|
||||
///
|
||||
/// Pairs:
|
||||
/// - FunctionCall <-> FunctionCallOutput
|
||||
/// - CustomToolCall <-> CustomToolCallOutput
|
||||
/// - LocalShellCall(call_id: Some) <-> FunctionCallOutput
|
||||
fn remove_corresponding_for(&mut self, item: &ResponseItem) {
|
||||
match item {
|
||||
ResponseItem::FunctionCall { call_id, .. } => {
|
||||
self.remove_first_matching(|i| match i {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
}
|
||||
ResponseItem::CustomToolCall { call_id, .. } => {
|
||||
self.remove_first_matching(|i| match i {
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
}
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(call_id),
|
||||
..
|
||||
} => {
|
||||
self.remove_first_matching(|i| match i {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
}
|
||||
ResponseItem::FunctionCallOutput { call_id, .. } => {
|
||||
self.remove_first_matching(|i| match i {
|
||||
ResponseItem::FunctionCall {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(existing),
|
||||
..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
}
|
||||
ResponseItem::CustomToolCallOutput { call_id, .. } => {
|
||||
self.remove_first_matching(|i| match i {
|
||||
ResponseItem::CustomToolCall {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove the first item matching the predicate.
|
||||
fn remove_first_matching<F>(&mut self, predicate: F)
|
||||
where
|
||||
F: FnMut(&ResponseItem) -> bool,
|
||||
{
|
||||
if let Some(pos) = self.items.iter().position(predicate) {
|
||||
self.items.remove(pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn error_or_panic(message: String) {
|
||||
if cfg!(debug_assertions) || env!("CARGO_PKG_VERSION").contains("alpha") {
|
||||
panic!("{message}");
|
||||
} else {
|
||||
error!("{message}");
|
||||
}
|
||||
}
|
||||
|
||||
/// Anything that is not a system message or "reasoning" message is considered
|
||||
@@ -332,11 +57,6 @@ fn is_api_message(message: &ResponseItem) -> bool {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::LocalShellAction;
|
||||
use codex_protocol::models::LocalShellExecAction;
|
||||
use codex_protocol::models::LocalShellStatus;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
fn assistant_msg(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
@@ -348,12 +68,6 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn create_history_with_items(items: Vec<ResponseItem>) -> ConversationHistory {
|
||||
let mut h = ConversationHistory::new();
|
||||
h.record_items(items.iter());
|
||||
h
|
||||
}
|
||||
|
||||
fn user_msg(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
@@ -403,452 +117,4 @@ mod tests {
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_first_item_removes_matching_output_for_function_call() {
|
||||
let items = vec![
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "do_it".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-1".to_string(),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.remove_first_item();
|
||||
assert_eq!(h.contents(), vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_first_item_removes_matching_call_for_output() {
|
||||
let items = vec![
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-2".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "do_it".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-2".to_string(),
|
||||
},
|
||||
];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.remove_first_item();
|
||||
assert_eq!(h.contents(), vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_first_item_handles_local_shell_pair() {
|
||||
let items = vec![
|
||||
ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("call-3".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string(), "hi".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-3".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.remove_first_item();
|
||||
assert_eq!(h.contents(), vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_first_item_handles_custom_tool_pair() {
|
||||
let items = vec![
|
||||
ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "tool-1".to_string(),
|
||||
name: "my_tool".to_string(),
|
||||
input: "{}".to_string(),
|
||||
},
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: "tool-1".to_string(),
|
||||
output: "ok".to_string(),
|
||||
},
|
||||
];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.remove_first_item();
|
||||
assert_eq!(h.contents(), vec![]);
|
||||
}
|
||||
|
||||
//TODO(aibrahim): run CI in release mode.
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
fn normalize_adds_missing_output_for_function_call() {
|
||||
let items = vec![ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "do_it".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-x".to_string(),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
|
||||
h.normalize_history();
|
||||
|
||||
assert_eq!(
|
||||
h.contents(),
|
||||
vec![
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "do_it".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-x".to_string(),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-x".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
fn normalize_adds_missing_output_for_custom_tool_call() {
|
||||
let items = vec![ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "tool-x".to_string(),
|
||||
name: "custom".to_string(),
|
||||
input: "{}".to_string(),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
|
||||
h.normalize_history();
|
||||
|
||||
assert_eq!(
|
||||
h.contents(),
|
||||
vec![
|
||||
ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "tool-x".to_string(),
|
||||
name: "custom".to_string(),
|
||||
input: "{}".to_string(),
|
||||
},
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: "tool-x".to_string(),
|
||||
output: "aborted".to_string(),
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
fn normalize_adds_missing_output_for_local_shell_call_with_id() {
|
||||
let items = vec![ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("shell-1".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string(), "hi".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
|
||||
h.normalize_history();
|
||||
|
||||
assert_eq!(
|
||||
h.contents(),
|
||||
vec![
|
||||
ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("shell-1".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string(), "hi".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "shell-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
fn normalize_removes_orphan_function_call_output() {
|
||||
let items = vec![ResponseItem::FunctionCallOutput {
|
||||
call_id: "orphan-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
|
||||
h.normalize_history();
|
||||
|
||||
assert_eq!(h.contents(), vec![]);
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
fn normalize_removes_orphan_custom_tool_call_output() {
|
||||
let items = vec![ResponseItem::CustomToolCallOutput {
|
||||
call_id: "orphan-2".to_string(),
|
||||
output: "ok".to_string(),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
|
||||
h.normalize_history();
|
||||
|
||||
assert_eq!(h.contents(), vec![]);
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
fn normalize_mixed_inserts_and_removals() {
|
||||
let items = vec![
|
||||
// Will get an inserted output
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "f1".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "c1".to_string(),
|
||||
},
|
||||
// Orphan output that should be removed
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "c2".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
// Will get an inserted custom tool output
|
||||
ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "t1".to_string(),
|
||||
name: "tool".to_string(),
|
||||
input: "{}".to_string(),
|
||||
},
|
||||
// Local shell call also gets an inserted function call output
|
||||
ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("s1".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
},
|
||||
];
|
||||
let mut h = create_history_with_items(items);
|
||||
|
||||
h.normalize_history();
|
||||
|
||||
assert_eq!(
|
||||
h.contents(),
|
||||
vec![
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "f1".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "c1".to_string(),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "c1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "t1".to_string(),
|
||||
name: "tool".to_string(),
|
||||
input: "{}".to_string(),
|
||||
},
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: "t1".to_string(),
|
||||
output: "aborted".to_string(),
|
||||
},
|
||||
ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("s1".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "s1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
// In debug builds we panic on normalization errors instead of silently fixing them.
|
||||
#[cfg(debug_assertions)]
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn normalize_adds_missing_output_for_function_call_panics_in_debug() {
|
||||
let items = vec![ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "do_it".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-x".to_string(),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.normalize_history();
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn normalize_adds_missing_output_for_custom_tool_call_panics_in_debug() {
|
||||
let items = vec![ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "tool-x".to_string(),
|
||||
name: "custom".to_string(),
|
||||
input: "{}".to_string(),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.normalize_history();
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn normalize_adds_missing_output_for_local_shell_call_with_id_panics_in_debug() {
|
||||
let items = vec![ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("shell-1".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string(), "hi".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.normalize_history();
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn normalize_removes_orphan_function_call_output_panics_in_debug() {
|
||||
let items = vec![ResponseItem::FunctionCallOutput {
|
||||
call_id: "orphan-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.normalize_history();
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn normalize_removes_orphan_custom_tool_call_output_panics_in_debug() {
|
||||
let items = vec![ResponseItem::CustomToolCallOutput {
|
||||
call_id: "orphan-2".to_string(),
|
||||
output: "ok".to_string(),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.normalize_history();
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn normalize_mixed_inserts_and_removals_panics_in_debug() {
|
||||
let items = vec![
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "f1".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "c1".to_string(),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "c2".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "t1".to_string(),
|
||||
name: "tool".to_string(),
|
||||
input: "{}".to_string(),
|
||||
},
|
||||
ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("s1".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
},
|
||||
];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.normalize_history();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use crate::codex::ProcessedResponseItem;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::token_data::KnownPlan;
|
||||
use crate::token_data::PlanType;
|
||||
@@ -54,11 +53,8 @@ pub enum SandboxErr {
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum CodexErr {
|
||||
// todo(aibrahim): git rid of this error carrying the dangling artifacts
|
||||
#[error("turn aborted")]
|
||||
TurnAborted {
|
||||
dangling_artifacts: Vec<ProcessedResponseItem>,
|
||||
},
|
||||
TurnAborted,
|
||||
|
||||
/// Returned by ResponsesClient when the SSE stream disconnects or errors out **after** the HTTP
|
||||
/// handshake has succeeded but **before** it finished emitting `response.completed`.
|
||||
@@ -162,9 +158,7 @@ pub enum CodexErr {
|
||||
|
||||
impl From<CancelErr> for CodexErr {
|
||||
fn from(_: CancelErr) -> Self {
|
||||
CodexErr::TurnAborted {
|
||||
dangling_artifacts: Vec::new(),
|
||||
}
|
||||
CodexErr::TurnAborted
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -14,7 +14,6 @@ mod client_common;
|
||||
pub mod codex;
|
||||
mod codex_conversation;
|
||||
pub use codex_conversation::CodexConversation;
|
||||
mod codex_delegate;
|
||||
mod command_safety;
|
||||
pub mod config;
|
||||
pub mod config_edit;
|
||||
@@ -37,7 +36,6 @@ mod mcp_tool_call;
|
||||
mod message_history;
|
||||
mod model_provider_info;
|
||||
pub mod parse_command;
|
||||
mod response_processing;
|
||||
pub mod sandboxing;
|
||||
pub mod token_data;
|
||||
mod truncate;
|
||||
|
||||
@@ -1,105 +0,0 @@
|
||||
use crate::codex::Session;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use tracing::warn;
|
||||
|
||||
/// Process streamed `ResponseItem`s from the model into the pair of:
|
||||
/// - items we should record in conversation history; and
|
||||
/// - `ResponseInputItem`s to send back to the model on the next turn.
|
||||
pub(crate) async fn process_items(
|
||||
processed_items: Vec<crate::codex::ProcessedResponseItem>,
|
||||
sess: &Session,
|
||||
) -> (Vec<ResponseInputItem>, Vec<ResponseItem>) {
|
||||
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
|
||||
let mut responses = Vec::<ResponseInputItem>::new();
|
||||
for processed_response_item in processed_items {
|
||||
let crate::codex::ProcessedResponseItem { item, response } = processed_response_item;
|
||||
match (&item, &response) {
|
||||
(ResponseItem::Message { role, .. }, None) if role == "assistant" => {
|
||||
// If the model returned a message, we need to record it.
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
}
|
||||
(
|
||||
ResponseItem::LocalShellCall { .. },
|
||||
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
});
|
||||
}
|
||||
(
|
||||
ResponseItem::FunctionCall { .. },
|
||||
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
});
|
||||
}
|
||||
(
|
||||
ResponseItem::CustomToolCall { .. },
|
||||
Some(ResponseInputItem::CustomToolCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(ResponseItem::CustomToolCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
});
|
||||
}
|
||||
(
|
||||
ResponseItem::FunctionCall { .. },
|
||||
Some(ResponseInputItem::McpToolCallOutput { call_id, result }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
let output = match result {
|
||||
Ok(call_tool_result) => {
|
||||
crate::codex::convert_call_tool_result_to_function_call_output_payload(
|
||||
call_tool_result,
|
||||
)
|
||||
}
|
||||
Err(err) => FunctionCallOutputPayload {
|
||||
content: err.clone(),
|
||||
success: Some(false),
|
||||
},
|
||||
};
|
||||
items_to_record_in_conversation_history.push(ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output,
|
||||
});
|
||||
}
|
||||
(
|
||||
ResponseItem::Reasoning {
|
||||
id,
|
||||
summary,
|
||||
content,
|
||||
encrypted_content,
|
||||
},
|
||||
None,
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(ResponseItem::Reasoning {
|
||||
id: id.clone(),
|
||||
summary: summary.clone(),
|
||||
content: content.clone(),
|
||||
encrypted_content: encrypted_content.clone(),
|
||||
});
|
||||
}
|
||||
_ => {
|
||||
warn!("Unexpected response item: {item:?} with response: {response:?}");
|
||||
}
|
||||
};
|
||||
if let Some(response) = response {
|
||||
responses.push(response);
|
||||
}
|
||||
}
|
||||
|
||||
// Only attempt to take the lock if there is something to record.
|
||||
if !items_to_record_in_conversation_history.is_empty() {
|
||||
sess.record_conversation_items(&items_to_record_in_conversation_history)
|
||||
.await;
|
||||
}
|
||||
(responses, items_to_record_in_conversation_history)
|
||||
}
|
||||
@@ -36,12 +36,8 @@ impl SessionState {
|
||||
self.history.record_items(items)
|
||||
}
|
||||
|
||||
/// Returns the conversation history items produced by `self.history.get_history()`.
// NOTE(review): takes `&mut self`, presumably because `get_history` needs mutable access — confirm.
pub(crate) fn history_snapshot(&mut self) -> Vec<ResponseItem> {
|
||||
self.history.get_history()
|
||||
}
|
||||
|
||||
pub(crate) fn clone_history(&self) -> ConversationHistory {
|
||||
self.history.clone()
|
||||
pub(crate) fn history_snapshot(&self) -> Vec<ResponseItem> {
|
||||
self.history.contents()
|
||||
}
|
||||
|
||||
pub(crate) fn replace_history(&mut self, items: Vec<ResponseItem>) {
|
||||
|
||||
@@ -13,10 +13,8 @@ use tokio_util::task::AbortOnDropHandle;
|
||||
use tracing::trace;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::AuthManager;
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::config::Config;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::TaskCompleteEvent;
|
||||
use crate::protocol::TurnAbortReason;
|
||||
@@ -46,14 +44,6 @@ impl SessionTaskContext {
|
||||
/// Returns a new `Arc` handle to the session held by this task context.
pub(crate) fn clone_session(&self) -> Arc<Session> {
|
||||
Arc::clone(&self.session)
|
||||
}
|
||||
|
||||
/// Returns a new `Arc` handle to the `auth_manager` stored in the session's services.
pub(crate) fn auth_manager(&self) -> Arc<AuthManager> {
|
||||
Arc::clone(&self.session.services.auth_manager)
|
||||
}
|
||||
|
||||
/// Returns the session's base configuration by awaiting `Session::base_config`.
pub(crate) async fn base_config(&self) -> Arc<Config> {
|
||||
self.session.base_config().await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
||||
@@ -1,18 +1,11 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::ReviewOutputEvent;
|
||||
use codex_protocol::protocol::TaskCompleteEvent;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::codex_delegate::run_codex_conversation;
|
||||
// use crate::config::Config; // no longer needed directly; use session.base_config()
|
||||
use crate::review_format::format_review_findings_block;
|
||||
use crate::codex::exit_review_mode;
|
||||
use crate::codex::run_task;
|
||||
use crate::state::TaskKind;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
|
||||
@@ -35,108 +28,11 @@ impl SessionTask for ReviewTask {
|
||||
input: Vec<UserInput>,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> Option<String> {
|
||||
// let sess = session.clone_session();
|
||||
// run_task(sess, ctx, input, TaskKind::Review, cancellation_token).await
|
||||
|
||||
let config = session.base_config().await.as_ref().clone();
|
||||
let receiver =
|
||||
match run_codex_conversation(config, session.auth_manager(), input, cancellation_token)
|
||||
.await
|
||||
{
|
||||
Ok(r) => r,
|
||||
Err(_) => return None,
|
||||
};
|
||||
while let Ok(event) = receiver.recv().await {
|
||||
session
|
||||
.clone_session()
|
||||
.send_event(ctx.as_ref(), event.clone())
|
||||
.await;
|
||||
if let EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) = event {
|
||||
exit_review_mode(
|
||||
session.clone_session(),
|
||||
last_agent_message.as_deref().map(parse_review_output_event),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
Some("".to_string())
|
||||
let sess = session.clone_session();
|
||||
run_task(sess, ctx, input, TaskKind::Review, cancellation_token).await
|
||||
}
|
||||
|
||||
/// Abort hook for the review task: calls `exit_review_mode` with `None` as the
/// review output. The turn context is not used.
// NOTE(review): with the two-argument `exit_review_mode` shown in this file, `None`
// records the "review was interrupted" user message — confirm which definition is in scope.
async fn abort(&self, session: Arc<SessionTaskContext>, _ctx: Arc<TurnContext>) {
|
||||
exit_review_mode(session.clone_session(), None).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Records a `user`-role message describing the outcome of a review task.
|
||||
/// With `Some(review_output)` the message embeds the trimmed overall explanation followed by the formatted findings block (when there are findings); with `None` it states that the review was interrupted. This function does not emit the ExitedReviewMode event itself.
|
||||
pub(crate) async fn exit_review_mode(
|
||||
session: Arc<Session>,
|
||||
||||
review_output: Option<ReviewOutputEvent>,
|
||||
) {
|
||||
// ExitedReviewMode event can be emitted by the caller if needed.
|
||||
|
||||
let mut user_message = String::new();
|
||||
if let Some(out) = review_output {
|
||||
let mut findings_str = String::new();
|
||||
let text = out.overall_explanation.trim();
|
||||
if !text.is_empty() {
|
||||
findings_str.push_str(text);
|
||||
}
|
||||
if !out.findings.is_empty() {
|
||||
let block = format_review_findings_block(&out.findings, None);
|
||||
findings_str.push_str(&format!("\n{block}"));
|
||||
}
|
||||
user_message.push_str(&format!(
|
||||
r#"<user_action>
|
||||
<context>User initiated a review task. Here's the full review output from reviewer model. User may select one or more comments to resolve.</context>
|
||||
<action>review</action>
|
||||
<results>
|
||||
{findings_str}
|
||||
</results>
|
||||
</user_action>
|
||||
"#));
|
||||
} else {
|
||||
user_message.push_str(r#"<user_action>
|
||||
<context>User initiated a review task, but was interrupted. If user asks about this, tell them to re-initiate a review with `/review` and wait for it to complete.</context>
|
||||
<action>review</action>
|
||||
<results>
|
||||
None.
|
||||
</results>
|
||||
</user_action>
|
||||
"#);
|
||||
}
|
||||
|
||||
// Hand the synthesized message to the session as a single `user` message item.
session
|
||||
.record_conversation_items(&[ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText { text: user_message }],
|
||||
}])
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Parse the review output; when not valid JSON, build a structured
|
||||
/// fallback that carries the plain text as the overall explanation.
|
||||
///
|
||||
/// Returns: a ReviewOutputEvent parsed from JSON or a fallback populated from text.
|
||||
fn parse_review_output_event(text: &str) -> ReviewOutputEvent {
|
||||
// Try direct parse first
|
||||
if let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(text) {
|
||||
return ev;
|
||||
}
|
||||
// If wrapped in markdown fences or extra prose, attempt to extract the first JSON object
|
||||
if let (Some(start), Some(end)) = (text.find('{'), text.rfind('}'))
|
||||
&& start < end
|
||||
&& let Some(slice) = text.get(start..=end)
|
||||
&& let Ok(ev) = serde_json::from_str::<ReviewOutputEvent>(slice)
|
||||
{
|
||||
return ev;
|
||||
}
|
||||
// Not JSON – return a structured ReviewOutputEvent that carries
|
||||
// the plain text as the overall explanation.
|
||||
ReviewOutputEvent {
|
||||
overall_explanation: text.to_string(),
|
||||
..Default::default()
|
||||
/// Abort hook for the review task: calls `exit_review_mode` with the session,
/// the turn context, and `None` as the review output.
// NOTE(review): the three-argument `exit_review_mode` is imported from `crate::codex`
// and is not visible here — confirm what it does when given `None`.
async fn abort(&self, session: Arc<SessionTaskContext>, ctx: Arc<TurnContext>) {
|
||||
exit_review_mode(session.clone_session(), ctx, None).await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ use std::sync::Arc;
|
||||
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_util::either::Either;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tokio_util::task::AbortOnDropHandle;
|
||||
|
||||
use crate::codex::Session;
|
||||
@@ -10,10 +9,8 @@ use crate::codex::TurnContext;
|
||||
use crate::error::CodexErr;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::router::ToolCall;
|
||||
use crate::tools::router::ToolRouter;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
|
||||
pub(crate) struct ToolCallRuntime {
|
||||
@@ -43,7 +40,6 @@ impl ToolCallRuntime {
|
||||
pub(crate) fn handle_tool_call(
|
||||
&self,
|
||||
call: ToolCall,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> impl std::future::Future<Output = Result<ResponseInputItem, CodexErr>> {
|
||||
let supports_parallel = self.router.tool_supports_parallel(&call.tool_name);
|
||||
|
||||
@@ -52,24 +48,18 @@ impl ToolCallRuntime {
|
||||
let turn = Arc::clone(&self.turn_context);
|
||||
let tracker = Arc::clone(&self.tracker);
|
||||
let lock = Arc::clone(&self.parallel_execution);
|
||||
let aborted_response = Self::aborted_response(&call);
|
||||
|
||||
let handle: AbortOnDropHandle<Result<ResponseInputItem, FunctionCallError>> =
|
||||
AbortOnDropHandle::new(tokio::spawn(async move {
|
||||
tokio::select! {
|
||||
_ = cancellation_token.cancelled() => Ok(aborted_response),
|
||||
res = async {
|
||||
let _guard = if supports_parallel {
|
||||
Either::Left(lock.read().await)
|
||||
} else {
|
||||
Either::Right(lock.write().await)
|
||||
};
|
||||
let _guard = if supports_parallel {
|
||||
Either::Left(lock.read().await)
|
||||
} else {
|
||||
Either::Right(lock.write().await)
|
||||
};
|
||||
|
||||
router
|
||||
.dispatch_tool_call(session, turn, tracker, call)
|
||||
.await
|
||||
} => res,
|
||||
}
|
||||
router
|
||||
.dispatch_tool_call(session, turn, tracker, call)
|
||||
.await
|
||||
}));
|
||||
|
||||
async move {
|
||||
@@ -84,25 +74,3 @@ impl ToolCallRuntime {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolCallRuntime {
|
||||
fn aborted_response(call: &ToolCall) -> ResponseInputItem {
|
||||
match &call.payload {
|
||||
ToolPayload::Custom { .. } => ResponseInputItem::CustomToolCallOutput {
|
||||
call_id: call.call_id.clone(),
|
||||
output: "aborted".to_string(),
|
||||
},
|
||||
ToolPayload::Mcp { .. } => ResponseInputItem::McpToolCallOutput {
|
||||
call_id: call.call_id.clone(),
|
||||
result: Err("aborted".to_string()),
|
||||
},
|
||||
_ => ResponseInputItem::FunctionCallOutput {
|
||||
call_id: call.call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,22 +35,6 @@ impl ResponseMock {
|
||||
/// Returns a cloned copy of the requests captured so far.
///
/// The result of `lock()` on `self.requests` is unwrapped, so a failed lock panics.
pub fn requests(&self) -> Vec<ResponsesRequest> {
|
||||
self.requests.lock().unwrap().clone()
|
||||
}
|
||||
|
||||
/// Returns true if any captured request contains a `function_call` with the
|
||||
/// provided `call_id`.
|
||||
pub fn saw_function_call(&self, call_id: &str) -> bool {
|
||||
self.requests()
|
||||
.iter()
|
||||
.any(|req| req.has_function_call(call_id))
|
||||
}
|
||||
|
||||
/// Returns the `output` string for a matching `function_call_output` with
|
||||
/// the provided `call_id`, searching across all captured requests.
|
||||
pub fn function_call_output_text(&self, call_id: &str) -> Option<String> {
|
||||
self.requests()
|
||||
.iter()
|
||||
.find_map(|req| req.function_call_output_text(call_id))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -86,28 +70,6 @@ impl ResponsesRequest {
|
||||
.unwrap_or_else(|| panic!("function call output {call_id} item not found in request"))
|
||||
}
|
||||
|
||||
/// Returns true if this request's `input` contains a `function_call` with
|
||||
/// the specified `call_id`.
|
||||
pub fn has_function_call(&self, call_id: &str) -> bool {
|
||||
self.input().iter().any(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
})
|
||||
}
|
||||
|
||||
/// If present, returns the `output` string of the `function_call_output`
|
||||
/// entry matching `call_id` in this request's `input`.
|
||||
pub fn function_call_output_text(&self, call_id: &str) -> Option<String> {
|
||||
let binding = self.input();
|
||||
let item = binding.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
})?;
|
||||
item.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
pub fn header(&self, name: &str) -> Option<String> {
|
||||
self.0
|
||||
.headers
|
||||
@@ -135,10 +97,6 @@ impl Match for ResponseMock {
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(ResponsesRequest(request.clone()));
|
||||
|
||||
// Enforce invariant checks on every request body captured by the mock.
|
||||
// Panic on orphan tool outputs or calls to catch regressions early.
|
||||
validate_request_body_invariants(request);
|
||||
true
|
||||
}
|
||||
}
|
||||
@@ -428,90 +386,3 @@ pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) -> Res
|
||||
|
||||
response_mock
|
||||
}
|
||||
|
||||
/// Validate invariants on the request body sent to `/v1/responses`.
|
||||
///
|
||||
/// - No `function_call_output`/`custom_tool_call_output` with missing/empty `call_id`.
|
||||
/// - Every `function_call_output` must match a prior `function_call` or
|
||||
/// `local_shell_call` with the same `call_id` in the same `input`.
|
||||
/// - Every `custom_tool_call_output` must match a prior `custom_tool_call`.
|
||||
/// - Additionally, enforce symmetry: every `function_call`/`custom_tool_call`
|
||||
/// in the `input` must have a matching output entry.
|
||||
fn validate_request_body_invariants(request: &wiremock::Request) {
|
||||
let Ok(body): Result<Value, _> = request.body_json() else {
|
||||
return;
|
||||
};
|
||||
let Some(items) = body.get("input").and_then(Value::as_array) else {
|
||||
panic!("input array not found in request");
|
||||
};
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
fn get_call_id(item: &Value) -> Option<&str> {
|
||||
item.get("call_id")
|
||||
.and_then(Value::as_str)
|
||||
.filter(|id| !id.is_empty())
|
||||
}
|
||||
|
||||
fn gather_ids(items: &[Value], kind: &str) -> HashSet<String> {
|
||||
items
|
||||
.iter()
|
||||
.filter(|item| item.get("type").and_then(Value::as_str) == Some(kind))
|
||||
.filter_map(get_call_id)
|
||||
.map(str::to_string)
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn gather_output_ids(items: &[Value], kind: &str, missing_msg: &str) -> HashSet<String> {
|
||||
items
|
||||
.iter()
|
||||
.filter(|item| item.get("type").and_then(Value::as_str) == Some(kind))
|
||||
.map(|item| {
|
||||
let Some(id) = get_call_id(item) else {
|
||||
panic!("{missing_msg}");
|
||||
};
|
||||
id.to_string()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
let function_calls = gather_ids(items, "function_call");
|
||||
let custom_tool_calls = gather_ids(items, "custom_tool_call");
|
||||
let local_shell_calls = gather_ids(items, "local_shell_call");
|
||||
let function_call_outputs = gather_output_ids(
|
||||
items,
|
||||
"function_call_output",
|
||||
"orphan function_call_output with empty call_id should be dropped",
|
||||
);
|
||||
let custom_tool_call_outputs = gather_output_ids(
|
||||
items,
|
||||
"custom_tool_call_output",
|
||||
"orphan custom_tool_call_output with empty call_id should be dropped",
|
||||
);
|
||||
|
||||
for cid in &function_call_outputs {
|
||||
assert!(
|
||||
function_calls.contains(cid) || local_shell_calls.contains(cid),
|
||||
"function_call_output without matching call in input: {cid}",
|
||||
);
|
||||
}
|
||||
for cid in &custom_tool_call_outputs {
|
||||
assert!(
|
||||
custom_tool_calls.contains(cid),
|
||||
"custom_tool_call_output without matching call in input: {cid}",
|
||||
);
|
||||
}
|
||||
|
||||
for cid in &function_calls {
|
||||
assert!(
|
||||
function_call_outputs.contains(cid),
|
||||
"Function call output is missing for call id: {cid}",
|
||||
);
|
||||
}
|
||||
for cid in &custom_tool_calls {
|
||||
assert!(
|
||||
custom_tool_call_outputs.contains(cid),
|
||||
"Custom tool call output is missing for call id: {cid}",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_core::protocol::EventMsg;
|
||||
@@ -6,9 +5,7 @@ use codex_core::protocol::Op;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
@@ -70,98 +67,3 @@ async fn interrupt_long_running_tool_emits_turn_aborted() {
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// After an interrupt we expect the next request to the model to include both
|
||||
/// the original tool call and an `"aborted"` `function_call_output`. This test
|
||||
/// exercises the follow-up flow: it sends another user turn, inspects the mock
|
||||
/// responses server, and ensures the model receives the synthesized abort.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn interrupt_tool_records_history_entries() {
|
||||
let command = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"sleep 60".to_string(),
|
||||
];
|
||||
let call_id = "call-history";
|
||||
|
||||
let args = json!({
|
||||
"command": command,
|
||||
"timeout_ms": 60_000
|
||||
})
|
||||
.to_string();
|
||||
let first_body = sse(vec![
|
||||
ev_response_created("resp-history"),
|
||||
ev_function_call(call_id, "shell", &args),
|
||||
ev_completed("resp-history"),
|
||||
]);
|
||||
let follow_up_body = sse(vec![
|
||||
ev_response_created("resp-followup"),
|
||||
ev_completed("resp-followup"),
|
||||
]);
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let response_mock = mount_sse_sequence(&server, vec![first_body, follow_up_body]).await;
|
||||
|
||||
let fixture = test_codex().build(&server).await.unwrap();
|
||||
let codex = Arc::clone(&fixture.codex);
|
||||
|
||||
let wait_timeout = Duration::from_millis(100);
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "start history recording".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|ev| matches!(ev, EventMsg::ExecCommandBegin(_)),
|
||||
wait_timeout,
|
||||
)
|
||||
.await;
|
||||
|
||||
codex.submit(Op::Interrupt).await.unwrap();
|
||||
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|ev| matches!(ev, EventMsg::TurnAborted(_)),
|
||||
wait_timeout,
|
||||
)
|
||||
.await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "follow up".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|ev| matches!(ev, EventMsg::TaskComplete(_)),
|
||||
wait_timeout,
|
||||
)
|
||||
.await;
|
||||
|
||||
let requests = response_mock.requests();
|
||||
assert!(
|
||||
requests.len() == 2,
|
||||
"expected two calls to the responses API, got {}",
|
||||
requests.len()
|
||||
);
|
||||
|
||||
assert!(
|
||||
response_mock.saw_function_call(call_id),
|
||||
"function call not recorded in responses payload"
|
||||
);
|
||||
assert_eq!(
|
||||
response_mock.function_call_output_text(call_id).as_deref(),
|
||||
Some("aborted"),
|
||||
"aborted function call output not recorded in responses payload"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -227,6 +227,62 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn local_shell_missing_ids_maps_to_function_output_error() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex();
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let local_shell_event = json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "local_shell_call",
|
||||
"status": "completed",
|
||||
"action": {
|
||||
"type": "exec",
|
||||
"command": ["/bin/echo", "hi"],
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
local_shell_event,
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"check shell output",
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let item = second_mock.single_request().function_call_output("");
|
||||
assert_eq!(item.get("call_id").and_then(Value::as_str), Some(""));
|
||||
assert_eq!(
|
||||
item.get("output").and_then(Value::as_str),
|
||||
Some("LocalShellCall without call_id or id"),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn collect_tools(use_unified_exec: bool) -> Result<Vec<String>> {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
|
||||
@@ -919,7 +919,6 @@ pub enum SessionSource {
|
||||
VSCode,
|
||||
Exec,
|
||||
Mcp,
|
||||
SubAgent,
|
||||
#[serde(other)]
|
||||
Unknown,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user