mirror of
https://github.com/openai/codex.git
synced 2026-02-05 08:23:41 +00:00
Compare commits
14 Commits
shell-tool
...
robin
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b3f6608e6b | ||
|
|
0e051644a9 | ||
|
|
40d14c0756 | ||
|
|
af65666561 | ||
|
|
2ae1f81d84 | ||
|
|
d363a0968e | ||
|
|
bce030ddb5 | ||
|
|
f4af6e389e | ||
|
|
b315b22f7b | ||
|
|
c9e149fd5c | ||
|
|
bacdc004be | ||
|
|
ab5972d447 | ||
|
|
767b66f407 | ||
|
|
830ab4ce20 |
1
.github/workflows/issue-deduplicator.yml
vendored
1
.github/workflows/issue-deduplicator.yml
vendored
@@ -46,7 +46,6 @@ jobs:
|
||||
with:
|
||||
openai-api-key: ${{ secrets.CODEX_OPENAI_API_KEY }}
|
||||
allow-users: "*"
|
||||
model: gpt-5.1
|
||||
prompt: |
|
||||
You are an assistant that triages new GitHub issues by identifying potential duplicates.
|
||||
|
||||
|
||||
22
.github/workflows/rust-release.yml
vendored
22
.github/workflows/rust-release.yml
vendored
@@ -371,8 +371,20 @@ jobs:
|
||||
path: |
|
||||
codex-rs/dist/${{ matrix.target }}/*
|
||||
|
||||
shell-tool-mcp:
|
||||
name: shell-tool-mcp
|
||||
needs: tag-check
|
||||
uses: ./.github/workflows/shell-tool-mcp.yml
|
||||
with:
|
||||
release-tag: ${{ github.ref_name }}
|
||||
# We are not ready to publish yet.
|
||||
publish: false
|
||||
secrets: inherit
|
||||
|
||||
release:
|
||||
needs: build
|
||||
needs:
|
||||
- build
|
||||
- shell-tool-mcp
|
||||
name: release
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
@@ -395,6 +407,14 @@ jobs:
|
||||
- name: List
|
||||
run: ls -R dist/
|
||||
|
||||
# This is a temporary fix: we should modify shell-tool-mcp.yml so these
|
||||
# files do not end up in dist/ in the first place.
|
||||
- name: Delete entries from dist/ that should not go in the release
|
||||
run: |
|
||||
rm -rf dist/shell-tool-mcp*
|
||||
|
||||
ls -R dist/
|
||||
|
||||
- name: Define release name
|
||||
id: release_name
|
||||
run: |
|
||||
|
||||
48
.github/workflows/shell-tool-mcp-ci.yml
vendored
Normal file
48
.github/workflows/shell-tool-mcp-ci.yml
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
name: shell-tool-mcp CI
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- "shell-tool-mcp/**"
|
||||
- ".github/workflows/shell-tool-mcp-ci.yml"
|
||||
- "pnpm-lock.yaml"
|
||||
- "pnpm-workspace.yaml"
|
||||
pull_request:
|
||||
paths:
|
||||
- "shell-tool-mcp/**"
|
||||
- ".github/workflows/shell-tool-mcp-ci.yml"
|
||||
- "pnpm-lock.yaml"
|
||||
- "pnpm-workspace.yaml"
|
||||
|
||||
env:
|
||||
NODE_VERSION: 22
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
run_install: false
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v5
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
cache: "pnpm"
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Format check
|
||||
run: pnpm --filter @openai/codex-shell-tool-mcp run format
|
||||
|
||||
- name: Run tests
|
||||
run: pnpm --filter @openai/codex-shell-tool-mcp test
|
||||
|
||||
- name: Build
|
||||
run: pnpm --filter @openai/codex-shell-tool-mcp run build
|
||||
412
.github/workflows/shell-tool-mcp.yml
vendored
Normal file
412
.github/workflows/shell-tool-mcp.yml
vendored
Normal file
@@ -0,0 +1,412 @@
|
||||
name: shell-tool-mcp
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
release-version:
|
||||
description: Version to publish (x.y.z or x.y.z-alpha.N). Defaults to GITHUB_REF_NAME when it starts with rust-v.
|
||||
required: false
|
||||
type: string
|
||||
release-tag:
|
||||
description: Tag name to use when downloading release artifacts (defaults to rust-v<version>).
|
||||
required: false
|
||||
type: string
|
||||
publish:
|
||||
description: Whether to publish to npm when the version is releasable.
|
||||
required: false
|
||||
default: true
|
||||
type: boolean
|
||||
|
||||
env:
|
||||
NODE_VERSION: 22
|
||||
|
||||
jobs:
|
||||
metadata:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
version: ${{ steps.compute.outputs.version }}
|
||||
release_tag: ${{ steps.compute.outputs.release_tag }}
|
||||
should_publish: ${{ steps.compute.outputs.should_publish }}
|
||||
npm_tag: ${{ steps.compute.outputs.npm_tag }}
|
||||
steps:
|
||||
- name: Compute version and tags
|
||||
id: compute
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
version="${{ inputs.release-version }}"
|
||||
release_tag="${{ inputs.release-tag }}"
|
||||
|
||||
if [[ -z "$version" ]]; then
|
||||
if [[ -n "$release_tag" && "$release_tag" =~ ^rust-v.+ ]]; then
|
||||
version="${release_tag#rust-v}"
|
||||
elif [[ "${GITHUB_REF_NAME:-}" =~ ^rust-v.+ ]]; then
|
||||
version="${GITHUB_REF_NAME#rust-v}"
|
||||
release_tag="${GITHUB_REF_NAME}"
|
||||
else
|
||||
echo "release-version is required when GITHUB_REF_NAME is not a rust-v tag."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ -z "$release_tag" ]]; then
|
||||
release_tag="rust-v${version}"
|
||||
fi
|
||||
|
||||
npm_tag=""
|
||||
should_publish="false"
|
||||
if [[ "$version" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
|
||||
should_publish="true"
|
||||
elif [[ "$version" =~ ^[0-9]+\.[0-9]+\.[0-9]+-alpha\.[0-9]+$ ]]; then
|
||||
should_publish="true"
|
||||
npm_tag="alpha"
|
||||
fi
|
||||
|
||||
echo "version=${version}" >> "$GITHUB_OUTPUT"
|
||||
echo "release_tag=${release_tag}" >> "$GITHUB_OUTPUT"
|
||||
echo "npm_tag=${npm_tag}" >> "$GITHUB_OUTPUT"
|
||||
echo "should_publish=${should_publish}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
rust-binaries:
|
||||
name: Build Rust - ${{ matrix.target }}
|
||||
needs: metadata
|
||||
runs-on: ${{ matrix.runner }}
|
||||
timeout-minutes: 30
|
||||
defaults:
|
||||
run:
|
||||
working-directory: codex-rs
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- runner: macos-15-xlarge
|
||||
target: aarch64-apple-darwin
|
||||
- runner: macos-15-xlarge
|
||||
target: x86_64-apple-darwin
|
||||
- runner: ubuntu-24.04
|
||||
target: x86_64-unknown-linux-musl
|
||||
install_musl: true
|
||||
- runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-musl
|
||||
install_musl: true
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.90
|
||||
with:
|
||||
targets: ${{ matrix.target }}
|
||||
|
||||
- if: ${{ matrix.install_musl }}
|
||||
name: Install musl build dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y musl-tools pkg-config
|
||||
|
||||
- name: Build exec server binaries
|
||||
run: cargo build --release --target ${{ matrix.target }} --bin codex-exec-mcp-server --bin codex-execve-wrapper
|
||||
|
||||
- name: Stage exec server binaries
|
||||
run: |
|
||||
dest="${GITHUB_WORKSPACE}/artifacts/vendor/${{ matrix.target }}"
|
||||
mkdir -p "$dest"
|
||||
cp "target/${{ matrix.target }}/release/codex-exec-mcp-server" "$dest/"
|
||||
cp "target/${{ matrix.target }}/release/codex-execve-wrapper" "$dest/"
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: shell-tool-mcp-rust-${{ matrix.target }}
|
||||
path: artifacts/**
|
||||
if-no-files-found: error
|
||||
|
||||
bash-linux:
|
||||
name: Build Bash (Linux) - ${{ matrix.variant }} - ${{ matrix.target }}
|
||||
needs: metadata
|
||||
runs-on: ${{ matrix.runner }}
|
||||
timeout-minutes: 30
|
||||
container:
|
||||
image: ${{ matrix.image }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- runner: ubuntu-24.04
|
||||
target: x86_64-unknown-linux-musl
|
||||
variant: ubuntu-24.04
|
||||
image: ubuntu:24.04
|
||||
- runner: ubuntu-24.04
|
||||
target: x86_64-unknown-linux-musl
|
||||
variant: ubuntu-22.04
|
||||
image: ubuntu:22.04
|
||||
- runner: ubuntu-24.04
|
||||
target: x86_64-unknown-linux-musl
|
||||
variant: ubuntu-20.04
|
||||
image: ubuntu:20.04
|
||||
- runner: ubuntu-24.04
|
||||
target: x86_64-unknown-linux-musl
|
||||
variant: debian-12
|
||||
image: debian:12
|
||||
- runner: ubuntu-24.04
|
||||
target: x86_64-unknown-linux-musl
|
||||
variant: debian-11
|
||||
image: debian:11
|
||||
- runner: ubuntu-24.04
|
||||
target: x86_64-unknown-linux-musl
|
||||
variant: centos-9
|
||||
image: quay.io/centos/centos:stream9
|
||||
- runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-musl
|
||||
variant: ubuntu-24.04
|
||||
image: arm64v8/ubuntu:24.04
|
||||
- runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-musl
|
||||
variant: ubuntu-22.04
|
||||
image: arm64v8/ubuntu:22.04
|
||||
- runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-musl
|
||||
variant: ubuntu-20.04
|
||||
image: arm64v8/ubuntu:20.04
|
||||
- runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-musl
|
||||
variant: debian-12
|
||||
image: arm64v8/debian:12
|
||||
- runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-musl
|
||||
variant: debian-11
|
||||
image: arm64v8/debian:11
|
||||
- runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-musl
|
||||
variant: centos-9
|
||||
image: quay.io/centos/centos:stream9
|
||||
steps:
|
||||
- name: Install build prerequisites
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if command -v apt-get >/dev/null 2>&1; then
|
||||
apt-get update
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y git build-essential bison autoconf gettext
|
||||
elif command -v dnf >/dev/null 2>&1; then
|
||||
dnf install -y git gcc gcc-c++ make bison autoconf gettext
|
||||
elif command -v yum >/dev/null 2>&1; then
|
||||
yum install -y git gcc gcc-c++ make bison autoconf gettext
|
||||
else
|
||||
echo "Unsupported package manager in container"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Build patched Bash
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
git clone --depth 1 https://github.com/bminor/bash /tmp/bash
|
||||
cd /tmp/bash
|
||||
git fetch --depth 1 origin a8a1c2fac029404d3f42cd39f5a20f24b6e4fe4b
|
||||
git checkout a8a1c2fac029404d3f42cd39f5a20f24b6e4fe4b
|
||||
git apply "${GITHUB_WORKSPACE}/shell-tool-mcp/patches/bash-exec-wrapper.patch"
|
||||
./configure --without-bash-malloc
|
||||
cores="$(command -v nproc >/dev/null 2>&1 && nproc || getconf _NPROCESSORS_ONLN)"
|
||||
make -j"${cores}"
|
||||
|
||||
dest="${GITHUB_WORKSPACE}/artifacts/vendor/${{ matrix.target }}/bash/${{ matrix.variant }}"
|
||||
mkdir -p "$dest"
|
||||
cp bash "$dest/bash"
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: shell-tool-mcp-bash-${{ matrix.target }}-${{ matrix.variant }}
|
||||
path: artifacts/**
|
||||
if-no-files-found: error
|
||||
|
||||
bash-darwin:
|
||||
name: Build Bash (macOS) - ${{ matrix.variant }} - ${{ matrix.target }}
|
||||
needs: metadata
|
||||
runs-on: ${{ matrix.runner }}
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- runner: macos-15-xlarge
|
||||
target: aarch64-apple-darwin
|
||||
variant: macos-15
|
||||
- runner: macos-14
|
||||
target: aarch64-apple-darwin
|
||||
variant: macos-14
|
||||
- runner: macos-13
|
||||
target: x86_64-apple-darwin
|
||||
variant: macos-13
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Build patched Bash
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
git clone --depth 1 https://github.com/bminor/bash /tmp/bash
|
||||
cd /tmp/bash
|
||||
git fetch --depth 1 origin a8a1c2fac029404d3f42cd39f5a20f24b6e4fe4b
|
||||
git checkout a8a1c2fac029404d3f42cd39f5a20f24b6e4fe4b
|
||||
git apply "${GITHUB_WORKSPACE}/shell-tool-mcp/patches/bash-exec-wrapper.patch"
|
||||
./configure --without-bash-malloc
|
||||
cores="$(getconf _NPROCESSORS_ONLN)"
|
||||
make -j"${cores}"
|
||||
|
||||
dest="${GITHUB_WORKSPACE}/artifacts/vendor/${{ matrix.target }}/bash/${{ matrix.variant }}"
|
||||
mkdir -p "$dest"
|
||||
cp bash "$dest/bash"
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: shell-tool-mcp-bash-${{ matrix.target }}-${{ matrix.variant }}
|
||||
path: artifacts/**
|
||||
if-no-files-found: error
|
||||
|
||||
package:
|
||||
name: Package npm module
|
||||
needs:
|
||||
- metadata
|
||||
- rust-binaries
|
||||
- bash-linux
|
||||
- bash-darwin
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
PACKAGE_VERSION: ${{ needs.metadata.outputs.version }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 10.8.1
|
||||
run_install: false
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v5
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
|
||||
- name: Install JavaScript dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build (shell-tool-mcp)
|
||||
run: pnpm --filter @openai/codex-shell-tool-mcp run build
|
||||
|
||||
- name: Download build artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: artifacts
|
||||
|
||||
- name: Assemble staging directory
|
||||
id: staging
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
staging="${STAGING_DIR}"
|
||||
mkdir -p "$staging" "$staging/vendor"
|
||||
cp shell-tool-mcp/README.md "$staging/"
|
||||
cp shell-tool-mcp/package.json "$staging/"
|
||||
cp -R shell-tool-mcp/bin "$staging/"
|
||||
|
||||
found_vendor="false"
|
||||
shopt -s nullglob
|
||||
for vendor_dir in artifacts/*/vendor; do
|
||||
rsync -av "$vendor_dir/" "$staging/vendor/"
|
||||
found_vendor="true"
|
||||
done
|
||||
if [[ "$found_vendor" == "false" ]]; then
|
||||
echo "No vendor payloads were downloaded."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
node - <<'NODE'
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
|
||||
const stagingDir = process.env.STAGING_DIR;
|
||||
const version = process.env.PACKAGE_VERSION;
|
||||
const pkgPath = path.join(stagingDir, "package.json");
|
||||
const pkg = JSON.parse(fs.readFileSync(pkgPath, "utf8"));
|
||||
pkg.version = version;
|
||||
fs.writeFileSync(pkgPath, JSON.stringify(pkg, null, 2) + "\n");
|
||||
NODE
|
||||
|
||||
echo "dir=$staging" >> "$GITHUB_OUTPUT"
|
||||
env:
|
||||
STAGING_DIR: ${{ runner.temp }}/shell-tool-mcp
|
||||
|
||||
- name: Ensure binaries are executable
|
||||
run: |
|
||||
set -euo pipefail
|
||||
staging="${{ steps.staging.outputs.dir }}"
|
||||
chmod +x \
|
||||
"$staging"/vendor/*/codex-exec-mcp-server \
|
||||
"$staging"/vendor/*/codex-execve-wrapper \
|
||||
"$staging"/vendor/*/bash/*/bash
|
||||
|
||||
- name: Create npm tarball
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
mkdir -p dist/npm
|
||||
staging="${{ steps.staging.outputs.dir }}"
|
||||
pack_info=$(cd "$staging" && npm pack --ignore-scripts --json --pack-destination "${GITHUB_WORKSPACE}/dist/npm")
|
||||
filename=$(PACK_INFO="$pack_info" node -e 'const data = JSON.parse(process.env.PACK_INFO); console.log(data[0].filename);')
|
||||
mv "dist/npm/${filename}" "dist/npm/codex-shell-tool-mcp-npm-${PACKAGE_VERSION}.tgz"
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: codex-shell-tool-mcp-npm
|
||||
path: dist/npm/codex-shell-tool-mcp-npm-${{ env.PACKAGE_VERSION }}.tgz
|
||||
if-no-files-found: error
|
||||
|
||||
publish:
|
||||
name: Publish npm package
|
||||
needs:
|
||||
- metadata
|
||||
- package
|
||||
if: ${{ inputs.publish && needs.metadata.outputs.should_publish == 'true' }}
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
steps:
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 10.8.1
|
||||
run_install: false
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v5
|
||||
with:
|
||||
node-version: ${{ env.NODE_VERSION }}
|
||||
registry-url: https://registry.npmjs.org
|
||||
scope: "@openai"
|
||||
|
||||
- name: Update npm
|
||||
run: npm install -g npm@latest
|
||||
|
||||
- name: Download npm tarball
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: codex-shell-tool-mcp-npm
|
||||
path: dist/npm
|
||||
|
||||
- name: Publish to npm
|
||||
env:
|
||||
NPM_TAG: ${{ needs.metadata.outputs.npm_tag }}
|
||||
VERSION: ${{ needs.metadata.outputs.version }}
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
tag_args=()
|
||||
if [[ -n "${NPM_TAG}" ]]; then
|
||||
tag_args+=(--tag "${NPM_TAG}")
|
||||
fi
|
||||
npm publish "dist/npm/codex-shell-tool-mcp-npm-${VERSION}.tgz" "${tag_args[@]}"
|
||||
6
codex-rs/Cargo.lock
generated
6
codex-rs/Cargo.lock
generated
@@ -187,8 +187,10 @@ dependencies = [
|
||||
"codex-app-server-protocol",
|
||||
"codex-core",
|
||||
"codex-protocol",
|
||||
"core_test_support",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"shlex",
|
||||
"tokio",
|
||||
"uuid",
|
||||
"wiremock",
|
||||
@@ -868,6 +870,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serial_test",
|
||||
"shlex",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"toml",
|
||||
@@ -1776,6 +1779,7 @@ dependencies = [
|
||||
"notify",
|
||||
"regex-lite",
|
||||
"serde_json",
|
||||
"shlex",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"walkdir",
|
||||
@@ -3737,11 +3741,13 @@ dependencies = [
|
||||
"assert_cmd",
|
||||
"codex-core",
|
||||
"codex-mcp-server",
|
||||
"core_test_support",
|
||||
"mcp-types",
|
||||
"os_info",
|
||||
"pretty_assertions",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"shlex",
|
||||
"tokio",
|
||||
"wiremock",
|
||||
]
|
||||
|
||||
@@ -859,6 +859,7 @@ pub enum CommandExecutionStatus {
|
||||
InProgress,
|
||||
Completed,
|
||||
Failed,
|
||||
Declined,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
|
||||
@@ -53,3 +53,4 @@ serial_test = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
toml = { workspace = true }
|
||||
wiremock = { workspace = true }
|
||||
shlex = { workspace = true }
|
||||
|
||||
@@ -175,12 +175,20 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
});
|
||||
}
|
||||
ApiVersion::V2 => {
|
||||
let item_id = call_id.clone();
|
||||
let command_actions = parsed_cmd
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(V2ParsedCommand::from)
|
||||
.collect::<Vec<_>>();
|
||||
let command_string = shlex_join(&command);
|
||||
|
||||
let params = CommandExecutionRequestApprovalParams {
|
||||
thread_id: conversation_id.to_string(),
|
||||
turn_id: turn_id.clone(),
|
||||
// Until we migrate the core to be aware of a first class CommandExecutionItem
|
||||
// and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
|
||||
item_id: call_id.clone(),
|
||||
item_id: item_id.clone(),
|
||||
reason,
|
||||
risk: risk.map(V2SandboxCommandAssessment::from),
|
||||
};
|
||||
@@ -190,8 +198,17 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
))
|
||||
.await;
|
||||
tokio::spawn(async move {
|
||||
on_command_execution_request_approval_response(event_id, rx, conversation)
|
||||
.await;
|
||||
on_command_execution_request_approval_response(
|
||||
event_id,
|
||||
item_id,
|
||||
command_string,
|
||||
cwd,
|
||||
command_actions,
|
||||
rx,
|
||||
conversation,
|
||||
outgoing,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
}
|
||||
},
|
||||
@@ -370,16 +387,21 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
.await;
|
||||
}
|
||||
EventMsg::ExecCommandBegin(exec_command_begin_event) => {
|
||||
let item_id = exec_command_begin_event.call_id.clone();
|
||||
let command_actions = exec_command_begin_event
|
||||
.parsed_cmd
|
||||
.into_iter()
|
||||
.map(V2ParsedCommand::from)
|
||||
.collect::<Vec<_>>();
|
||||
let command = shlex_join(&exec_command_begin_event.command);
|
||||
let cwd = exec_command_begin_event.cwd;
|
||||
|
||||
let item = ThreadItem::CommandExecution {
|
||||
id: exec_command_begin_event.call_id.clone(),
|
||||
command: shlex_join(&exec_command_begin_event.command),
|
||||
cwd: exec_command_begin_event.cwd,
|
||||
id: item_id,
|
||||
command,
|
||||
cwd,
|
||||
status: CommandExecutionStatus::InProgress,
|
||||
command_actions: exec_command_begin_event
|
||||
.parsed_cmd
|
||||
.into_iter()
|
||||
.map(V2ParsedCommand::from)
|
||||
.collect(),
|
||||
command_actions,
|
||||
aggregated_output: None,
|
||||
exit_code: None,
|
||||
duration_ms: None,
|
||||
@@ -417,6 +439,10 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
} else {
|
||||
CommandExecutionStatus::Failed
|
||||
};
|
||||
let command_actions = parsed_cmd
|
||||
.into_iter()
|
||||
.map(V2ParsedCommand::from)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let aggregated_output = if aggregated_output.is_empty() {
|
||||
None
|
||||
@@ -431,7 +457,7 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
command: shlex_join(&command),
|
||||
cwd,
|
||||
status,
|
||||
command_actions: parsed_cmd.into_iter().map(V2ParsedCommand::from).collect(),
|
||||
command_actions,
|
||||
aggregated_output,
|
||||
exit_code: Some(exit_code),
|
||||
duration_ms: Some(duration_ms),
|
||||
@@ -516,6 +542,30 @@ async fn complete_file_change_item(
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn complete_command_execution_item(
|
||||
item_id: String,
|
||||
command: String,
|
||||
cwd: PathBuf,
|
||||
command_actions: Vec<V2ParsedCommand>,
|
||||
status: CommandExecutionStatus,
|
||||
outgoing: &OutgoingMessageSender,
|
||||
) {
|
||||
let item = ThreadItem::CommandExecution {
|
||||
id: item_id,
|
||||
command,
|
||||
cwd,
|
||||
status,
|
||||
command_actions,
|
||||
aggregated_output: None,
|
||||
exit_code: None,
|
||||
duration_ms: None,
|
||||
};
|
||||
let notification = ItemCompletedNotification { item };
|
||||
outgoing
|
||||
.send_server_notification(ServerNotification::ItemCompleted(notification))
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn find_and_remove_turn_summary(
|
||||
conversation_id: ConversationId,
|
||||
turn_summary_store: &TurnSummaryStore,
|
||||
@@ -765,42 +815,68 @@ async fn on_file_change_request_approval_response(
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn on_command_execution_request_approval_response(
|
||||
event_id: String,
|
||||
item_id: String,
|
||||
command: String,
|
||||
cwd: PathBuf,
|
||||
command_actions: Vec<V2ParsedCommand>,
|
||||
receiver: oneshot::Receiver<JsonValue>,
|
||||
conversation: Arc<CodexConversation>,
|
||||
outgoing: Arc<OutgoingMessageSender>,
|
||||
) {
|
||||
let response = receiver.await;
|
||||
let value = match response {
|
||||
Ok(value) => value,
|
||||
let (decision, completion_status) = match response {
|
||||
Ok(value) => {
|
||||
let response = serde_json::from_value::<CommandExecutionRequestApprovalResponse>(value)
|
||||
.unwrap_or_else(|err| {
|
||||
error!("failed to deserialize CommandExecutionRequestApprovalResponse: {err}");
|
||||
CommandExecutionRequestApprovalResponse {
|
||||
decision: ApprovalDecision::Decline,
|
||||
accept_settings: None,
|
||||
}
|
||||
});
|
||||
|
||||
let CommandExecutionRequestApprovalResponse {
|
||||
decision,
|
||||
accept_settings,
|
||||
} = response;
|
||||
|
||||
let (decision, completion_status) = match (decision, accept_settings) {
|
||||
(ApprovalDecision::Accept, Some(settings)) if settings.for_session => {
|
||||
(ReviewDecision::ApprovedForSession, None)
|
||||
}
|
||||
(ApprovalDecision::Accept, _) => (ReviewDecision::Approved, None),
|
||||
(ApprovalDecision::Decline, _) => (
|
||||
ReviewDecision::Denied,
|
||||
Some(CommandExecutionStatus::Declined),
|
||||
),
|
||||
(ApprovalDecision::Cancel, _) => (
|
||||
ReviewDecision::Abort,
|
||||
Some(CommandExecutionStatus::Declined),
|
||||
),
|
||||
};
|
||||
(decision, completion_status)
|
||||
}
|
||||
Err(err) => {
|
||||
error!("request failed: {err:?}");
|
||||
return;
|
||||
(ReviewDecision::Denied, Some(CommandExecutionStatus::Failed))
|
||||
}
|
||||
};
|
||||
|
||||
let response = serde_json::from_value::<CommandExecutionRequestApprovalResponse>(value)
|
||||
.unwrap_or_else(|err| {
|
||||
error!("failed to deserialize CommandExecutionRequestApprovalResponse: {err}");
|
||||
CommandExecutionRequestApprovalResponse {
|
||||
decision: ApprovalDecision::Decline,
|
||||
accept_settings: None,
|
||||
}
|
||||
});
|
||||
if let Some(status) = completion_status {
|
||||
complete_command_execution_item(
|
||||
item_id.clone(),
|
||||
command.clone(),
|
||||
cwd.clone(),
|
||||
command_actions.clone(),
|
||||
status,
|
||||
outgoing.as_ref(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
let CommandExecutionRequestApprovalResponse {
|
||||
decision,
|
||||
accept_settings,
|
||||
} = response;
|
||||
|
||||
let decision = match (decision, accept_settings) {
|
||||
(ApprovalDecision::Accept, Some(settings)) if settings.for_session => {
|
||||
ReviewDecision::ApprovedForSession
|
||||
}
|
||||
(ApprovalDecision::Accept, _) => ReviewDecision::Approved,
|
||||
(ApprovalDecision::Decline, _) => ReviewDecision::Denied,
|
||||
(ApprovalDecision::Cancel, _) => ReviewDecision::Abort,
|
||||
};
|
||||
if let Err(err) = conversation
|
||||
.submit(Op::ExecApproval {
|
||||
id: event_id,
|
||||
|
||||
@@ -1175,8 +1175,6 @@ impl CodexMessageProcessor {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let effective_policy = params
|
||||
|
||||
@@ -24,3 +24,5 @@ tokio = { workspace = true, features = [
|
||||
] }
|
||||
uuid = { workspace = true }
|
||||
wiremock = { workspace = true }
|
||||
core_test_support = { path = "../../../core/tests/common" }
|
||||
shlex = { workspace = true }
|
||||
|
||||
@@ -9,12 +9,14 @@ pub use auth_fixtures::ChatGptIdTokenClaims;
|
||||
pub use auth_fixtures::encode_id_token;
|
||||
pub use auth_fixtures::write_chatgpt_auth;
|
||||
use codex_app_server_protocol::JSONRPCResponse;
|
||||
pub use core_test_support::format_with_current_shell;
|
||||
pub use core_test_support::format_with_current_shell_display;
|
||||
pub use mcp_process::McpProcess;
|
||||
pub use mock_model_server::create_mock_chat_completions_server;
|
||||
pub use mock_model_server::create_mock_chat_completions_server_unchecked;
|
||||
pub use responses::create_apply_patch_sse_response;
|
||||
pub use responses::create_final_assistant_message_sse_response;
|
||||
pub use responses::create_shell_sse_response;
|
||||
pub use responses::create_shell_command_sse_response;
|
||||
pub use rollout::create_fake_rollout;
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
use serde_json::json;
|
||||
use std::path::Path;
|
||||
|
||||
pub fn create_shell_sse_response(
|
||||
pub fn create_shell_command_sse_response(
|
||||
command: Vec<String>,
|
||||
workdir: Option<&Path>,
|
||||
timeout_ms: Option<u64>,
|
||||
call_id: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
// The `arguments`` for the `shell` tool is a serialized JSON object.
|
||||
// The `arguments` for the `shell_command` tool is a serialized JSON object.
|
||||
let command_str = shlex::try_join(command.iter().map(String::as_str))?;
|
||||
let tool_call_arguments = serde_json::to_string(&json!({
|
||||
"command": command,
|
||||
"command": command_str,
|
||||
"workdir": workdir.map(|w| w.to_string_lossy()),
|
||||
"timeout": timeout_ms
|
||||
"timeout_ms": timeout_ms
|
||||
}))?;
|
||||
let tool_call = json!({
|
||||
"choices": [
|
||||
@@ -21,7 +22,7 @@ pub fn create_shell_sse_response(
|
||||
{
|
||||
"id": call_id,
|
||||
"function": {
|
||||
"name": "shell",
|
||||
"name": "shell_command",
|
||||
"arguments": tool_call_arguments
|
||||
}
|
||||
}
|
||||
@@ -62,10 +63,10 @@ pub fn create_apply_patch_sse_response(
|
||||
patch_content: &str,
|
||||
call_id: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
// Use shell command to call apply_patch with heredoc format
|
||||
let shell_command = format!("apply_patch <<'EOF'\n{patch_content}\nEOF");
|
||||
// Use shell_command to call apply_patch with heredoc format
|
||||
let command = format!("apply_patch <<'EOF'\n{patch_content}\nEOF");
|
||||
let tool_call_arguments = serde_json::to_string(&json!({
|
||||
"command": ["bash", "-lc", shell_command]
|
||||
"command": command
|
||||
}))?;
|
||||
|
||||
let tool_call = json!({
|
||||
@@ -76,7 +77,7 @@ pub fn create_apply_patch_sse_response(
|
||||
{
|
||||
"id": call_id,
|
||||
"function": {
|
||||
"name": "shell",
|
||||
"name": "shell_command",
|
||||
"arguments": tool_call_arguments
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,8 @@ use anyhow::Result;
|
||||
use app_test_support::McpProcess;
|
||||
use app_test_support::create_final_assistant_message_sse_response;
|
||||
use app_test_support::create_mock_chat_completions_server;
|
||||
use app_test_support::create_shell_sse_response;
|
||||
use app_test_support::create_shell_command_sse_response;
|
||||
use app_test_support::format_with_current_shell;
|
||||
use app_test_support::to_response;
|
||||
use codex_app_server_protocol::AddConversationListenerParams;
|
||||
use codex_app_server_protocol::AddConversationSubscriptionResponse;
|
||||
@@ -56,7 +57,7 @@ async fn test_codex_jsonrpc_conversation_flow() -> Result<()> {
|
||||
// Create a mock model server that immediately ends each turn.
|
||||
// Two turns are expected: initial session configure + one user message.
|
||||
let responses = vec![
|
||||
create_shell_sse_response(
|
||||
create_shell_command_sse_response(
|
||||
vec!["ls".to_string()],
|
||||
Some(&working_directory),
|
||||
Some(5000),
|
||||
@@ -175,7 +176,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> {
|
||||
|
||||
// Mock server will request a python shell call for the first and second turn, then finish.
|
||||
let responses = vec![
|
||||
create_shell_sse_response(
|
||||
create_shell_command_sse_response(
|
||||
vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
@@ -186,7 +187,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> {
|
||||
"call1",
|
||||
)?,
|
||||
create_final_assistant_message_sse_response("done 1")?,
|
||||
create_shell_sse_response(
|
||||
create_shell_command_sse_response(
|
||||
vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
@@ -267,11 +268,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> {
|
||||
ExecCommandApprovalParams {
|
||||
conversation_id,
|
||||
call_id: "call1".to_string(),
|
||||
command: vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
"print(42)".to_string(),
|
||||
],
|
||||
command: format_with_current_shell("python3 -c 'print(42)'"),
|
||||
cwd: working_directory.clone(),
|
||||
reason: None,
|
||||
risk: None,
|
||||
@@ -353,23 +350,15 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() -> Result<(
|
||||
std::fs::create_dir(&second_cwd)?;
|
||||
|
||||
let responses = vec![
|
||||
create_shell_sse_response(
|
||||
vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"echo first turn".to_string(),
|
||||
],
|
||||
create_shell_command_sse_response(
|
||||
vec!["echo".to_string(), "first".to_string(), "turn".to_string()],
|
||||
None,
|
||||
Some(5000),
|
||||
"call-first",
|
||||
)?,
|
||||
create_final_assistant_message_sse_response("done first")?,
|
||||
create_shell_sse_response(
|
||||
vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"echo second turn".to_string(),
|
||||
],
|
||||
create_shell_command_sse_response(
|
||||
vec!["echo".to_string(), "second".to_string(), "turn".to_string()],
|
||||
None,
|
||||
Some(5000),
|
||||
"call-second",
|
||||
@@ -481,13 +470,9 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() -> Result<(
|
||||
exec_begin.cwd, second_cwd,
|
||||
"exec turn should run from updated cwd"
|
||||
);
|
||||
let expected_command = format_with_current_shell("echo second turn");
|
||||
assert_eq!(
|
||||
exec_begin.command,
|
||||
vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"echo second turn".to_string()
|
||||
],
|
||||
exec_begin.command, expected_command,
|
||||
"exec turn should run expected command"
|
||||
);
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ use tokio::time::timeout;
|
||||
|
||||
use app_test_support::McpProcess;
|
||||
use app_test_support::create_mock_chat_completions_server;
|
||||
use app_test_support::create_shell_sse_response;
|
||||
use app_test_support::create_shell_command_sse_response;
|
||||
use app_test_support::to_response;
|
||||
|
||||
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
|
||||
@@ -56,7 +56,7 @@ async fn shell_command_interruption() -> anyhow::Result<()> {
|
||||
std::fs::create_dir(&working_directory)?;
|
||||
|
||||
// Create mock server with a single SSE response: the long sleep command
|
||||
let server = create_mock_chat_completions_server(vec![create_shell_sse_response(
|
||||
let server = create_mock_chat_completions_server(vec![create_shell_command_sse_response(
|
||||
shell_command.clone(),
|
||||
Some(&working_directory),
|
||||
Some(10_000), // 10 seconds timeout in ms
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
use anyhow::Result;
|
||||
use app_test_support::McpProcess;
|
||||
use app_test_support::create_mock_chat_completions_server;
|
||||
use app_test_support::create_shell_sse_response;
|
||||
use app_test_support::create_shell_command_sse_response;
|
||||
use app_test_support::to_response;
|
||||
use codex_app_server_protocol::JSONRPCNotification;
|
||||
use codex_app_server_protocol::JSONRPCResponse;
|
||||
@@ -41,7 +41,7 @@ async fn turn_interrupt_aborts_running_turn() -> Result<()> {
|
||||
std::fs::create_dir(&working_directory)?;
|
||||
|
||||
// Mock server: long-running shell command then (after abort) nothing else needed.
|
||||
let server = create_mock_chat_completions_server(vec![create_shell_sse_response(
|
||||
let server = create_mock_chat_completions_server(vec![create_shell_command_sse_response(
|
||||
shell_command.clone(),
|
||||
Some(&working_directory),
|
||||
Some(10_000),
|
||||
|
||||
@@ -4,9 +4,11 @@ use app_test_support::create_apply_patch_sse_response;
|
||||
use app_test_support::create_final_assistant_message_sse_response;
|
||||
use app_test_support::create_mock_chat_completions_server;
|
||||
use app_test_support::create_mock_chat_completions_server_unchecked;
|
||||
use app_test_support::create_shell_sse_response;
|
||||
use app_test_support::create_shell_command_sse_response;
|
||||
use app_test_support::format_with_current_shell_display;
|
||||
use app_test_support::to_response;
|
||||
use codex_app_server_protocol::ApprovalDecision;
|
||||
use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
|
||||
use codex_app_server_protocol::CommandExecutionStatus;
|
||||
use codex_app_server_protocol::FileChangeRequestApprovalResponse;
|
||||
use codex_app_server_protocol::ItemCompletedNotification;
|
||||
@@ -203,7 +205,7 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
|
||||
// Mock server: first turn requests a shell call (elicitation), then completes.
|
||||
// Second turn same, but we'll set approval_policy=never to avoid elicitation.
|
||||
let responses = vec![
|
||||
create_shell_sse_response(
|
||||
create_shell_command_sse_response(
|
||||
vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
@@ -214,7 +216,7 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
|
||||
"call1",
|
||||
)?,
|
||||
create_final_assistant_message_sse_response("done 1")?,
|
||||
create_shell_sse_response(
|
||||
create_shell_command_sse_response(
|
||||
vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
@@ -328,6 +330,145 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn turn_start_exec_approval_decline_v2() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let tmp = TempDir::new()?;
|
||||
let codex_home = tmp.path().to_path_buf();
|
||||
let workspace = tmp.path().join("workspace");
|
||||
std::fs::create_dir(&workspace)?;
|
||||
|
||||
let responses = vec![
|
||||
create_shell_command_sse_response(
|
||||
vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
"print(42)".to_string(),
|
||||
],
|
||||
None,
|
||||
Some(5000),
|
||||
"call-decline",
|
||||
)?,
|
||||
create_final_assistant_message_sse_response("done")?,
|
||||
];
|
||||
let server = create_mock_chat_completions_server(responses).await;
|
||||
create_config_toml(codex_home.as_path(), &server.uri(), "untrusted")?;
|
||||
|
||||
let mut mcp = McpProcess::new(codex_home.as_path()).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let start_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("mock-model".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let start_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(start_id)),
|
||||
)
|
||||
.await??;
|
||||
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(start_resp)?;
|
||||
|
||||
let turn_id = mcp
|
||||
.send_turn_start_request(TurnStartParams {
|
||||
thread_id: thread.id.clone(),
|
||||
input: vec![V2UserInput::Text {
|
||||
text: "run python".to_string(),
|
||||
}],
|
||||
cwd: Some(workspace.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let turn_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(turn_id)),
|
||||
)
|
||||
.await??;
|
||||
let TurnStartResponse { turn } = to_response::<TurnStartResponse>(turn_resp)?;
|
||||
|
||||
let started_command_execution = timeout(DEFAULT_READ_TIMEOUT, async {
|
||||
loop {
|
||||
let started_notif = mcp
|
||||
.read_stream_until_notification_message("item/started")
|
||||
.await?;
|
||||
let started: ItemStartedNotification =
|
||||
serde_json::from_value(started_notif.params.clone().expect("item/started params"))?;
|
||||
if let ThreadItem::CommandExecution { .. } = started.item {
|
||||
return Ok::<ThreadItem, anyhow::Error>(started.item);
|
||||
}
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
let ThreadItem::CommandExecution { id, status, .. } = started_command_execution else {
|
||||
unreachable!("loop ensures we break on command execution items");
|
||||
};
|
||||
assert_eq!(id, "call-decline");
|
||||
assert_eq!(status, CommandExecutionStatus::InProgress);
|
||||
|
||||
let server_req = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_request_message(),
|
||||
)
|
||||
.await??;
|
||||
let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req else {
|
||||
panic!("expected CommandExecutionRequestApproval request")
|
||||
};
|
||||
assert_eq!(params.item_id, "call-decline");
|
||||
assert_eq!(params.thread_id, thread.id);
|
||||
assert_eq!(params.turn_id, turn.id);
|
||||
|
||||
mcp.send_response(
|
||||
request_id,
|
||||
serde_json::to_value(CommandExecutionRequestApprovalResponse {
|
||||
decision: ApprovalDecision::Decline,
|
||||
accept_settings: None,
|
||||
})?,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let completed_command_execution = timeout(DEFAULT_READ_TIMEOUT, async {
|
||||
loop {
|
||||
let completed_notif = mcp
|
||||
.read_stream_until_notification_message("item/completed")
|
||||
.await?;
|
||||
let completed: ItemCompletedNotification = serde_json::from_value(
|
||||
completed_notif
|
||||
.params
|
||||
.clone()
|
||||
.expect("item/completed params"),
|
||||
)?;
|
||||
if let ThreadItem::CommandExecution { .. } = completed.item {
|
||||
return Ok::<ThreadItem, anyhow::Error>(completed.item);
|
||||
}
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
let ThreadItem::CommandExecution {
|
||||
id,
|
||||
status,
|
||||
exit_code,
|
||||
aggregated_output,
|
||||
..
|
||||
} = completed_command_execution
|
||||
else {
|
||||
unreachable!("loop ensures we break on command execution items");
|
||||
};
|
||||
assert_eq!(id, "call-decline");
|
||||
assert_eq!(status, CommandExecutionStatus::Declined);
|
||||
assert!(exit_code.is_none());
|
||||
assert!(aggregated_output.is_none());
|
||||
|
||||
timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_notification_message("codex/event/task_complete"),
|
||||
)
|
||||
.await??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
@@ -343,23 +484,15 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
|
||||
std::fs::create_dir(&second_cwd)?;
|
||||
|
||||
let responses = vec![
|
||||
create_shell_sse_response(
|
||||
vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"echo first turn".to_string(),
|
||||
],
|
||||
create_shell_command_sse_response(
|
||||
vec!["echo".to_string(), "first".to_string(), "turn".to_string()],
|
||||
None,
|
||||
Some(5000),
|
||||
"call-first",
|
||||
)?,
|
||||
create_final_assistant_message_sse_response("done first")?,
|
||||
create_shell_sse_response(
|
||||
vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"echo second turn".to_string(),
|
||||
],
|
||||
create_shell_command_sse_response(
|
||||
vec!["echo".to_string(), "second".to_string(), "turn".to_string()],
|
||||
None,
|
||||
Some(5000),
|
||||
"call-second",
|
||||
@@ -465,7 +598,8 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
|
||||
unreachable!("loop ensures we break on command execution items");
|
||||
};
|
||||
assert_eq!(cwd, second_cwd);
|
||||
assert_eq!(command, "bash -lc 'echo second turn'");
|
||||
let expected_command = format_with_current_shell_display("echo second turn");
|
||||
assert_eq!(command, expected_command);
|
||||
assert_eq!(status, CommandExecutionStatus::InProgress);
|
||||
|
||||
timeout(
|
||||
@@ -480,6 +614,10 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
|
||||
#[tokio::test]
|
||||
async fn turn_start_file_change_approval_v2() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
if cfg!(windows) {
|
||||
// TODO apply_patch approvals are not parsed from powershell commands yet
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let tmp = TempDir::new()?;
|
||||
let codex_home = tmp.path().join("codex_home");
|
||||
@@ -626,6 +764,10 @@ async fn turn_start_file_change_approval_v2() -> Result<()> {
|
||||
#[tokio::test]
|
||||
async fn turn_start_file_change_approval_decline_v2() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
if cfg!(windows) {
|
||||
// TODO apply_patch approvals are not parsed from powershell commands yet
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let tmp = TempDir::new()?;
|
||||
let codex_home = tmp.path().join("codex_home");
|
||||
|
||||
@@ -30,6 +30,7 @@ pub use standalone_executable::main;
|
||||
pub const APPLY_PATCH_TOOL_INSTRUCTIONS: &str = include_str!("../apply_patch_tool_instructions.md");
|
||||
|
||||
const APPLY_PATCH_COMMANDS: [&str; 2] = ["apply_patch", "applypatch"];
|
||||
const APPLY_PATCH_SHELLS: [&str; 3] = ["bash", "zsh", "sh"];
|
||||
|
||||
#[derive(Debug, Error, PartialEq)]
|
||||
pub enum ApplyPatchError {
|
||||
@@ -96,6 +97,13 @@ pub struct ApplyPatchArgs {
|
||||
pub workdir: Option<String>,
|
||||
}
|
||||
|
||||
fn shell_supports_apply_patch(shell: &str) -> bool {
|
||||
std::path::Path::new(shell)
|
||||
.file_name()
|
||||
.and_then(|name| name.to_str())
|
||||
.is_some_and(|name| APPLY_PATCH_SHELLS.contains(&name))
|
||||
}
|
||||
|
||||
pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
|
||||
match argv {
|
||||
// Direct invocation: apply_patch <patch>
|
||||
@@ -104,7 +112,7 @@ pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
|
||||
Err(e) => MaybeApplyPatch::PatchParseError(e),
|
||||
},
|
||||
// Bash heredoc form: (optional `cd <path> &&`) apply_patch <<'EOF' ...
|
||||
[bash, flag, script] if bash == "bash" && flag == "-lc" => {
|
||||
[shell, flag, script] if shell_supports_apply_patch(shell) && flag == "-lc" => {
|
||||
match extract_apply_patch_from_bash(script) {
|
||||
Ok((body, workdir)) => match parse_patch(&body) {
|
||||
Ok(mut source) => {
|
||||
@@ -224,12 +232,12 @@ pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApp
|
||||
);
|
||||
}
|
||||
}
|
||||
[bash, flag, script] if bash == "bash" && flag == "-lc" => {
|
||||
if parse_patch(script).is_ok() {
|
||||
return MaybeApplyPatchVerified::CorrectnessError(
|
||||
ApplyPatchError::ImplicitInvocation,
|
||||
);
|
||||
}
|
||||
[shell, flag, script]
|
||||
if shell_supports_apply_patch(shell)
|
||||
&& flag == "-lc"
|
||||
&& parse_patch(script).is_ok() =>
|
||||
{
|
||||
return MaybeApplyPatchVerified::CorrectnessError(ApplyPatchError::ImplicitInvocation);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
@@ -543,7 +543,6 @@ impl Session {
|
||||
config.model_reasoning_effort,
|
||||
config.model_reasoning_summary,
|
||||
config.model_context_window,
|
||||
config.model_max_output_tokens,
|
||||
config.model_auto_compact_token_limit,
|
||||
config.approval_policy,
|
||||
config.sandbox_policy.clone(),
|
||||
@@ -3072,8 +3071,6 @@ mod tests {
|
||||
with_escalated_permissions: Some(true),
|
||||
justification: Some("test".to_string()),
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let params2 = ExecParams {
|
||||
@@ -3084,8 +3081,6 @@ mod tests {
|
||||
env: HashMap::new(),
|
||||
justification: params.justification.clone(),
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
|
||||
@@ -267,6 +267,20 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn windows_powershell_full_path_is_safe() {
|
||||
if !cfg!(windows) {
|
||||
// Windows only because on Linux path splitting doesn't handle `/` separators properly
|
||||
return;
|
||||
}
|
||||
|
||||
assert!(is_known_safe_command(&vec_str(&[
|
||||
r"C:\Program Files\PowerShell\7\pwsh.exe",
|
||||
"-Command",
|
||||
"Get-Location",
|
||||
])));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bash_lc_safe_examples() {
|
||||
assert!(is_known_safe_command(&vec_str(&["bash", "-lc", "ls"])));
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use shlex::split as shlex_split;
|
||||
use std::path::Path;
|
||||
|
||||
/// On Windows, we conservatively allow only clearly read-only PowerShell invocations
|
||||
/// that match a small safelist. Anything else (including direct CMD commands) is unsafe.
|
||||
@@ -131,8 +132,14 @@ fn split_into_commands(tokens: Vec<String>) -> Option<Vec<Vec<String>>> {
|
||||
|
||||
/// Returns true when the executable name is one of the supported PowerShell binaries.
|
||||
fn is_powershell_executable(exe: &str) -> bool {
|
||||
let executable_name = Path::new(exe)
|
||||
.file_name()
|
||||
.and_then(|osstr| osstr.to_str())
|
||||
.unwrap_or(exe)
|
||||
.to_ascii_lowercase();
|
||||
|
||||
matches!(
|
||||
exe.to_ascii_lowercase().as_str(),
|
||||
executable_name.as_str(),
|
||||
"powershell" | "powershell.exe" | "pwsh" | "pwsh.exe"
|
||||
)
|
||||
}
|
||||
@@ -313,6 +320,27 @@ mod tests {
|
||||
])));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn accepts_full_path_powershell_invocations() {
|
||||
if !cfg!(windows) {
|
||||
// Windows only because on Linux path splitting doesn't handle `/` separators properly
|
||||
return;
|
||||
}
|
||||
|
||||
assert!(is_safe_command_windows(&vec_str(&[
|
||||
r"C:\Program Files\PowerShell\7\pwsh.exe",
|
||||
"-NoProfile",
|
||||
"-Command",
|
||||
"Get-ChildItem -Path .",
|
||||
])));
|
||||
|
||||
assert!(is_safe_command_windows(&vec_str(&[
|
||||
r"C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe",
|
||||
"-Command",
|
||||
"Get-Content Cargo.toml",
|
||||
])));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allows_read_only_pipelines_and_git_usage() {
|
||||
assert!(is_safe_command_windows(&vec_str(&[
|
||||
|
||||
@@ -86,9 +86,6 @@ pub struct Config {
|
||||
/// Size of the context window for the model, in tokens.
|
||||
pub model_context_window: Option<i64>,
|
||||
|
||||
/// Maximum number of output tokens.
|
||||
pub model_max_output_tokens: Option<i64>,
|
||||
|
||||
/// Token usage threshold triggering auto-compaction of conversation history.
|
||||
pub model_auto_compact_token_limit: Option<i64>,
|
||||
|
||||
@@ -570,9 +567,6 @@ pub struct ConfigToml {
|
||||
/// Size of the context window for the model, in tokens.
|
||||
pub model_context_window: Option<i64>,
|
||||
|
||||
/// Maximum number of output tokens.
|
||||
pub model_max_output_tokens: Option<i64>,
|
||||
|
||||
/// Token usage threshold triggering auto-compaction of conversation history.
|
||||
pub model_auto_compact_token_limit: Option<i64>,
|
||||
|
||||
@@ -1122,11 +1116,6 @@ impl Config {
|
||||
let model_context_window = cfg
|
||||
.model_context_window
|
||||
.or_else(|| openai_model_info.as_ref().map(|info| info.context_window));
|
||||
let model_max_output_tokens = cfg.model_max_output_tokens.or_else(|| {
|
||||
openai_model_info
|
||||
.as_ref()
|
||||
.map(|info| info.max_output_tokens)
|
||||
});
|
||||
let model_auto_compact_token_limit = cfg.model_auto_compact_token_limit.or_else(|| {
|
||||
openai_model_info
|
||||
.as_ref()
|
||||
@@ -1178,7 +1167,6 @@ impl Config {
|
||||
review_model,
|
||||
model_family,
|
||||
model_context_window,
|
||||
model_max_output_tokens,
|
||||
model_auto_compact_token_limit,
|
||||
model_provider_id,
|
||||
model_provider,
|
||||
@@ -2961,7 +2949,6 @@ model_verbosity = "high"
|
||||
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
|
||||
model_family: find_family_for_model("o3").expect("known model slug"),
|
||||
model_context_window: Some(200_000),
|
||||
model_max_output_tokens: Some(100_000),
|
||||
model_auto_compact_token_limit: Some(180_000),
|
||||
model_provider_id: "openai".to_string(),
|
||||
model_provider: fixture.openai_provider.clone(),
|
||||
@@ -3034,7 +3021,6 @@ model_verbosity = "high"
|
||||
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
|
||||
model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
|
||||
model_context_window: Some(16_385),
|
||||
model_max_output_tokens: Some(4_096),
|
||||
model_auto_compact_token_limit: Some(14_746),
|
||||
model_provider_id: "openai-chat-completions".to_string(),
|
||||
model_provider: fixture.openai_chat_completions_provider.clone(),
|
||||
@@ -3122,7 +3108,6 @@ model_verbosity = "high"
|
||||
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
|
||||
model_family: find_family_for_model("o3").expect("known model slug"),
|
||||
model_context_window: Some(200_000),
|
||||
model_max_output_tokens: Some(100_000),
|
||||
model_auto_compact_token_limit: Some(180_000),
|
||||
model_provider_id: "openai".to_string(),
|
||||
model_provider: fixture.openai_provider.clone(),
|
||||
@@ -3196,7 +3181,6 @@ model_verbosity = "high"
|
||||
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
|
||||
model_family: find_family_for_model("gpt-5.1").expect("known model slug"),
|
||||
model_context_window: Some(272_000),
|
||||
model_max_output_tokens: Some(128_000),
|
||||
model_auto_compact_token_limit: Some(244_800),
|
||||
model_provider_id: "openai".to_string(),
|
||||
model_provider: fixture.openai_provider.clone(),
|
||||
|
||||
@@ -6,7 +6,6 @@ use crate::truncate::truncate_function_output_items_with_policy;
|
||||
use crate::truncate::truncate_text;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::models::ShellToolCallParams;
|
||||
use codex_protocol::protocol::TokenUsage;
|
||||
use codex_protocol::protocol::TokenUsageInfo;
|
||||
use std::ops::Deref;
|
||||
@@ -131,47 +130,6 @@ impl ContextManager {
|
||||
normalize::remove_orphan_outputs(&mut self.items);
|
||||
}
|
||||
|
||||
fn get_shell_truncation_policy(&self, call_id: &str) -> Option<TruncationPolicy> {
|
||||
let call = self.get_call_for_call_id(call_id)?;
|
||||
match call {
|
||||
ResponseItem::FunctionCall { arguments, .. } => {
|
||||
let shell_tool_call_params =
|
||||
serde_json::from_str::<ShellToolCallParams>(&arguments).ok()?;
|
||||
Self::create_truncation_policy(
|
||||
shell_tool_call_params.max_output_tokens,
|
||||
shell_tool_call_params.max_output_chars,
|
||||
)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn create_truncation_policy(
|
||||
max_output_tokens: Option<usize>,
|
||||
max_output_chars: Option<usize>,
|
||||
) -> Option<TruncationPolicy> {
|
||||
if let Some(max_output_tokens) = max_output_tokens {
|
||||
Some(TruncationPolicy::Tokens(max_output_tokens))
|
||||
} else {
|
||||
max_output_chars.map(TruncationPolicy::Bytes)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_call_for_call_id(&self, call_id: &str) -> Option<ResponseItem> {
|
||||
self.items.iter().find_map(|item| match item {
|
||||
ResponseItem::FunctionCall {
|
||||
call_id: existing, ..
|
||||
} => {
|
||||
if existing == call_id {
|
||||
Some(item.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a clone of the contents in the transcript.
|
||||
fn contents(&self) -> Vec<ResponseItem> {
|
||||
self.items.clone()
|
||||
@@ -185,12 +143,13 @@ impl ContextManager {
|
||||
let policy_with_serialization_budget = policy.mul(1.2);
|
||||
match item {
|
||||
ResponseItem::FunctionCallOutput { call_id, output } => {
|
||||
let truncation_policy_override = self.get_shell_truncation_policy(call_id);
|
||||
let truncation_policy =
|
||||
truncation_policy_override.unwrap_or(policy_with_serialization_budget);
|
||||
let truncated = truncate_text(output.content.as_str(), truncation_policy);
|
||||
let truncated =
|
||||
truncate_text(output.content.as_str(), policy_with_serialization_budget);
|
||||
let truncated_items = output.content_items.as_ref().map(|items| {
|
||||
truncate_function_output_items_with_policy(items, truncation_policy)
|
||||
truncate_function_output_items_with_policy(
|
||||
items,
|
||||
policy_with_serialization_budget,
|
||||
)
|
||||
});
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
|
||||
@@ -117,7 +117,7 @@ pub fn parse_turn_item(item: &ResponseItem) -> Option<TurnItem> {
|
||||
..
|
||||
} => Some(TurnItem::WebSearch(WebSearchItem {
|
||||
id: id.clone().unwrap_or_default(),
|
||||
query: query.clone(),
|
||||
query: query.clone().unwrap_or_default(),
|
||||
})),
|
||||
_ => None,
|
||||
}
|
||||
@@ -306,7 +306,7 @@ mod tests {
|
||||
id: Some("ws_1".to_string()),
|
||||
status: Some("completed".to_string()),
|
||||
action: WebSearchAction::Search {
|
||||
query: "weather".to_string(),
|
||||
query: Some("weather".to_string()),
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
@@ -57,8 +57,6 @@ pub struct ExecParams {
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub arg0: Option<String>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub max_output_chars: Option<usize>,
|
||||
}
|
||||
|
||||
/// Mechanism to terminate an exec invocation before it finishes naturally.
|
||||
@@ -143,8 +141,6 @@ pub async fn process_exec_tool_call(
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
arg0: _,
|
||||
max_output_tokens,
|
||||
max_output_chars,
|
||||
} = params;
|
||||
|
||||
let (program, args) = command.split_first().ok_or_else(|| {
|
||||
@@ -162,8 +158,6 @@ pub async fn process_exec_tool_call(
|
||||
expiration,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
max_output_tokens,
|
||||
max_output_chars,
|
||||
};
|
||||
|
||||
let manager = SandboxManager::new();
|
||||
@@ -195,8 +189,6 @@ pub(crate) async fn execute_exec_env(
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
arg0,
|
||||
max_output_tokens,
|
||||
max_output_chars,
|
||||
} = env;
|
||||
|
||||
let params = ExecParams {
|
||||
@@ -207,8 +199,6 @@ pub(crate) async fn execute_exec_env(
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
arg0,
|
||||
max_output_tokens,
|
||||
max_output_chars,
|
||||
};
|
||||
|
||||
let start = Instant::now();
|
||||
@@ -851,8 +841,6 @@ mod tests {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let output = exec(params, SandboxType::None, &SandboxPolicy::ReadOnly, None).await?;
|
||||
@@ -898,8 +886,6 @@ mod tests {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
tokio::spawn(async move {
|
||||
tokio::time::sleep(Duration::from_millis(1_000)).await;
|
||||
|
||||
@@ -825,11 +825,21 @@ mod tests {
|
||||
.await
|
||||
.expect("Should collect git info from repo");
|
||||
|
||||
let remote_url_output = Command::new("git")
|
||||
.args(["remote", "get-url", "origin"])
|
||||
.current_dir(&repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to read remote url");
|
||||
// Some dev environments rewrite remotes (e.g., force SSH), so compare against
|
||||
// whatever URL Git reports instead of a fixed placeholder.
|
||||
let expected_remote = String::from_utf8(remote_url_output.stdout)
|
||||
.unwrap()
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
// Should have repository URL
|
||||
assert_eq!(
|
||||
git_info.repository_url,
|
||||
Some("https://github.com/example/repo.git".to_string())
|
||||
);
|
||||
assert_eq!(git_info.repository_url, Some(expected_remote));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -76,7 +76,6 @@ macro_rules! model_family {
|
||||
(
|
||||
$slug:expr, $family:expr $(, $key:ident : $value:expr )* $(,)?
|
||||
) => {{
|
||||
let truncation_policy = TruncationPolicy::Bytes(10_000);
|
||||
// defaults
|
||||
#[allow(unused_mut)]
|
||||
let mut mf = ModelFamily {
|
||||
@@ -91,10 +90,10 @@ macro_rules! model_family {
|
||||
experimental_supported_tools: Vec::new(),
|
||||
effective_context_window_percent: 95,
|
||||
support_verbosity: false,
|
||||
shell_type: ConfigShellToolType::Default(truncation_policy),
|
||||
shell_type: ConfigShellToolType::Default,
|
||||
default_verbosity: None,
|
||||
default_reasoning_effort: None,
|
||||
truncation_policy,
|
||||
truncation_policy: TruncationPolicy::Bytes(10_000),
|
||||
};
|
||||
|
||||
// apply overrides
|
||||
@@ -138,8 +137,20 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
model_family!(slug, "gpt-4o", needs_special_apply_patch_instructions: true)
|
||||
} else if slug.starts_with("gpt-3.5") {
|
||||
model_family!(slug, "gpt-3.5", needs_special_apply_patch_instructions: true)
|
||||
} else if slug.starts_with("robin") {
|
||||
model_family!(
|
||||
slug, "gpt-5.1",
|
||||
supports_reasoning_summaries: true,
|
||||
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
|
||||
support_verbosity: true,
|
||||
default_verbosity: Some(Verbosity::Low),
|
||||
base_instructions: GPT_5_1_INSTRUCTIONS.to_string(),
|
||||
default_reasoning_effort: Some(ReasoningEffort::Medium),
|
||||
truncation_policy: TruncationPolicy::Bytes(10_000),
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
supports_parallel_tool_calls: true,
|
||||
)
|
||||
} else if slug.starts_with("test-gpt-5") {
|
||||
let truncation_policy = TruncationPolicy::Tokens(10_000);
|
||||
model_family!(
|
||||
slug, slug,
|
||||
supports_reasoning_summaries: true,
|
||||
@@ -152,13 +163,13 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
"test_sync_tool".to_string(),
|
||||
],
|
||||
supports_parallel_tool_calls: true,
|
||||
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
support_verbosity: true,
|
||||
truncation_policy: TruncationPolicy::Tokens(10_000),
|
||||
)
|
||||
|
||||
// Internal models.
|
||||
} else if slug.starts_with("codex-exp-") {
|
||||
let truncation_policy = TruncationPolicy::Tokens(10_000);
|
||||
model_family!(
|
||||
slug, slug,
|
||||
supports_reasoning_summaries: true,
|
||||
@@ -170,44 +181,41 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
"list_dir".to_string(),
|
||||
"read_file".to_string(),
|
||||
],
|
||||
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
supports_parallel_tool_calls: true,
|
||||
support_verbosity: true,
|
||||
truncation_policy: truncation_policy,
|
||||
truncation_policy: TruncationPolicy::Tokens(10_000),
|
||||
)
|
||||
|
||||
// Production models.
|
||||
} else if slug.starts_with("gpt-5.1-codex-max") {
|
||||
let truncation_policy = TruncationPolicy::Tokens(10_000);
|
||||
model_family!(
|
||||
slug, slug,
|
||||
supports_reasoning_summaries: true,
|
||||
reasoning_summary_format: ReasoningSummaryFormat::Experimental,
|
||||
base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(),
|
||||
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
|
||||
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
supports_parallel_tool_calls: true,
|
||||
support_verbosity: false,
|
||||
truncation_policy: truncation_policy,
|
||||
truncation_policy: TruncationPolicy::Tokens(10_000),
|
||||
)
|
||||
} else if slug.starts_with("gpt-5-codex")
|
||||
|| slug.starts_with("gpt-5.1-codex")
|
||||
|| slug.starts_with("codex-")
|
||||
{
|
||||
let truncation_policy = TruncationPolicy::Tokens(10_000);
|
||||
model_family!(
|
||||
slug, slug,
|
||||
supports_reasoning_summaries: true,
|
||||
reasoning_summary_format: ReasoningSummaryFormat::Experimental,
|
||||
base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
|
||||
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
|
||||
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
supports_parallel_tool_calls: true,
|
||||
support_verbosity: false,
|
||||
truncation_policy: truncation_policy,
|
||||
truncation_policy: TruncationPolicy::Tokens(10_000),
|
||||
)
|
||||
} else if slug.starts_with("gpt-5.1") {
|
||||
let truncation_policy = TruncationPolicy::Tokens(10_000);
|
||||
model_family!(
|
||||
slug, "gpt-5.1",
|
||||
supports_reasoning_summaries: true,
|
||||
@@ -217,7 +225,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
base_instructions: GPT_5_1_INSTRUCTIONS.to_string(),
|
||||
default_reasoning_effort: Some(ReasoningEffort::Medium),
|
||||
truncation_policy: TruncationPolicy::Bytes(10_000),
|
||||
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
supports_parallel_tool_calls: true,
|
||||
)
|
||||
} else if slug.starts_with("gpt-5") {
|
||||
@@ -225,7 +233,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
slug, "gpt-5",
|
||||
supports_reasoning_summaries: true,
|
||||
needs_special_apply_patch_instructions: true,
|
||||
shell_type: ConfigShellToolType::Default(TruncationPolicy::Bytes(10_000)),
|
||||
shell_type: ConfigShellToolType::Default,
|
||||
support_verbosity: true,
|
||||
truncation_policy: TruncationPolicy::Bytes(10_000),
|
||||
)
|
||||
@@ -235,7 +243,6 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
}
|
||||
|
||||
pub fn derive_default_model_family(model: &str) -> ModelFamily {
|
||||
let truncation_policy = TruncationPolicy::Bytes(10_000);
|
||||
ModelFamily {
|
||||
slug: model.to_string(),
|
||||
family: model.to_string(),
|
||||
@@ -248,9 +255,9 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily {
|
||||
experimental_supported_tools: Vec::new(),
|
||||
effective_context_window_percent: 95,
|
||||
support_verbosity: false,
|
||||
shell_type: ConfigShellToolType::Default(truncation_policy),
|
||||
shell_type: ConfigShellToolType::Default,
|
||||
default_verbosity: None,
|
||||
default_reasoning_effort: None,
|
||||
truncation_policy,
|
||||
truncation_policy: TruncationPolicy::Bytes(10_000),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ use crate::model_family::ModelFamily;
|
||||
|
||||
// Shared constants for commonly used window/token sizes.
|
||||
pub(crate) const CONTEXT_WINDOW_272K: i64 = 272_000;
|
||||
pub(crate) const MAX_OUTPUT_TOKENS_128K: i64 = 128_000;
|
||||
|
||||
/// Metadata about a model, particularly OpenAI models.
|
||||
/// We may want to consider including details like the pricing for
|
||||
@@ -14,19 +13,15 @@ pub(crate) struct ModelInfo {
|
||||
/// Size of the context window in tokens. This is the maximum size of the input context.
|
||||
pub(crate) context_window: i64,
|
||||
|
||||
/// Maximum number of output tokens that can be generated for the model.
|
||||
pub(crate) max_output_tokens: i64,
|
||||
|
||||
/// Token threshold where we should automatically compact conversation history. This considers
|
||||
/// input tokens + output tokens of this turn.
|
||||
pub(crate) auto_compact_token_limit: Option<i64>,
|
||||
}
|
||||
|
||||
impl ModelInfo {
|
||||
const fn new(context_window: i64, max_output_tokens: i64) -> Self {
|
||||
const fn new(context_window: i64) -> Self {
|
||||
Self {
|
||||
context_window,
|
||||
max_output_tokens,
|
||||
auto_compact_token_limit: Some(Self::default_auto_compact_limit(context_window)),
|
||||
}
|
||||
}
|
||||
@@ -42,48 +37,44 @@ pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
|
||||
// OSS models have a 128k shared token pool.
|
||||
// Arbitrarily splitting it: 3/4 input context, 1/4 output.
|
||||
// https://openai.com/index/gpt-oss-model-card/
|
||||
"gpt-oss-20b" => Some(ModelInfo::new(96_000, 32_000)),
|
||||
"gpt-oss-120b" => Some(ModelInfo::new(96_000, 32_000)),
|
||||
"gpt-oss-20b" => Some(ModelInfo::new(96_000)),
|
||||
"gpt-oss-120b" => Some(ModelInfo::new(96_000)),
|
||||
// https://platform.openai.com/docs/models/o3
|
||||
"o3" => Some(ModelInfo::new(200_000, 100_000)),
|
||||
"o3" => Some(ModelInfo::new(200_000)),
|
||||
|
||||
// https://platform.openai.com/docs/models/o4-mini
|
||||
"o4-mini" => Some(ModelInfo::new(200_000, 100_000)),
|
||||
"o4-mini" => Some(ModelInfo::new(200_000)),
|
||||
|
||||
// https://platform.openai.com/docs/models/codex-mini-latest
|
||||
"codex-mini-latest" => Some(ModelInfo::new(200_000, 100_000)),
|
||||
"codex-mini-latest" => Some(ModelInfo::new(200_000)),
|
||||
|
||||
// As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
|
||||
// https://platform.openai.com/docs/models/gpt-4.1
|
||||
"gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo::new(1_047_576, 32_768)),
|
||||
"gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo::new(1_047_576)),
|
||||
|
||||
// As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
|
||||
// https://platform.openai.com/docs/models/gpt-4o
|
||||
"gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo::new(128_000, 16_384)),
|
||||
"gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo::new(128_000)),
|
||||
|
||||
// https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
|
||||
"gpt-4o-2024-05-13" => Some(ModelInfo::new(128_000, 4_096)),
|
||||
"gpt-4o-2024-05-13" => Some(ModelInfo::new(128_000)),
|
||||
|
||||
// https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
|
||||
"gpt-4o-2024-11-20" => Some(ModelInfo::new(128_000, 16_384)),
|
||||
"gpt-4o-2024-11-20" => Some(ModelInfo::new(128_000)),
|
||||
|
||||
// https://platform.openai.com/docs/models/gpt-3.5-turbo
|
||||
"gpt-3.5-turbo" => Some(ModelInfo::new(16_385, 4_096)),
|
||||
"gpt-3.5-turbo" => Some(ModelInfo::new(16_385)),
|
||||
|
||||
_ if slug.starts_with("gpt-5-codex")
|
||||
|| slug.starts_with("gpt-5.1-codex")
|
||||
|| slug.starts_with("gpt-5.1-codex-max") =>
|
||||
{
|
||||
Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
|
||||
Some(ModelInfo::new(CONTEXT_WINDOW_272K))
|
||||
}
|
||||
|
||||
_ if slug.starts_with("gpt-5") => {
|
||||
Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
|
||||
}
|
||||
_ if slug.starts_with("gpt-5") => Some(ModelInfo::new(CONTEXT_WINDOW_272K)),
|
||||
|
||||
_ if slug.starts_with("codex-") => {
|
||||
Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
|
||||
}
|
||||
_ if slug.starts_with("codex-") => Some(ModelInfo::new(CONTEXT_WINDOW_272K)),
|
||||
|
||||
_ => None,
|
||||
}
|
||||
|
||||
@@ -58,8 +58,6 @@ pub struct CommandSpec {
|
||||
pub expiration: ExecExpiration,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub max_output_chars: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -72,8 +70,6 @@ pub struct ExecEnv {
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub arg0: Option<String>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub max_output_chars: Option<usize>,
|
||||
}
|
||||
|
||||
pub enum SandboxPreference {
|
||||
@@ -188,8 +184,6 @@ impl SandboxManager {
|
||||
with_escalated_permissions: spec.with_escalated_permissions,
|
||||
justification: spec.justification,
|
||||
arg0: arg0_override,
|
||||
max_output_tokens: spec.max_output_tokens,
|
||||
max_output_chars: spec.max_output_chars,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -102,8 +102,6 @@ impl SessionTask for UserShellCommandTask {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let stdout_stream = Some(StdoutStream {
|
||||
|
||||
@@ -15,8 +15,6 @@ use crate::protocol::PatchApplyEndEvent;
|
||||
use crate::protocol::TurnDiffEvent;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::truncate::TruncationPolicy;
|
||||
use crate::truncate::formatted_truncate_text;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
@@ -31,7 +29,6 @@ pub(crate) struct ToolEventCtx<'a> {
|
||||
pub turn: &'a TurnContext,
|
||||
pub call_id: &'a str,
|
||||
pub turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
|
||||
pub override_truncation_policy: Option<&'a TruncationPolicy>,
|
||||
}
|
||||
|
||||
impl<'a> ToolEventCtx<'a> {
|
||||
@@ -40,14 +37,12 @@ impl<'a> ToolEventCtx<'a> {
|
||||
turn: &'a TurnContext,
|
||||
call_id: &'a str,
|
||||
turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
|
||||
override_truncation_policy: Option<&'a TruncationPolicy>,
|
||||
) -> Self {
|
||||
Self {
|
||||
session,
|
||||
turn,
|
||||
call_id,
|
||||
turn_diff_tracker,
|
||||
override_truncation_policy,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -260,13 +255,13 @@ impl ToolEmitter {
|
||||
fn format_exec_output_for_model(
|
||||
&self,
|
||||
output: &ExecToolCallOutput,
|
||||
truncation_policy: &TruncationPolicy,
|
||||
ctx: ToolEventCtx<'_>,
|
||||
) -> String {
|
||||
match self {
|
||||
Self::Shell { freeform: true, .. } => {
|
||||
super::format_exec_output_for_model_freeform(output, *truncation_policy)
|
||||
super::format_exec_output_for_model_freeform(output, ctx.turn.truncation_policy)
|
||||
}
|
||||
_ => super::format_exec_output_for_model_structured(output, *truncation_policy),
|
||||
_ => super::format_exec_output_for_model_structured(output, ctx.turn.truncation_policy),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -275,12 +270,9 @@ impl ToolEmitter {
|
||||
ctx: ToolEventCtx<'_>,
|
||||
out: Result<ExecToolCallOutput, ToolError>,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
let truncation_policy = ctx
|
||||
.override_truncation_policy
|
||||
.unwrap_or(&ctx.turn.truncation_policy);
|
||||
let (event, result) = match out {
|
||||
Ok(output) => {
|
||||
let content = self.format_exec_output_for_model(&output, truncation_policy);
|
||||
let content = self.format_exec_output_for_model(&output, ctx);
|
||||
let exit_code = output.exit_code;
|
||||
let event = ToolEventStage::Success(output);
|
||||
let result = if exit_code == 0 {
|
||||
@@ -292,26 +284,24 @@ impl ToolEmitter {
|
||||
}
|
||||
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
|
||||
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
|
||||
let response = self.format_exec_output_for_model(&output, truncation_policy);
|
||||
let response = self.format_exec_output_for_model(&output, ctx);
|
||||
let event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
|
||||
let result = Err(FunctionCallError::RespondToModel(response));
|
||||
(event, result)
|
||||
}
|
||||
Err(ToolError::Codex(err)) => {
|
||||
let formatted_error = formatted_truncate_text(&err.to_string(), *truncation_policy);
|
||||
let message = format!("execution error: {formatted_error}");
|
||||
let event = ToolEventStage::Failure(ToolEventFailure::Message(message));
|
||||
let result = Err(FunctionCallError::RespondToModel(formatted_error));
|
||||
let message = format!("execution error: {err:?}");
|
||||
let event = ToolEventStage::Failure(ToolEventFailure::Message(message.clone()));
|
||||
let result = Err(FunctionCallError::RespondToModel(message));
|
||||
(event, result)
|
||||
}
|
||||
Err(ToolError::Rejected(msg)) => {
|
||||
let formatted_msg = formatted_truncate_text(&msg, *truncation_policy);
|
||||
// Normalize common rejection messages for exec tools so tests and
|
||||
// users see a clear, consistent phrase.
|
||||
let normalized = if formatted_msg == "rejected by user" {
|
||||
let normalized = if msg == "rejected by user" {
|
||||
"exec command rejected by user".to_string()
|
||||
} else {
|
||||
formatted_msg
|
||||
msg
|
||||
};
|
||||
let event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone()));
|
||||
let result = Err(FunctionCallError::RespondToModel(normalized));
|
||||
|
||||
@@ -100,7 +100,6 @@ impl ToolHandler for ApplyPatchHandler {
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
Some(&tracker),
|
||||
None,
|
||||
);
|
||||
emitter.begin(event_ctx).await;
|
||||
|
||||
@@ -128,7 +127,6 @@ impl ToolHandler for ApplyPatchHandler {
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
Some(&tracker),
|
||||
None,
|
||||
);
|
||||
let content = emitter.finish(event_ctx, out).await?;
|
||||
Ok(ToolOutput::Function {
|
||||
|
||||
@@ -27,7 +27,6 @@ use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
|
||||
use crate::tools::runtimes::shell::ShellRequest;
|
||||
use crate::tools::runtimes::shell::ShellRuntime;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::truncate::TruncationPolicy;
|
||||
|
||||
pub struct ShellHandler;
|
||||
|
||||
@@ -43,8 +42,6 @@ impl ShellHandler {
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification,
|
||||
arg0: None,
|
||||
max_output_tokens: params.max_output_tokens,
|
||||
max_output_chars: params.max_output_chars,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -67,8 +64,6 @@ impl ShellCommandHandler {
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification,
|
||||
arg0: None,
|
||||
max_output_tokens: params.max_output_tokens,
|
||||
max_output_chars: params.max_output_chars,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -214,9 +209,6 @@ impl ShellHandler {
|
||||
)));
|
||||
}
|
||||
|
||||
let override_truncation_policy =
|
||||
create_truncation_policy(exec_params.max_output_tokens, exec_params.max_output_chars);
|
||||
|
||||
// Intercept apply_patch if present.
|
||||
match codex_apply_patch::maybe_parse_apply_patch_verified(
|
||||
&exec_params.command,
|
||||
@@ -245,7 +237,6 @@ impl ShellHandler {
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
Some(&tracker),
|
||||
override_truncation_policy.as_ref(),
|
||||
);
|
||||
emitter.begin(event_ctx).await;
|
||||
|
||||
@@ -272,7 +263,6 @@ impl ShellHandler {
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
Some(&tracker),
|
||||
override_truncation_policy.as_ref(),
|
||||
);
|
||||
let content = emitter.finish(event_ctx, out).await?;
|
||||
return Ok(ToolOutput::Function {
|
||||
@@ -304,13 +294,7 @@ impl ShellHandler {
|
||||
source,
|
||||
freeform,
|
||||
);
|
||||
let event_ctx = ToolEventCtx::new(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
None,
|
||||
override_truncation_policy.as_ref(),
|
||||
);
|
||||
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
|
||||
emitter.begin(event_ctx).await;
|
||||
|
||||
let req = ShellRequest {
|
||||
@@ -320,8 +304,6 @@ impl ShellHandler {
|
||||
env: exec_params.env.clone(),
|
||||
with_escalated_permissions: exec_params.with_escalated_permissions,
|
||||
justification: exec_params.justification.clone(),
|
||||
max_output_tokens: exec_params.max_output_tokens,
|
||||
max_output_chars: exec_params.max_output_chars,
|
||||
approval_requirement: create_approval_requirement_for_command(
|
||||
&turn.exec_policy,
|
||||
&exec_params.command,
|
||||
@@ -341,13 +323,7 @@ impl ShellHandler {
|
||||
let out = orchestrator
|
||||
.run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
|
||||
.await;
|
||||
let event_ctx = ToolEventCtx::new(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
None,
|
||||
override_truncation_policy.as_ref(),
|
||||
);
|
||||
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
|
||||
let content = emitter.finish(event_ctx, out).await?;
|
||||
Ok(ToolOutput::Function {
|
||||
content,
|
||||
@@ -357,16 +333,6 @@ impl ShellHandler {
|
||||
}
|
||||
}
|
||||
|
||||
fn create_truncation_policy(
|
||||
max_output_tokens: Option<usize>,
|
||||
max_output_chars: Option<usize>,
|
||||
) -> Option<TruncationPolicy> {
|
||||
if let Some(max_output_tokens) = max_output_tokens {
|
||||
Some(TruncationPolicy::Tokens(max_output_tokens))
|
||||
} else {
|
||||
max_output_chars.map(TruncationPolicy::Bytes)
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::PathBuf;
|
||||
|
||||
@@ -162,7 +162,6 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
context.turn.as_ref(),
|
||||
&context.call_id,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
let emitter = ToolEmitter::unified_exec(
|
||||
&command,
|
||||
|
||||
@@ -116,8 +116,6 @@ impl ToolRouter {
|
||||
timeout_ms: exec.timeout_ms,
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
Ok(Some(ToolCall {
|
||||
tool_name: "local_shell".to_string(),
|
||||
|
||||
@@ -72,8 +72,6 @@ impl ApplyPatchRuntime {
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@ pub mod unified_exec;
|
||||
|
||||
/// Shared helper to construct a CommandSpec from a tokenized command line.
|
||||
/// Validates that at least a program is present.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn build_command_spec(
|
||||
command: &[String],
|
||||
cwd: &Path,
|
||||
@@ -24,8 +23,6 @@ pub(crate) fn build_command_spec(
|
||||
expiration: ExecExpiration,
|
||||
with_escalated_permissions: Option<bool>,
|
||||
justification: Option<String>,
|
||||
max_output_tokens: Option<usize>,
|
||||
max_output_chars: Option<usize>,
|
||||
) -> Result<CommandSpec, ToolError> {
|
||||
let (program, args) = command
|
||||
.split_first()
|
||||
@@ -38,7 +35,5 @@ pub(crate) fn build_command_spec(
|
||||
expiration,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
max_output_tokens,
|
||||
max_output_chars,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -31,8 +31,6 @@ pub struct ShellRequest {
|
||||
pub env: std::collections::HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub max_output_chars: Option<usize>,
|
||||
pub approval_requirement: ApprovalRequirement,
|
||||
}
|
||||
|
||||
@@ -138,8 +136,6 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
|
||||
req.timeout_ms.into(),
|
||||
req.with_escalated_permissions,
|
||||
req.justification.clone(),
|
||||
req.max_output_tokens,
|
||||
req.max_output_chars,
|
||||
)?;
|
||||
let env = attempt
|
||||
.env_for(spec)
|
||||
|
||||
@@ -35,8 +35,6 @@ pub struct UnifiedExecRequest {
|
||||
pub env: HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub max_output_chars: Option<usize>,
|
||||
pub approval_requirement: ApprovalRequirement,
|
||||
}
|
||||
|
||||
@@ -75,8 +73,6 @@ impl UnifiedExecRequest {
|
||||
env,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
approval_requirement,
|
||||
}
|
||||
}
|
||||
@@ -158,8 +154,6 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
|
||||
ExecExpiration::DefaultTimeout,
|
||||
req.with_escalated_permissions,
|
||||
req.justification.clone(),
|
||||
req.max_output_tokens,
|
||||
req.max_output_chars,
|
||||
)
|
||||
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
|
||||
let exec_env = attempt
|
||||
|
||||
@@ -8,7 +8,6 @@ use crate::tools::handlers::apply_patch::ApplyPatchToolType;
|
||||
use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool;
|
||||
use crate::tools::handlers::apply_patch::create_apply_patch_json_tool;
|
||||
use crate::tools::registry::ToolRegistryBuilder;
|
||||
use crate::truncate::TruncationPolicy;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value as JsonValue;
|
||||
@@ -18,7 +17,7 @@ use std::collections::HashMap;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum ConfigShellToolType {
|
||||
Default(TruncationPolicy),
|
||||
Default,
|
||||
Local,
|
||||
UnifiedExec,
|
||||
/// Do not include a shell tool by default. Useful when using Codex
|
||||
@@ -27,7 +26,7 @@ pub enum ConfigShellToolType {
|
||||
/// to customize agent behavior.
|
||||
Disabled,
|
||||
/// Takes a command as a single string to be run in the user's default shell.
|
||||
ShellCommand(TruncationPolicy),
|
||||
ShellCommand,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -265,7 +264,7 @@ fn create_write_stdin_tool() -> ToolSpec {
|
||||
})
|
||||
}
|
||||
|
||||
fn create_shell_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
|
||||
fn create_shell_tool() -> ToolSpec {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"command".to_string(),
|
||||
@@ -299,24 +298,6 @@ fn create_shell_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
|
||||
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
|
||||
},
|
||||
);
|
||||
match truncation_policy {
|
||||
TruncationPolicy::Tokens(_) => {
|
||||
properties.insert(
|
||||
"max_output_tokens".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("Maximum number of tokens to return from stdout/stderr. Excess tokens will be truncated".to_string()),
|
||||
},
|
||||
);
|
||||
}
|
||||
TruncationPolicy::Bytes(_) => {
|
||||
properties.insert(
|
||||
"max_output_chars".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("Maximum number of characters to return from stdout/stderr. Excess characters will be truncated".to_string()),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let description = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
|
||||
@@ -347,7 +328,7 @@ Examples of valid command strings:
|
||||
})
|
||||
}
|
||||
|
||||
fn create_shell_command_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
|
||||
fn create_shell_command_tool() -> ToolSpec {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"command".to_string(),
|
||||
@@ -381,30 +362,6 @@ fn create_shell_command_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
|
||||
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
|
||||
},
|
||||
);
|
||||
match truncation_policy {
|
||||
TruncationPolicy::Tokens(_) => {
|
||||
properties.insert(
|
||||
"max_output_tokens".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some(
|
||||
"Maximum number of tokens to return. Excess output will be truncated."
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
}
|
||||
TruncationPolicy::Bytes(_) => {
|
||||
properties.insert(
|
||||
"max_output_chars".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some(
|
||||
"Maximum number of tokens to return. Excess output will be truncated."
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let description = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output.
|
||||
@@ -1042,8 +999,8 @@ pub(crate) fn build_specs(
|
||||
let shell_command_handler = Arc::new(ShellCommandHandler);
|
||||
|
||||
match &config.shell_type {
|
||||
ConfigShellToolType::Default(truncation_policy) => {
|
||||
builder.push_spec(create_shell_tool(*truncation_policy));
|
||||
ConfigShellToolType::Default => {
|
||||
builder.push_spec(create_shell_tool());
|
||||
}
|
||||
ConfigShellToolType::Local => {
|
||||
builder.push_spec(ToolSpec::LocalShell {});
|
||||
@@ -1057,8 +1014,8 @@ pub(crate) fn build_specs(
|
||||
ConfigShellToolType::Disabled => {
|
||||
// Do nothing.
|
||||
}
|
||||
ConfigShellToolType::ShellCommand(truncation_policy) => {
|
||||
builder.push_spec(create_shell_command_tool(*truncation_policy));
|
||||
ConfigShellToolType::ShellCommand => {
|
||||
builder.push_spec(create_shell_command_tool());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1201,11 +1158,11 @@ mod tests {
|
||||
|
||||
fn shell_tool_name(config: &ToolsConfig) -> Option<&'static str> {
|
||||
match config.shell_type {
|
||||
ConfigShellToolType::Default(_) => Some("shell"),
|
||||
ConfigShellToolType::Default => Some("shell"),
|
||||
ConfigShellToolType::Local => Some("local_shell"),
|
||||
ConfigShellToolType::UnifiedExec => None,
|
||||
ConfigShellToolType::Disabled => None,
|
||||
ConfigShellToolType::ShellCommand(_) => Some("shell_command"),
|
||||
ConfigShellToolType::ShellCommand => Some("shell_command"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1951,7 +1908,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_shell_tool() {
|
||||
let tool = super::create_shell_tool(TruncationPolicy::Bytes(10_000));
|
||||
let tool = super::create_shell_tool();
|
||||
let ToolSpec::Function(ResponsesApiTool {
|
||||
description, name, ..
|
||||
}) = &tool
|
||||
@@ -1981,7 +1938,7 @@ Examples of valid command strings:
|
||||
|
||||
#[test]
|
||||
fn test_shell_command_tool() {
|
||||
let tool = super::create_shell_command_tool(TruncationPolicy::Tokens(10_000));
|
||||
let tool = super::create_shell_command_tool();
|
||||
let ToolSpec::Function(ResponsesApiTool {
|
||||
description, name, ..
|
||||
}) = &tool
|
||||
|
||||
@@ -174,7 +174,6 @@ impl UnifiedExecSessionManager {
|
||||
turn_ref.as_ref(),
|
||||
request.call_id,
|
||||
None,
|
||||
None,
|
||||
)
|
||||
};
|
||||
interaction_emitter
|
||||
@@ -370,7 +369,6 @@ impl UnifiedExecSessionManager {
|
||||
entry.turn_ref.as_ref(),
|
||||
&entry.call_id,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
let emitter = ToolEmitter::unified_exec(
|
||||
&entry.command,
|
||||
@@ -404,7 +402,6 @@ impl UnifiedExecSessionManager {
|
||||
context.turn.as_ref(),
|
||||
&context.call_id,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
let emitter =
|
||||
ToolEmitter::unified_exec(command, cwd, ExecCommandSource::UnifiedExecStartup, None);
|
||||
|
||||
@@ -18,3 +18,4 @@ tempfile = { workspace = true }
|
||||
tokio = { workspace = true, features = ["time"] }
|
||||
walkdir = { workspace = true }
|
||||
wiremock = { workspace = true }
|
||||
shlex = { workspace = true }
|
||||
|
||||
@@ -172,6 +172,15 @@ pub fn sandbox_network_env_var() -> &'static str {
|
||||
codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR
|
||||
}
|
||||
|
||||
pub fn format_with_current_shell(command: &str) -> Vec<String> {
|
||||
codex_core::shell::default_user_shell().derive_exec_args(command, true)
|
||||
}
|
||||
|
||||
pub fn format_with_current_shell_display(command: &str) -> String {
|
||||
let args = format_with_current_shell(command);
|
||||
shlex::try_join(args.iter().map(String::as_str)).expect("serialize current shell command")
|
||||
}
|
||||
|
||||
pub mod fs_wait {
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
|
||||
@@ -462,8 +462,11 @@ pub fn ev_apply_patch_function_call(call_id: &str, patch: &str) -> Value {
|
||||
|
||||
pub fn ev_shell_command_call(call_id: &str, command: &str) -> Value {
|
||||
let args = serde_json::json!({ "command": command });
|
||||
let arguments = serde_json::to_string(&args).expect("serialize shell arguments");
|
||||
ev_shell_command_call_with_args(call_id, &args)
|
||||
}
|
||||
|
||||
pub fn ev_shell_command_call_with_args(call_id: &str, args: &serde_json::Value) -> Value {
|
||||
let arguments = serde_json::to_string(args).expect("serialize shell command arguments");
|
||||
ev_function_call(call_id, "shell_command", &arguments)
|
||||
}
|
||||
|
||||
|
||||
@@ -17,15 +17,11 @@ use core_test_support::wait_for_event;
|
||||
use regex_lite::Regex;
|
||||
use serde_json::json;
|
||||
|
||||
/// Integration test: spawn a long‑running shell tool via a mocked Responses SSE
|
||||
/// Integration test: spawn a long‑running shell_command tool via a mocked Responses SSE
|
||||
/// function call, then interrupt the session and expect TurnAborted.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn interrupt_long_running_tool_emits_turn_aborted() {
|
||||
let command = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"sleep 60".to_string(),
|
||||
];
|
||||
let command = "sleep 60";
|
||||
|
||||
let args = json!({
|
||||
"command": command,
|
||||
@@ -33,14 +29,19 @@ async fn interrupt_long_running_tool_emits_turn_aborted() {
|
||||
})
|
||||
.to_string();
|
||||
let body = sse(vec![
|
||||
ev_function_call("call_sleep", "shell", &args),
|
||||
ev_function_call("call_sleep", "shell_command", &args),
|
||||
ev_completed("done"),
|
||||
]);
|
||||
|
||||
let server = start_mock_server().await;
|
||||
mount_sse_once(&server, body).await;
|
||||
|
||||
let codex = test_codex().build(&server).await.unwrap().codex;
|
||||
let codex = test_codex()
|
||||
.with_model("gpt-5.1")
|
||||
.build(&server)
|
||||
.await
|
||||
.unwrap()
|
||||
.codex;
|
||||
|
||||
// Kick off a turn that triggers the function call.
|
||||
codex
|
||||
@@ -67,11 +68,7 @@ async fn interrupt_long_running_tool_emits_turn_aborted() {
|
||||
/// responses server, and ensures the model receives the synthesized abort.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn interrupt_tool_records_history_entries() {
|
||||
let command = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"sleep 60".to_string(),
|
||||
];
|
||||
let command = "sleep 60";
|
||||
let call_id = "call-history";
|
||||
|
||||
let args = json!({
|
||||
@@ -81,7 +78,7 @@ async fn interrupt_tool_records_history_entries() {
|
||||
.to_string();
|
||||
let first_body = sse(vec![
|
||||
ev_response_created("resp-history"),
|
||||
ev_function_call(call_id, "shell", &args),
|
||||
ev_function_call(call_id, "shell_command", &args),
|
||||
ev_completed("resp-history"),
|
||||
]);
|
||||
let follow_up_body = sse(vec![
|
||||
@@ -92,7 +89,11 @@ async fn interrupt_tool_records_history_entries() {
|
||||
let server = start_mock_server().await;
|
||||
let response_mock = mount_sse_sequence(&server, vec![first_body, follow_up_body]).await;
|
||||
|
||||
let fixture = test_codex().build(&server).await.unwrap();
|
||||
let fixture = test_codex()
|
||||
.with_model("gpt-5.1")
|
||||
.build(&server)
|
||||
.await
|
||||
.unwrap();
|
||||
let codex = Arc::clone(&fixture.codex);
|
||||
|
||||
codex
|
||||
|
||||
@@ -667,7 +667,7 @@ async fn apply_patch_cli_verification_failure_has_no_side_effects(
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_shell_heredoc_with_cd_updates_relative_workdir() -> Result<()> {
|
||||
async fn apply_patch_shell_command_heredoc_with_cd_updates_relative_workdir() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = apply_patch_harness_with(|config| {
|
||||
@@ -684,14 +684,11 @@ async fn apply_patch_shell_heredoc_with_cd_updates_relative_workdir() -> Result<
|
||||
|
||||
let script = "cd sub && apply_patch <<'EOF'\n*** Begin Patch\n*** Update File: in_sub.txt\n@@\n-before\n+after\n*** End Patch\nEOF\n";
|
||||
let call_id = "shell-heredoc-cd";
|
||||
let args = json!({
|
||||
"command": ["bash", "-lc", script],
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
let args = json!({ "command": script, "timeout_ms": 5_000 });
|
||||
let bodies = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
@@ -706,14 +703,14 @@ async fn apply_patch_shell_heredoc_with_cd_updates_relative_workdir() -> Result<
|
||||
let out = harness.function_call_stdout(call_id).await;
|
||||
assert!(
|
||||
out.contains("Success."),
|
||||
"expected successful apply_patch invocation via shell: {out}"
|
||||
"expected successful apply_patch invocation via shell_command: {out}"
|
||||
);
|
||||
assert_eq!(fs::read_to_string(&target)?, "after\n");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_shell_failure_propagates_error_and_skips_diff() -> Result<()> {
|
||||
async fn apply_patch_shell_command_failure_propagates_error_and_skips_diff() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = apply_patch_harness_with(|config| {
|
||||
@@ -730,14 +727,11 @@ async fn apply_patch_shell_failure_propagates_error_and_skips_diff() -> Result<(
|
||||
|
||||
let script = "apply_patch <<'EOF'\n*** Begin Patch\n*** Update File: invalid.txt\n@@\n-nope\n+changed\n*** End Patch\nEOF\n";
|
||||
let call_id = "shell-apply-failure";
|
||||
let args = json!({
|
||||
"command": ["bash", "-lc", script],
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
let args = json!({ "command": script, "timeout_ms": 5_000 });
|
||||
let bodies = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
@@ -780,10 +774,6 @@ async fn apply_patch_shell_failure_propagates_error_and_skips_diff() -> Result<(
|
||||
);
|
||||
|
||||
let out = harness.function_call_stdout(call_id).await;
|
||||
assert!(
|
||||
out.contains("apply_patch verification failed"),
|
||||
"expected verification failure message"
|
||||
);
|
||||
assert!(
|
||||
out.contains("Failed to find expected lines in"),
|
||||
"expected failure diagnostics: {out}"
|
||||
|
||||
@@ -71,7 +71,7 @@ enum ActionKind {
|
||||
response_body: &'static str,
|
||||
},
|
||||
RunCommand {
|
||||
command: &'static [&'static str],
|
||||
command: &'static str,
|
||||
},
|
||||
RunUnifiedExecCommand {
|
||||
command: &'static str,
|
||||
@@ -97,20 +97,12 @@ impl ActionKind {
|
||||
server: &MockServer,
|
||||
call_id: &str,
|
||||
with_escalated_permissions: bool,
|
||||
) -> Result<(Value, Option<Vec<String>>)> {
|
||||
) -> Result<(Value, Option<String>)> {
|
||||
match self {
|
||||
ActionKind::WriteFile { target, content } => {
|
||||
let (path, _) = target.resolve_for_patch(test);
|
||||
let _ = fs::remove_file(&path);
|
||||
let command = vec![
|
||||
"/bin/sh".to_string(),
|
||||
"-c".to_string(),
|
||||
format!(
|
||||
"printf {content:?} > {path:?} && cat {path:?}",
|
||||
content = content,
|
||||
path = path
|
||||
),
|
||||
];
|
||||
let command = format!("printf {content:?} > {path:?} && cat {path:?}");
|
||||
let event = shell_event(call_id, &command, 1_000, with_escalated_permissions)?;
|
||||
Ok((event, Some(command)))
|
||||
}
|
||||
@@ -127,21 +119,18 @@ impl ActionKind {
|
||||
.await;
|
||||
|
||||
let url = format!("{}{}", server.uri(), endpoint);
|
||||
let escaped_url = url.replace('\'', "\\'");
|
||||
let script = format!(
|
||||
"import sys\nimport urllib.request\nurl = {url:?}\ntry:\n data = urllib.request.urlopen(url, timeout=2).read().decode()\n print('OK:' + data.strip())\nexcept Exception as exc:\n print('ERR:' + exc.__class__.__name__)\n sys.exit(1)",
|
||||
"import sys\nimport urllib.request\nurl = '{escaped_url}'\ntry:\n data = urllib.request.urlopen(url, timeout=2).read().decode()\n print('OK:' + data.strip())\nexcept Exception as exc:\n print('ERR:' + exc.__class__.__name__)\n sys.exit(1)",
|
||||
);
|
||||
|
||||
let command = vec!["python3".to_string(), "-c".to_string(), script];
|
||||
let command = format!("python3 -c \"{script}\"");
|
||||
let event = shell_event(call_id, &command, 1_000, with_escalated_permissions)?;
|
||||
Ok((event, Some(command)))
|
||||
}
|
||||
ActionKind::RunCommand { command } => {
|
||||
let command: Vec<String> = command
|
||||
.iter()
|
||||
.map(std::string::ToString::to_string)
|
||||
.collect();
|
||||
let event = shell_event(call_id, &command, 1_000, with_escalated_permissions)?;
|
||||
Ok((event, Some(command)))
|
||||
let event = shell_event(call_id, command, 1_000, with_escalated_permissions)?;
|
||||
Ok((event, Some(command.to_string())))
|
||||
}
|
||||
ActionKind::RunUnifiedExecCommand {
|
||||
command,
|
||||
@@ -154,14 +143,7 @@ impl ActionKind {
|
||||
with_escalated_permissions,
|
||||
*justification,
|
||||
)?;
|
||||
Ok((
|
||||
event,
|
||||
Some(vec![
|
||||
"/bin/bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
command.to_string(),
|
||||
]),
|
||||
))
|
||||
Ok((event, Some(command.to_string())))
|
||||
}
|
||||
ActionKind::ApplyPatchFunction { target, content } => {
|
||||
let (path, patch_path) = target.resolve_for_patch(test);
|
||||
@@ -185,19 +167,19 @@ fn build_add_file_patch(patch_path: &str, content: &str) -> String {
|
||||
format!("*** Begin Patch\n*** Add File: {patch_path}\n+{content}\n*** End Patch\n")
|
||||
}
|
||||
|
||||
fn shell_apply_patch_command(patch: &str) -> Vec<String> {
|
||||
fn shell_apply_patch_command(patch: &str) -> String {
|
||||
let mut script = String::from("apply_patch <<'PATCH'\n");
|
||||
script.push_str(patch);
|
||||
if !patch.ends_with('\n') {
|
||||
script.push('\n');
|
||||
}
|
||||
script.push_str("PATCH\n");
|
||||
vec!["bash".to_string(), "-lc".to_string(), script]
|
||||
script
|
||||
}
|
||||
|
||||
fn shell_event(
|
||||
call_id: &str,
|
||||
command: &[String],
|
||||
command: &str,
|
||||
timeout_ms: u64,
|
||||
with_escalated_permissions: bool,
|
||||
) -> Result<Value> {
|
||||
@@ -209,7 +191,7 @@ fn shell_event(
|
||||
args["with_escalated_permissions"] = json!(true);
|
||||
}
|
||||
let args_str = serde_json::to_string(&args)?;
|
||||
Ok(ev_function_call(call_id, "shell", &args_str))
|
||||
Ok(ev_function_call(call_id, "shell_command", &args_str))
|
||||
}
|
||||
|
||||
fn exec_command_event(
|
||||
@@ -296,7 +278,10 @@ impl Expectation {
|
||||
}
|
||||
Expectation::FileCreatedNoExitCode { target, content } => {
|
||||
let (path, _) = target.resolve_for_patch(test);
|
||||
assert_eq!(result.exit_code, None, "expected no exit code for {path:?}");
|
||||
assert!(
|
||||
result.exit_code.is_none() || result.exit_code == Some(0),
|
||||
"expected no exit code for {path:?}",
|
||||
);
|
||||
assert!(
|
||||
result.stdout.contains(content),
|
||||
"stdout missing {content:?}: {}",
|
||||
@@ -385,8 +370,8 @@ impl Expectation {
|
||||
);
|
||||
}
|
||||
Expectation::NetworkSuccessNoExitCode { body_contains } => {
|
||||
assert_eq!(
|
||||
result.exit_code, None,
|
||||
assert!(
|
||||
result.exit_code.is_none() || result.exit_code == Some(0),
|
||||
"expected no exit code for successful network call: {}",
|
||||
result.stdout
|
||||
);
|
||||
@@ -433,8 +418,8 @@ impl Expectation {
|
||||
);
|
||||
}
|
||||
Expectation::CommandSuccessNoExitCode { stdout_contains } => {
|
||||
assert_eq!(
|
||||
result.exit_code, None,
|
||||
assert!(
|
||||
result.exit_code.is_none() || result.exit_code == Some(0),
|
||||
"expected no exit code for trusted command: {}",
|
||||
result.stdout
|
||||
);
|
||||
@@ -531,10 +516,18 @@ fn parse_result(item: &Value) -> CommandResult {
|
||||
CommandResult { exit_code, stdout }
|
||||
}
|
||||
Err(_) => {
|
||||
let structured = Regex::new(r"(?s)^Exit code:\s*(-?\d+).*?Output:\n(.*)$").unwrap();
|
||||
let regex =
|
||||
Regex::new(r"(?s)^.*?Process exited with code (\d+)\n.*?Output:\n(.*)$").unwrap();
|
||||
// parse freeform output
|
||||
if let Some(captures) = regex.captures(output_str) {
|
||||
if let Some(captures) = structured.captures(output_str) {
|
||||
let exit_code = captures.get(1).unwrap().as_str().parse::<i64>().unwrap();
|
||||
let output = captures.get(2).unwrap().as_str();
|
||||
CommandResult {
|
||||
exit_code: Some(exit_code),
|
||||
stdout: output.to_string(),
|
||||
}
|
||||
} else if let Some(captures) = regex.captures(output_str) {
|
||||
let exit_code = captures.get(1).unwrap().as_str().parse::<i64>().unwrap();
|
||||
let output = captures.get(2).unwrap().as_str();
|
||||
CommandResult {
|
||||
@@ -553,7 +546,7 @@ fn parse_result(item: &Value) -> CommandResult {
|
||||
|
||||
async fn expect_exec_approval(
|
||||
test: &TestCodex,
|
||||
expected_command: &[String],
|
||||
expected_command: &str,
|
||||
) -> ExecApprovalRequestEvent {
|
||||
let event = wait_for_event(&test.codex, |event| {
|
||||
matches!(
|
||||
@@ -565,7 +558,12 @@ async fn expect_exec_approval(
|
||||
|
||||
match event {
|
||||
EventMsg::ExecApprovalRequest(approval) => {
|
||||
assert_eq!(approval.command, expected_command);
|
||||
let last_arg = approval
|
||||
.command
|
||||
.last()
|
||||
.map(std::string::String::as_str)
|
||||
.unwrap_or_default();
|
||||
assert_eq!(last_arg, expected_command);
|
||||
approval
|
||||
}
|
||||
EventMsg::TaskComplete(_) => panic!("expected approval request before completion"),
|
||||
@@ -660,7 +658,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
|
||||
content: "danger-on-request",
|
||||
},
|
||||
@@ -702,7 +700,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
approval_policy: UnlessTrusted,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::RunCommand {
|
||||
command: &["echo", "trusted-unless"],
|
||||
command: "echo trusted-unless",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
@@ -717,7 +715,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
approval_policy: UnlessTrusted,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::RunCommand {
|
||||
command: &["echo", "trusted-unless"],
|
||||
command: "echo trusted-unless",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
@@ -880,7 +878,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::RunCommand {
|
||||
command: &["echo", "trusted-read-only"],
|
||||
command: "echo trusted-read-only",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
@@ -895,7 +893,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::RunCommand {
|
||||
command: &["echo", "trusted-read-only"],
|
||||
command: "echo trusted-read-only",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
@@ -1020,7 +1018,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "apply_patch_shell_requires_patch_approval",
|
||||
name: "apply_patch_shell_command_requires_patch_approval",
|
||||
approval_policy: UnlessTrusted,
|
||||
sandbox_policy: workspace_write(false),
|
||||
action: ActionKind::ApplyPatchShell {
|
||||
@@ -1114,7 +1112,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "apply_patch_shell_outside_requires_patch_approval",
|
||||
name: "apply_patch_shell_command_outside_requires_patch_approval",
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: workspace_write(false),
|
||||
action: ActionKind::ApplyPatchShell {
|
||||
@@ -1229,7 +1227,10 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
message_contains: if cfg!(target_os = "linux") {
|
||||
&["Permission denied"]
|
||||
} else {
|
||||
&["Permission denied|Operation not permitted|Read-only file system"]
|
||||
&[
|
||||
"Permission denied|Operation not permitted|operation not permitted|\
|
||||
Read-only file system",
|
||||
]
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -1238,7 +1239,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
approval_policy: Never,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::RunCommand {
|
||||
command: &["echo", "trusted-never"],
|
||||
command: "echo trusted-never",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
@@ -1373,7 +1374,10 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
message_contains: if cfg!(target_os = "linux") {
|
||||
&["Permission denied"]
|
||||
} else {
|
||||
&["Permission denied|Operation not permitted|Read-only file system"]
|
||||
&[
|
||||
"Permission denied|Operation not permitted|operation not permitted|\
|
||||
Read-only file system",
|
||||
]
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -1509,7 +1513,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
|
||||
expected_reason,
|
||||
} => {
|
||||
let command = expected_command
|
||||
.as_ref()
|
||||
.as_deref()
|
||||
.expect("exec approval requires shell command");
|
||||
let approval = expect_exec_approval(&test, command).await;
|
||||
if let Some(expected_reason) = expected_reason {
|
||||
|
||||
@@ -499,9 +499,20 @@ async fn integration_git_info_unit_test() {
|
||||
"Git info should contain repository_url"
|
||||
);
|
||||
let repo_url = git_info.repository_url.as_ref().unwrap();
|
||||
// Some hosts rewrite remotes (e.g., github.com → git@github.com), so assert against
|
||||
// the actual remote reported by git instead of a static URL.
|
||||
let expected_remote_url = std::process::Command::new("git")
|
||||
.args(["remote", "get-url", "origin"])
|
||||
.current_dir(&git_repo)
|
||||
.output()
|
||||
.unwrap();
|
||||
let expected_remote_url = String::from_utf8(expected_remote_url.stdout)
|
||||
.unwrap()
|
||||
.trim()
|
||||
.to_string();
|
||||
assert_eq!(
|
||||
repo_url, "https://github.com/example/integration-test.git",
|
||||
"Repository URL should match what we configured"
|
||||
repo_url, &expected_remote_url,
|
||||
"Repository URL should match git remote get-url output"
|
||||
);
|
||||
|
||||
println!("✅ Git info collection test passed!");
|
||||
|
||||
@@ -992,7 +992,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
|
||||
id: Some("web-search-id".into()),
|
||||
status: Some("completed".into()),
|
||||
action: WebSearchAction::Search {
|
||||
query: "weather".into(),
|
||||
query: Some("weather".into()),
|
||||
},
|
||||
});
|
||||
prompt.input.push(ResponseItem::FunctionCall {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
@@ -25,17 +26,17 @@ use pretty_assertions::assert_eq;
|
||||
async fn codex_delegate_forwards_exec_approval_and_proceeds_on_approval() {
|
||||
skip_if_no_network!();
|
||||
|
||||
// Sub-agent turn 1: emit a shell function_call requiring approval, then complete.
|
||||
// Sub-agent turn 1: emit a shell_command function_call requiring approval, then complete.
|
||||
let call_id = "call-exec-1";
|
||||
let args = serde_json::json!({
|
||||
"command": ["bash", "-lc", "rm -rf delegated"],
|
||||
"command": "rm -rf delegated",
|
||||
"timeout_ms": 1000,
|
||||
"with_escalated_permissions": true,
|
||||
})
|
||||
.to_string();
|
||||
let sse1 = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &args),
|
||||
ev_function_call(call_id, "shell_command", &args),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
|
||||
@@ -61,6 +62,8 @@ async fn codex_delegate_forwards_exec_approval_and_proceeds_on_approval() {
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.approval_policy = AskForApproval::OnRequest;
|
||||
config.sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is a valid model");
|
||||
});
|
||||
let test = builder.build(&server).await.expect("build test codex");
|
||||
|
||||
@@ -138,6 +141,8 @@ async fn codex_delegate_forwards_patch_approval_and_proceeds_on_decision() {
|
||||
// Use a restricted sandbox so patch approval is required
|
||||
config.sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
config.include_apply_patch_tool = true;
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is a valid model");
|
||||
});
|
||||
let test = builder.build(&server).await.expect("build test codex");
|
||||
|
||||
|
||||
@@ -37,8 +37,6 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let policy = SandboxPolicy::new_read_only_policy();
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
@@ -21,6 +22,11 @@ use std::fs;
|
||||
|
||||
#[tokio::test]
|
||||
async fn execpolicy_blocks_shell_invocation() -> Result<()> {
|
||||
// TODO execpolicy doesn't parse powershell commands yet
|
||||
if cfg!(windows) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let policy_path = config.codex_home.join("policy").join("policy.codexpolicy");
|
||||
fs::create_dir_all(
|
||||
@@ -34,13 +40,16 @@ async fn execpolicy_blocks_shell_invocation() -> Result<()> {
|
||||
r#"prefix_rule(pattern=["echo"], decision="forbidden")"#,
|
||||
)
|
||||
.expect("write policy file");
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5.1").expect("gpt-5.1 should have a model family");
|
||||
});
|
||||
let server = start_mock_server().await;
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-forbidden";
|
||||
let args = json!({
|
||||
"command": ["echo", "blocked"],
|
||||
"command": "echo blocked",
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
|
||||
@@ -48,7 +57,7 @@ async fn execpolicy_blocks_shell_invocation() -> Result<()> {
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
|
||||
@@ -146,10 +146,11 @@ async fn non_parallel_tools_run_serially() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let test = test_codex().build(&server).await?;
|
||||
let mut builder = test_codex().with_model("gpt-5.1");
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let shell_args = json!({
|
||||
"command": ["/bin/sh", "-c", "sleep 0.3"],
|
||||
"command": "sleep 0.3",
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let args_one = serde_json::to_string(&shell_args)?;
|
||||
@@ -157,8 +158,8 @@ async fn non_parallel_tools_run_serially() -> anyhow::Result<()> {
|
||||
|
||||
let first_response = sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call("call-1", "shell", &args_one),
|
||||
ev_function_call("call-2", "shell", &args_two),
|
||||
ev_function_call("call-1", "shell_command", &args_one),
|
||||
ev_function_call("call-2", "shell_command", &args_two),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
let second_response = sse(vec![
|
||||
@@ -167,7 +168,7 @@ async fn non_parallel_tools_run_serially() -> anyhow::Result<()> {
|
||||
]);
|
||||
mount_sse_sequence(&server, vec![first_response, second_response]).await;
|
||||
|
||||
let duration = run_turn_and_measure(&test, "run shell twice").await?;
|
||||
let duration = run_turn_and_measure(&test, "run shell_command twice").await?;
|
||||
assert_serial_duration(duration);
|
||||
|
||||
Ok(())
|
||||
@@ -185,14 +186,14 @@ async fn mixed_tools_fall_back_to_serial() -> anyhow::Result<()> {
|
||||
})
|
||||
.to_string();
|
||||
let shell_args = serde_json::to_string(&json!({
|
||||
"command": ["/bin/sh", "-c", "sleep 0.3"],
|
||||
"command": "sleep 0.3",
|
||||
"timeout_ms": 1_000,
|
||||
}))?;
|
||||
|
||||
let first_response = sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call("call-1", "test_sync_tool", &sync_args),
|
||||
ev_function_call("call-2", "shell", &shell_args),
|
||||
ev_function_call("call-2", "shell_command", &shell_args),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
let second_response = sse(vec![
|
||||
@@ -215,7 +216,7 @@ async fn tool_results_grouped() -> anyhow::Result<()> {
|
||||
let test = build_codex_with_test_tool(&server).await?;
|
||||
|
||||
let shell_args = serde_json::to_string(&json!({
|
||||
"command": ["/bin/sh", "-c", "echo 'shell output'"],
|
||||
"command": "echo 'shell output'",
|
||||
"timeout_ms": 1_000,
|
||||
}))?;
|
||||
|
||||
@@ -223,9 +224,9 @@ async fn tool_results_grouped() -> anyhow::Result<()> {
|
||||
&server,
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call("call-1", "shell", &shell_args),
|
||||
ev_function_call("call-2", "shell", &shell_args),
|
||||
ev_function_call("call-3", "shell", &shell_args),
|
||||
ev_function_call("call-1", "shell_command", &shell_args),
|
||||
ev_function_call("call-2", "shell_command", &shell_args),
|
||||
ev_function_call("call-3", "shell_command", &shell_args),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
|
||||
@@ -98,7 +98,7 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Verifies that a standard tool call (shell) exceeding the model formatting
|
||||
// Verifies that a standard tool call (shell_command) exceeding the model formatting
|
||||
// limits is truncated before being sent back to the model.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
|
||||
@@ -106,7 +106,7 @@ async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
// Use a model that exposes the generic shell tool.
|
||||
// Use a model that exposes the shell_command tool.
|
||||
let mut builder = test_codex().with_model("gpt-5.1").with_config(|config| {
|
||||
config.tool_output_token_limit = Some(100_000);
|
||||
});
|
||||
@@ -114,28 +114,22 @@ async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-too-large";
|
||||
let args = if cfg!(windows) {
|
||||
serde_json::json!({
|
||||
"command": [
|
||||
"powershell",
|
||||
"-Command",
|
||||
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
|
||||
],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
let command = if cfg!(windows) {
|
||||
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
|
||||
} else {
|
||||
serde_json::json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 100000"],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
"seq 1 100000"
|
||||
};
|
||||
let args = serde_json::json!({
|
||||
"command": command,
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
|
||||
// First response: model tells us to run the tool; second: complete the turn.
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
responses::ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
@@ -167,7 +161,10 @@ async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
|
||||
"expected truncated shell output to be plain text"
|
||||
);
|
||||
|
||||
assert_eq!(output.len(), 400097, "we should be almost 100k tokens");
|
||||
assert!(
|
||||
(400000..=401000).contains(&output.len()),
|
||||
"we should be almost 100k tokens"
|
||||
);
|
||||
|
||||
assert!(
|
||||
!output.contains("tokens truncated"),
|
||||
@@ -177,7 +174,7 @@ async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Verifies that a standard tool call (shell) exceeding the model formatting
|
||||
// Verifies that a standard tool call (shell_command) exceeding the model formatting
|
||||
// limits is truncated before being sent back to the model.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {
|
||||
@@ -185,34 +182,28 @@ async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
// Use a model that exposes the generic shell tool.
|
||||
// Use a model that exposes the shell_command tool.
|
||||
let mut builder = test_codex().with_model("gpt-5.1");
|
||||
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-too-large";
|
||||
let args = if cfg!(windows) {
|
||||
serde_json::json!({
|
||||
"command": [
|
||||
"powershell",
|
||||
"-Command",
|
||||
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
|
||||
],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
let command = if cfg!(windows) {
|
||||
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
|
||||
} else {
|
||||
serde_json::json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 100000"],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
"seq 1 100000"
|
||||
};
|
||||
let args = serde_json::json!({
|
||||
"command": command,
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
|
||||
// First response: model tells us to run the tool; second: complete the turn.
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
responses::ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
@@ -250,14 +241,14 @@ async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {
|
||||
|
||||
let len = output.len();
|
||||
assert!(
|
||||
(9_900..=10_000).contains(&len),
|
||||
(9_900..=10_100).contains(&len),
|
||||
"expected ~10k chars after truncation, got {len}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Verifies that a standard tool call (shell) exceeding the model formatting
|
||||
// Verifies that a standard tool call (shell_command) exceeding the model formatting
|
||||
// limits is truncated before being sent back to the model.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
||||
@@ -265,7 +256,7 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
// Use a model that exposes the generic shell tool.
|
||||
// Use a model that exposes the shell_command tool.
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
@@ -274,28 +265,22 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-too-large";
|
||||
let args = if cfg!(windows) {
|
||||
serde_json::json!({
|
||||
"command": [
|
||||
"powershell",
|
||||
"-Command",
|
||||
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
|
||||
],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
let command = if cfg!(windows) {
|
||||
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
|
||||
} else {
|
||||
serde_json::json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 100000"],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
"seq 1 100000"
|
||||
};
|
||||
let args = serde_json::json!({
|
||||
"command": command,
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
|
||||
// First response: model tells us to run the tool; second: complete the turn.
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
responses::ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
@@ -345,7 +330,7 @@ $"#;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Ensures shell tool outputs that exceed the line limit are truncated only once.
|
||||
// Ensures shell_command outputs that exceed the line limit are truncated only once.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn tool_call_output_truncated_only_once() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
@@ -359,27 +344,21 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
let call_id = "shell-single-truncation";
|
||||
let args = if cfg!(windows) {
|
||||
serde_json::json!({
|
||||
"command": [
|
||||
"powershell",
|
||||
"-Command",
|
||||
"for ($i=1; $i -le 10000; $i++) { Write-Output $i }"
|
||||
],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
let command = if cfg!(windows) {
|
||||
"for ($i=1; $i -le 10000; $i++) { Write-Output $i }"
|
||||
} else {
|
||||
serde_json::json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 10000"],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
"seq 1 10000"
|
||||
};
|
||||
let args = serde_json::json!({
|
||||
"command": command,
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
responses::ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
@@ -619,7 +598,7 @@ async fn token_policy_marker_reports_tokens() -> Result<()> {
|
||||
|
||||
let call_id = "shell-token-marker";
|
||||
let args = json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 150"],
|
||||
"command": "seq 1 150",
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
|
||||
@@ -627,7 +606,7 @@ async fn token_policy_marker_reports_tokens() -> Result<()> {
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
@@ -650,7 +629,7 @@ async fn token_policy_marker_reports_tokens() -> Result<()> {
|
||||
.function_call_output_text(call_id)
|
||||
.context("shell output present")?;
|
||||
|
||||
let pattern = r#"(?s)^\{"output":"Total output lines: 150\\n\\n1\\n2\\n3\\n4\\n5\\n.*?…\d+ tokens truncated…7\\n138\\n139\\n140\\n141\\n142\\n143\\n144\\n145\\n146\\n147\\n148\\n149\\n150\\n","metadata":\{"exit_code":0,"duration_seconds":0\.0\}\}$"#;
|
||||
let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 150\nOutput:\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19.*tokens truncated.*129\n130\n131\n132\n133\n134\n135\n136\n137\n138\n139\n140\n141\n142\n143\n144\n145\n146\n147\n148\n149\n150\n$";
|
||||
|
||||
assert_regex_match(pattern, &output);
|
||||
|
||||
@@ -672,7 +651,7 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> {
|
||||
|
||||
let call_id = "shell-byte-marker";
|
||||
let args = json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 150"],
|
||||
"command": "seq 1 150",
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
|
||||
@@ -680,7 +659,7 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> {
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
@@ -703,16 +682,16 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> {
|
||||
.function_call_output_text(call_id)
|
||||
.context("shell output present")?;
|
||||
|
||||
let pattern = r#"(?s)^\{"output":"Total output lines: 150\\n\\n1\\n2\\n3\\n4\\n5.*?…\d+ chars truncated…7\\n138\\n139\\n140\\n141\\n142\\n143\\n144\\n145\\n146\\n147\\n148\\n149\\n150\\n","metadata":\{"exit_code":0,"duration_seconds":0\.0\}\}$"#;
|
||||
let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 150\nOutput:\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19.*chars truncated.*129\n130\n131\n132\n133\n134\n135\n136\n137\n138\n139\n140\n141\n142\n143\n144\n145\n146\n147\n148\n149\n150\n$";
|
||||
|
||||
assert_regex_match(pattern, &output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Shell tool output should remain intact when the config opts into a large token budget.
|
||||
// shell_command output should remain intact when the config opts into a large token budget.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_tool_output_not_truncated_with_custom_limit() -> Result<()> {
|
||||
async fn shell_command_output_not_truncated_with_custom_limit() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
@@ -726,7 +705,7 @@ async fn shell_tool_output_not_truncated_with_custom_limit() -> Result<()> {
|
||||
|
||||
let call_id = "shell-no-trunc";
|
||||
let args = json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 1000"],
|
||||
"command": "seq 1 1000",
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
let expected_body: String = (1..=1000).map(|i| format!("{i}\n")).collect();
|
||||
@@ -735,7 +714,7 @@ async fn shell_tool_output_not_truncated_with_custom_limit() -> Result<()> {
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
|
||||
@@ -279,23 +279,19 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
|
||||
config.tool_output_token_limit = Some(100);
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
let call_id = "user-shell-double-truncation";
|
||||
let args = if cfg!(windows) {
|
||||
serde_json::json!({
|
||||
"command": [
|
||||
"powershell",
|
||||
"-Command",
|
||||
"for ($i=1; $i -le 2000; $i++) { Write-Output $i }"
|
||||
],
|
||||
"command": "for ($i=1; $i -le 2000; $i++) { Write-Output $i }",
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
} else {
|
||||
serde_json::json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 2000"],
|
||||
"command": "seq 1 2000",
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
};
|
||||
@@ -304,7 +300,7 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
@@ -319,19 +315,22 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
|
||||
.await;
|
||||
|
||||
fixture
|
||||
.submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
|
||||
.submit_turn_with_policy(
|
||||
"trigger big shell_command output",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let output = mock2
|
||||
.single_request()
|
||||
.function_call_output_text(call_id)
|
||||
.context("function_call_output present for shell call")?;
|
||||
.context("function_call_output present for shell_command call")?;
|
||||
|
||||
let truncation_headers = output.matches("Total output lines:").count();
|
||||
|
||||
assert_eq!(
|
||||
truncation_headers, 1,
|
||||
"shell output should carry only one truncation header: {output}"
|
||||
"shell_command output should carry only one truncation header: {output}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -86,8 +86,6 @@ impl EscalateServer {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
},
|
||||
get_platform_sandbox().unwrap_or(SandboxType::None),
|
||||
&sandbox_policy,
|
||||
|
||||
@@ -144,6 +144,7 @@ pub enum CommandExecutionStatus {
|
||||
InProgress,
|
||||
Completed,
|
||||
Failed,
|
||||
Declined,
|
||||
}
|
||||
|
||||
/// A command executed by the agent.
|
||||
|
||||
@@ -45,8 +45,6 @@ async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let sandbox_policy = SandboxPolicy::WorkspaceWrite {
|
||||
@@ -150,8 +148,6 @@ async fn assert_network_blocked(cmd: &[&str]) {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
||||
|
||||
@@ -23,3 +23,5 @@ tokio = { workspace = true, features = [
|
||||
"rt-multi-thread",
|
||||
] }
|
||||
wiremock = { workspace = true }
|
||||
core_test_support = { path = "../../../core/tests/common" }
|
||||
shlex = { workspace = true }
|
||||
|
||||
@@ -2,12 +2,13 @@ mod mcp_process;
|
||||
mod mock_model_server;
|
||||
mod responses;
|
||||
|
||||
pub use core_test_support::format_with_current_shell;
|
||||
pub use mcp_process::McpProcess;
|
||||
use mcp_types::JSONRPCResponse;
|
||||
pub use mock_model_server::create_mock_chat_completions_server;
|
||||
pub use responses::create_apply_patch_sse_response;
|
||||
pub use responses::create_final_assistant_message_sse_response;
|
||||
pub use responses::create_shell_sse_response;
|
||||
pub use responses::create_shell_command_sse_response;
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
pub fn to_response<T: DeserializeOwned>(response: JSONRPCResponse) -> anyhow::Result<T> {
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
use serde_json::json;
|
||||
use std::path::Path;
|
||||
|
||||
pub fn create_shell_sse_response(
|
||||
pub fn create_shell_command_sse_response(
|
||||
command: Vec<String>,
|
||||
workdir: Option<&Path>,
|
||||
timeout_ms: Option<u64>,
|
||||
call_id: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
// The `arguments`` for the `shell` tool is a serialized JSON object.
|
||||
// The `arguments` for the `shell_command` tool is a serialized JSON object.
|
||||
let command_str = shlex::try_join(command.iter().map(String::as_str))?;
|
||||
let tool_call_arguments = serde_json::to_string(&json!({
|
||||
"command": command,
|
||||
"command": command_str,
|
||||
"workdir": workdir.map(|w| w.to_string_lossy()),
|
||||
"timeout": timeout_ms
|
||||
"timeout_ms": timeout_ms
|
||||
}))?;
|
||||
let tool_call = json!({
|
||||
"choices": [
|
||||
@@ -21,7 +22,7 @@ pub fn create_shell_sse_response(
|
||||
{
|
||||
"id": call_id,
|
||||
"function": {
|
||||
"name": "shell",
|
||||
"name": "shell_command",
|
||||
"arguments": tool_call_arguments
|
||||
}
|
||||
}
|
||||
@@ -62,10 +63,10 @@ pub fn create_apply_patch_sse_response(
|
||||
patch_content: &str,
|
||||
call_id: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
// Use shell command to call apply_patch with heredoc format
|
||||
let shell_command = format!("apply_patch <<'EOF'\n{patch_content}\nEOF");
|
||||
// Use shell_command to call apply_patch with heredoc format
|
||||
let command = format!("apply_patch <<'EOF'\n{patch_content}\nEOF");
|
||||
let tool_call_arguments = serde_json::to_string(&json!({
|
||||
"command": ["bash", "-lc", shell_command]
|
||||
"command": command
|
||||
}))?;
|
||||
|
||||
let tool_call = json!({
|
||||
@@ -76,7 +77,7 @@ pub fn create_apply_patch_sse_response(
|
||||
{
|
||||
"id": call_id,
|
||||
"function": {
|
||||
"name": "shell",
|
||||
"name": "shell_command",
|
||||
"arguments": tool_call_arguments
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,7 +30,8 @@ use mcp_test_support::McpProcess;
|
||||
use mcp_test_support::create_apply_patch_sse_response;
|
||||
use mcp_test_support::create_final_assistant_message_sse_response;
|
||||
use mcp_test_support::create_mock_chat_completions_server;
|
||||
use mcp_test_support::create_shell_sse_response;
|
||||
use mcp_test_support::create_shell_command_sse_response;
|
||||
use mcp_test_support::format_with_current_shell;
|
||||
|
||||
// Allow ample time on slower CI or under load to avoid flakes.
|
||||
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(20);
|
||||
@@ -71,13 +72,16 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
|
||||
"-c".to_string(),
|
||||
format!("import pathlib; pathlib.Path('{created_filename}').touch()"),
|
||||
];
|
||||
let expected_shell_command = format_with_current_shell(&format!(
|
||||
"python3 -c \"import pathlib; pathlib.Path('{created_filename}').touch()\""
|
||||
));
|
||||
|
||||
let McpHandle {
|
||||
process: mut mcp_process,
|
||||
server: _server,
|
||||
dir: _dir,
|
||||
} = create_mcp_process(vec![
|
||||
create_shell_sse_response(
|
||||
create_shell_command_sse_response(
|
||||
shell_command.clone(),
|
||||
Some(workdir_for_shell_function_call.path()),
|
||||
Some(5_000),
|
||||
@@ -111,7 +115,7 @@ async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
|
||||
)?;
|
||||
let expected_elicitation_request = create_expected_elicitation_request(
|
||||
elicitation_request_id.clone(),
|
||||
shell_command.clone(),
|
||||
expected_shell_command,
|
||||
workdir_for_shell_function_call.path(),
|
||||
codex_request_id.to_string(),
|
||||
params.codex_event_id.clone(),
|
||||
@@ -218,6 +222,12 @@ async fn test_patch_approval_triggers_elicitation() {
|
||||
}
|
||||
|
||||
async fn patch_approval_triggers_elicitation() -> anyhow::Result<()> {
|
||||
if cfg!(windows) {
|
||||
// powershell apply_patch shell calls are not parsed into apply patch approvals
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let cwd = TempDir::new()?;
|
||||
let test_file = cwd.path().join("destination_file.txt");
|
||||
std::fs::write(&test_file, "original content\n")?;
|
||||
|
||||
@@ -88,7 +88,6 @@ impl OtelEventManager {
|
||||
reasoning_effort: Option<ReasoningEffort>,
|
||||
reasoning_summary: ReasoningSummary,
|
||||
context_window: Option<i64>,
|
||||
max_output_tokens: Option<i64>,
|
||||
auto_compact_token_limit: Option<i64>,
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: SandboxPolicy,
|
||||
@@ -111,7 +110,6 @@ impl OtelEventManager {
|
||||
reasoning_effort = reasoning_effort.map(|e| e.to_string()),
|
||||
reasoning_summary = %reasoning_summary,
|
||||
context_window = context_window,
|
||||
max_output_tokens = max_output_tokens,
|
||||
auto_compact_token_limit = auto_compact_token_limit,
|
||||
approval_policy = %approval_policy,
|
||||
sandbox_policy = %sandbox_policy,
|
||||
|
||||
@@ -230,8 +230,24 @@ pub struct LocalShellExecAction {
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum WebSearchAction {
|
||||
Search {
|
||||
query: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
#[ts(optional)]
|
||||
query: Option<String>,
|
||||
},
|
||||
OpenPage {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
#[ts(optional)]
|
||||
url: Option<String>,
|
||||
},
|
||||
FindInPage {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
#[ts(optional)]
|
||||
url: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
#[ts(optional)]
|
||||
pattern: Option<String>,
|
||||
},
|
||||
|
||||
#[serde(other)]
|
||||
Other,
|
||||
}
|
||||
@@ -322,10 +338,6 @@ pub struct ShellToolCallParams {
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub justification: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub max_output_tokens: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub max_output_chars: Option<usize>,
|
||||
}
|
||||
|
||||
/// If the `name` of a `ResponseItem::FunctionCall` is `shell_command`, the
|
||||
@@ -342,10 +354,6 @@ pub struct ShellCommandToolCallParams {
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub justification: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub max_output_tokens: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub max_output_chars: Option<usize>,
|
||||
}
|
||||
|
||||
/// Responses API compatible content items that can be returned by a tool call.
|
||||
@@ -642,6 +650,72 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn roundtrips_web_search_call_actions() -> Result<()> {
|
||||
let cases = vec![
|
||||
(
|
||||
r#"{
|
||||
"type": "web_search_call",
|
||||
"status": "completed",
|
||||
"action": {
|
||||
"type": "search",
|
||||
"query": "weather seattle"
|
||||
}
|
||||
}"#,
|
||||
WebSearchAction::Search {
|
||||
query: Some("weather seattle".into()),
|
||||
},
|
||||
Some("completed".into()),
|
||||
),
|
||||
(
|
||||
r#"{
|
||||
"type": "web_search_call",
|
||||
"status": "open",
|
||||
"action": {
|
||||
"type": "open_page",
|
||||
"url": "https://example.com"
|
||||
}
|
||||
}"#,
|
||||
WebSearchAction::OpenPage {
|
||||
url: Some("https://example.com".into()),
|
||||
},
|
||||
Some("open".into()),
|
||||
),
|
||||
(
|
||||
r#"{
|
||||
"type": "web_search_call",
|
||||
"status": "in_progress",
|
||||
"action": {
|
||||
"type": "find_in_page",
|
||||
"url": "https://example.com/docs",
|
||||
"pattern": "installation"
|
||||
}
|
||||
}"#,
|
||||
WebSearchAction::FindInPage {
|
||||
url: Some("https://example.com/docs".into()),
|
||||
pattern: Some("installation".into()),
|
||||
},
|
||||
Some("in_progress".into()),
|
||||
),
|
||||
];
|
||||
|
||||
for (json_literal, expected_action, expected_status) in cases {
|
||||
let parsed: ResponseItem = serde_json::from_str(json_literal)?;
|
||||
let expected = ResponseItem::WebSearchCall {
|
||||
id: None,
|
||||
status: expected_status.clone(),
|
||||
action: expected_action.clone(),
|
||||
};
|
||||
assert_eq!(parsed, expected);
|
||||
|
||||
let serialized = serde_json::to_value(&parsed)?;
|
||||
let original_value: serde_json::Value = serde_json::from_str(json_literal)?;
|
||||
assert_eq!(serialized, original_value);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn deserialize_shell_tool_call_params() -> Result<()> {
|
||||
let json = r#"{
|
||||
@@ -658,8 +732,6 @@ mod tests {
|
||||
timeout_ms: Some(1000),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
},
|
||||
params
|
||||
);
|
||||
|
||||
@@ -298,8 +298,12 @@ def create_tag_ref(version: str, tag_sha: str) -> None:
|
||||
|
||||
def determine_version(args: argparse.Namespace) -> str:
|
||||
latest_version = get_latest_release_version()
|
||||
major, minor, patch = parse_semver(latest_version)
|
||||
next_minor_version = format_version(major, minor + 1, patch)
|
||||
# When determining the next version after the current release,
|
||||
# we should always increment the minor version, but reset the
|
||||
# patch to zero. In practice, `patch` should only be non-zero if
|
||||
# --emergency-version-override was used.
|
||||
major, minor, _patch = parse_semver(latest_version)
|
||||
next_minor_version = format_version(major, minor + 1, 0)
|
||||
|
||||
if args.publish_release:
|
||||
return next_minor_version
|
||||
|
||||
@@ -18,6 +18,13 @@ pub fn compute_allow_paths(
|
||||
allow.push(p);
|
||||
}
|
||||
};
|
||||
let include_tmp_env_vars = matches!(
|
||||
policy,
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
exclude_tmpdir_env_var: false,
|
||||
..
|
||||
}
|
||||
);
|
||||
|
||||
if matches!(policy, SandboxPolicy::WorkspaceWrite { .. }) {
|
||||
add_path(command_cwd.to_path_buf());
|
||||
@@ -33,7 +40,7 @@ pub fn compute_allow_paths(
|
||||
}
|
||||
}
|
||||
}
|
||||
if !matches!(policy, SandboxPolicy::ReadOnly) {
|
||||
if include_tmp_env_vars {
|
||||
for key in ["TEMP", "TMP"] {
|
||||
if let Some(v) = env_map.get(key) {
|
||||
let abs = PathBuf::from(v);
|
||||
@@ -80,4 +87,32 @@ mod tests {
|
||||
"additional writable root should be allowed"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn excludes_tmp_env_vars_when_requested() {
|
||||
let command_cwd = PathBuf::from(r"C:\Workspace");
|
||||
let temp_dir = PathBuf::from(r"C:\TempDir");
|
||||
let _ = fs::create_dir_all(&command_cwd);
|
||||
let _ = fs::create_dir_all(&temp_dir);
|
||||
|
||||
let policy = SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![],
|
||||
network_access: false,
|
||||
exclude_tmpdir_env_var: true,
|
||||
exclude_slash_tmp: false,
|
||||
};
|
||||
let mut env_map = HashMap::new();
|
||||
env_map.insert("TEMP".into(), temp_dir.to_string_lossy().to_string());
|
||||
|
||||
let allow = compute_allow_paths(&policy, &command_cwd, &command_cwd, &env_map);
|
||||
|
||||
assert!(
|
||||
allow.iter().any(|p| p == &command_cwd),
|
||||
"command cwd should be allowed"
|
||||
);
|
||||
assert!(
|
||||
!allow.iter().any(|p| p == &temp_dir),
|
||||
"TEMP should be excluded when exclude_tmpdir_env_var is true"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,6 +71,10 @@ mod windows_impl {
|
||||
|
||||
type PipeHandles = ((HANDLE, HANDLE), (HANDLE, HANDLE), (HANDLE, HANDLE));
|
||||
|
||||
fn should_apply_network_block(policy: &SandboxPolicy) -> bool {
|
||||
!policy.has_full_network_access()
|
||||
}
|
||||
|
||||
fn ensure_dir(p: &Path) -> Result<()> {
|
||||
if let Some(d) = p.parent() {
|
||||
std::fs::create_dir_all(d)?;
|
||||
@@ -214,9 +218,12 @@ mod windows_impl {
|
||||
timeout_ms: Option<u64>,
|
||||
) -> Result<CaptureResult> {
|
||||
let policy = parse_policy(policy_json_or_preset)?;
|
||||
let apply_network_block = should_apply_network_block(&policy);
|
||||
normalize_null_device_env(&mut env_map);
|
||||
ensure_non_interactive_pager(&mut env_map);
|
||||
apply_no_network_to_env(&mut env_map)?;
|
||||
if apply_network_block {
|
||||
apply_no_network_to_env(&mut env_map)?;
|
||||
}
|
||||
ensure_codex_home_exists(codex_home)?;
|
||||
|
||||
let current_dir = cwd.to_path_buf();
|
||||
@@ -447,6 +454,36 @@ mod windows_impl {
|
||||
timed_out,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::should_apply_network_block;
|
||||
use crate::policy::SandboxPolicy;
|
||||
|
||||
fn workspace_policy(network_access: bool) -> SandboxPolicy {
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: Vec::new(),
|
||||
network_access,
|
||||
exclude_tmpdir_env_var: false,
|
||||
exclude_slash_tmp: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn applies_network_block_when_access_is_disabled() {
|
||||
assert!(should_apply_network_block(&workspace_policy(false)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn skips_network_block_when_access_is_allowed() {
|
||||
assert!(!should_apply_network_block(&workspace_policy(true)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn applies_network_block_for_read_only() {
|
||||
assert!(should_apply_network_block(&SandboxPolicy::ReadOnly));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
|
||||
@@ -247,12 +247,6 @@ The size of the context window for the model, in tokens.
|
||||
|
||||
In general, Codex knows the context window for the most common OpenAI models, but if you are using a new model with an old version of the Codex CLI, then you can use `model_context_window` to tell Codex what value to use to determine how much context is left during a conversation.
|
||||
|
||||
### model_max_output_tokens
|
||||
|
||||
This is analogous to `model_context_window`, but for the maximum number of output tokens for the model.
|
||||
|
||||
> See also [`codex exec`](./exec.md) to see how these model settings influence non-interactive runs.
|
||||
|
||||
### oss_provider
|
||||
|
||||
Specifies the default OSS provider to use when running Codex. This is used when the `--oss` flag is provided without a specific provider.
|
||||
@@ -945,7 +939,6 @@ Valid values:
|
||||
| `model` | string | Model to use (e.g., `gpt-5.1-codex-max`). |
|
||||
| `model_provider` | string | Provider id from `model_providers` (default: `openai`). |
|
||||
| `model_context_window` | number | Context window tokens. |
|
||||
| `model_max_output_tokens` | number | Max output tokens. |
|
||||
| `tool_output_token_limit` | number | Token budget for stored function/tool outputs in history (default: 2,560 tokens). |
|
||||
| `approval_policy` | `untrusted` \| `on-failure` \| `on-request` \| `never` | When to prompt for approval. |
|
||||
| `sandbox_mode` | `read-only` \| `workspace-write` \| `danger-full-access` | OS sandbox policy. |
|
||||
|
||||
@@ -30,7 +30,6 @@ model_provider = "openai"
|
||||
# Optional manual model metadata. When unset, Codex auto-detects from model.
|
||||
# Uncomment to force values.
|
||||
# model_context_window = 128000 # tokens; default: auto for model
|
||||
# model_max_output_tokens = 8192 # tokens; default: auto for model
|
||||
# model_auto_compact_token_limit = 0 # disable/override auto; default: model family specific
|
||||
# tool_output_token_limit = 10000 # tokens stored per tool output; default: 10000 for gpt-5.1-codex-max
|
||||
|
||||
|
||||
24
pnpm-lock.yaml
generated
24
pnpm-lock.yaml
generated
@@ -71,6 +71,30 @@ importers:
|
||||
specifier: ^3.24.6
|
||||
version: 3.24.6(zod@3.25.76)
|
||||
|
||||
shell-tool-mcp:
|
||||
devDependencies:
|
||||
'@types/jest':
|
||||
specifier: ^29.5.14
|
||||
version: 29.5.14
|
||||
'@types/node':
|
||||
specifier: ^20.19.18
|
||||
version: 20.19.18
|
||||
jest:
|
||||
specifier: ^29.7.0
|
||||
version: 29.7.0(@types/node@20.19.18)(ts-node@10.9.2(@types/node@20.19.18)(typescript@5.9.2))
|
||||
prettier:
|
||||
specifier: ^3.6.2
|
||||
version: 3.6.2
|
||||
ts-jest:
|
||||
specifier: ^29.3.4
|
||||
version: 29.4.4(@babel/core@7.28.4)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.28.4))(esbuild@0.25.10)(jest-util@29.7.0)(jest@29.7.0(@types/node@20.19.18)(ts-node@10.9.2(@types/node@20.19.18)(typescript@5.9.2)))(typescript@5.9.2)
|
||||
tsup:
|
||||
specifier: ^8.5.0
|
||||
version: 8.5.0(postcss@8.5.6)(typescript@5.9.2)(yaml@2.8.1)
|
||||
typescript:
|
||||
specifier: ^5.9.2
|
||||
version: 5.9.2
|
||||
|
||||
packages:
|
||||
|
||||
'@babel/code-frame@7.27.1':
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
packages:
|
||||
- docs
|
||||
- sdk/typescript
|
||||
- shell-tool-mcp
|
||||
|
||||
ignoredBuiltDependencies:
|
||||
- esbuild
|
||||
|
||||
2
shell-tool-mcp/.gitignore
vendored
Normal file
2
shell-tool-mcp/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
/bin/
|
||||
/node_modules/
|
||||
31
shell-tool-mcp/README.md
Normal file
31
shell-tool-mcp/README.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# @openai/codex-shell-tool-mcp
|
||||
|
||||
This package wraps the `codex-exec-mcp-server` binary and its helpers so that the shell MCP can be invoked via `npx @openai/codex-shell-tool-mcp`. It bundles:
|
||||
|
||||
- `codex-exec-mcp-server` and `codex-execve-wrapper` built for macOS (arm64, x64) and Linux (musl arm64, musl x64).
|
||||
- A patched Bash that honors `BASH_EXEC_WRAPPER`, built for multiple glibc baselines (Ubuntu 24.04/22.04/20.04, Debian 12/11, CentOS-like 9) and macOS (15/14/13).
|
||||
- A launcher (`bin/mcp-server.js`) that picks the correct binaries for the current `process.platform` / `process.arch`, specifying `--execve` and `--bash` for the MCP, as appropriate.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
npx @openai/codex-shell-tool-mcp --help
|
||||
```
|
||||
|
||||
The launcher selects a Rust target triple based on the host and chooses the closest Bash variant by inspecting `/etc/os-release` on Linux or the Darwin major version on macOS.
|
||||
|
||||
## Patched Bash
|
||||
|
||||
We carry a small patch to `execute_cmd.c` (see `patches/bash-exec-wrapper.patch`) that adds support for `BASH_EXEC_WRAPPER`. The original commit message is “add support for BASH_EXEC_WRAPPER” and the patch applies cleanly to `a8a1c2fac029404d3f42cd39f5a20f24b6e4fe4b` from https://github.com/bminor/bash. To rebuild manually:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/bminor/bash
|
||||
git checkout a8a1c2fac029404d3f42cd39f5a20f24b6e4fe4b
|
||||
git apply /path/to/patches/bash-exec-wrapper.patch
|
||||
./configure --without-bash-malloc
|
||||
make -j"$(nproc)"
|
||||
```
|
||||
|
||||
## Release workflow
|
||||
|
||||
`.github/workflows/shell-tool-mcp.yml` builds the Rust binaries, compiles the patched Bash variants, assembles the `vendor/` tree, and creates `codex-shell-tool-mcp-npm-<version>.tgz` for inclusion in the Rust GitHub Release. When the version is a stable or alpha tag, the workflow also publishes the tarball to npm using OIDC. The workflow is invoked from `rust-release.yml` so the package ships alongside other Codex artifacts.
|
||||
6
shell-tool-mcp/jest.config.cjs
Normal file
6
shell-tool-mcp/jest.config.cjs
Normal file
@@ -0,0 +1,6 @@
|
||||
/** @type {import('jest').Config} */
|
||||
module.exports = {
|
||||
preset: "ts-jest",
|
||||
testEnvironment: "node",
|
||||
roots: ["<rootDir>/tests"],
|
||||
};
|
||||
40
shell-tool-mcp/package.json
Normal file
40
shell-tool-mcp/package.json
Normal file
@@ -0,0 +1,40 @@
|
||||
{
|
||||
"name": "@openai/codex-shell-tool-mcp",
|
||||
"version": "0.0.0-dev",
|
||||
"description": "Codex MCP server for the shell tool with patched Bash and exec wrappers.",
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"codex-shell-tool-mcp": "bin/mcp-server.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"files": [
|
||||
"bin",
|
||||
"vendor",
|
||||
"README.md"
|
||||
],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/openai/codex.git",
|
||||
"directory": "shell-tool-mcp"
|
||||
},
|
||||
"scripts": {
|
||||
"clean": "rm -rf bin",
|
||||
"build": "tsup",
|
||||
"build:watch": "tsup --watch",
|
||||
"test": "jest",
|
||||
"test:watch": "jest --watch",
|
||||
"format": "prettier --check .",
|
||||
"format:fix": "prettier --write ."
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/node": "^20.19.18",
|
||||
"jest": "^29.7.0",
|
||||
"prettier": "^3.6.2",
|
||||
"ts-jest": "^29.3.4",
|
||||
"tsup": "^8.5.0",
|
||||
"typescript": "^5.9.2"
|
||||
}
|
||||
}
|
||||
24
shell-tool-mcp/patches/bash-exec-wrapper.patch
Normal file
24
shell-tool-mcp/patches/bash-exec-wrapper.patch
Normal file
@@ -0,0 +1,24 @@
|
||||
diff --git a/execute_cmd.c b/execute_cmd.c
|
||||
index 070f5119..d20ad2b9 100644
|
||||
--- a/execute_cmd.c
|
||||
+++ b/execute_cmd.c
|
||||
@@ -6129,6 +6129,19 @@ shell_execve (char *command, char **args, char **env)
|
||||
char sample[HASH_BANG_BUFSIZ];
|
||||
size_t larray;
|
||||
|
||||
+ char* exec_wrapper = getenv("BASH_EXEC_WRAPPER");
|
||||
+ if (exec_wrapper && *exec_wrapper && !whitespace (*exec_wrapper))
|
||||
+ {
|
||||
+ char *orig_command = command;
|
||||
+
|
||||
+ larray = strvec_len (args);
|
||||
+
|
||||
+ memmove (args + 2, args, (++larray) * sizeof (char *));
|
||||
+ args[0] = exec_wrapper;
|
||||
+ args[1] = orig_command;
|
||||
+ command = exec_wrapper;
|
||||
+ }
|
||||
+
|
||||
SETOSTYPE (0); /* Some systems use for USG/POSIX semantics */
|
||||
execve (command, args, env);
|
||||
i = errno; /* error from execve() */
|
||||
115
shell-tool-mcp/src/bashSelection.ts
Normal file
115
shell-tool-mcp/src/bashSelection.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
import path from "node:path";
|
||||
import os from "node:os";
|
||||
import { DARWIN_BASH_VARIANTS, LINUX_BASH_VARIANTS } from "./constants";
|
||||
import { BashSelection, OsReleaseInfo } from "./types";
|
||||
|
||||
function supportedDetail(variants: ReadonlyArray<{ name: string }>): string {
|
||||
return `Supported variants: ${variants.map((variant) => variant.name).join(", ")}`;
|
||||
}
|
||||
|
||||
export function selectLinuxBash(
|
||||
bashRoot: string,
|
||||
info: OsReleaseInfo,
|
||||
): BashSelection {
|
||||
const versionId = info.versionId;
|
||||
const candidates: Array<{
|
||||
variant: (typeof LINUX_BASH_VARIANTS)[number];
|
||||
matchesVersion: boolean;
|
||||
}> = [];
|
||||
for (const variant of LINUX_BASH_VARIANTS) {
|
||||
const matchesId =
|
||||
variant.ids.includes(info.id) ||
|
||||
variant.ids.some((id) => info.idLike.includes(id));
|
||||
if (!matchesId) {
|
||||
continue;
|
||||
}
|
||||
const matchesVersion = Boolean(
|
||||
versionId &&
|
||||
variant.versions.some((prefix) => versionId.startsWith(prefix)),
|
||||
);
|
||||
candidates.push({ variant, matchesVersion });
|
||||
}
|
||||
|
||||
const pickVariant = (list: typeof candidates) =>
|
||||
list.find((item) => item.variant)?.variant;
|
||||
|
||||
const preferred = pickVariant(
|
||||
candidates.filter((item) => item.matchesVersion),
|
||||
);
|
||||
if (preferred) {
|
||||
return {
|
||||
path: path.join(bashRoot, preferred.name, "bash"),
|
||||
variant: preferred.name,
|
||||
};
|
||||
}
|
||||
|
||||
const fallbackMatch = pickVariant(candidates);
|
||||
if (fallbackMatch) {
|
||||
return {
|
||||
path: path.join(bashRoot, fallbackMatch.name, "bash"),
|
||||
variant: fallbackMatch.name,
|
||||
};
|
||||
}
|
||||
|
||||
const fallback = LINUX_BASH_VARIANTS[0];
|
||||
if (fallback) {
|
||||
return {
|
||||
path: path.join(bashRoot, fallback.name, "bash"),
|
||||
variant: fallback.name,
|
||||
};
|
||||
}
|
||||
|
||||
const detail = supportedDetail(LINUX_BASH_VARIANTS);
|
||||
throw new Error(
|
||||
`Unable to select a Bash variant for ${info.id || "unknown"} ${versionId || ""}. ${detail}`,
|
||||
);
|
||||
}
|
||||
|
||||
export function selectDarwinBash(
|
||||
bashRoot: string,
|
||||
darwinRelease: string,
|
||||
): BashSelection {
|
||||
const darwinMajor = Number.parseInt(darwinRelease.split(".")[0] || "0", 10);
|
||||
const preferred = DARWIN_BASH_VARIANTS.find(
|
||||
(variant) => darwinMajor >= variant.minDarwin,
|
||||
);
|
||||
if (preferred) {
|
||||
return {
|
||||
path: path.join(bashRoot, preferred.name, "bash"),
|
||||
variant: preferred.name,
|
||||
};
|
||||
}
|
||||
|
||||
const fallback = DARWIN_BASH_VARIANTS[0];
|
||||
if (fallback) {
|
||||
return {
|
||||
path: path.join(bashRoot, fallback.name, "bash"),
|
||||
variant: fallback.name,
|
||||
};
|
||||
}
|
||||
|
||||
const detail = supportedDetail(DARWIN_BASH_VARIANTS);
|
||||
throw new Error(
|
||||
`Unable to select a macOS Bash build (darwin ${darwinMajor}). ${detail}`,
|
||||
);
|
||||
}
|
||||
|
||||
export function resolveBashPath(
|
||||
targetRoot: string,
|
||||
platform: NodeJS.Platform,
|
||||
darwinRelease = os.release(),
|
||||
osInfo: OsReleaseInfo | null = null,
|
||||
): BashSelection {
|
||||
const bashRoot = path.join(targetRoot, "bash");
|
||||
|
||||
if (platform === "linux") {
|
||||
if (!osInfo) {
|
||||
throw new Error("Linux OS info is required to select a Bash variant.");
|
||||
}
|
||||
return selectLinuxBash(bashRoot, osInfo);
|
||||
}
|
||||
if (platform === "darwin") {
|
||||
return selectDarwinBash(bashRoot, darwinRelease);
|
||||
}
|
||||
throw new Error(`Unsupported platform for Bash selection: ${platform}`);
|
||||
}
|
||||
20
shell-tool-mcp/src/constants.ts
Normal file
20
shell-tool-mcp/src/constants.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import { DarwinBashVariant, LinuxBashVariant } from "./types";
|
||||
|
||||
export const LINUX_BASH_VARIANTS: ReadonlyArray<LinuxBashVariant> = [
|
||||
{ name: "ubuntu-24.04", ids: ["ubuntu"], versions: ["24.04"] },
|
||||
{ name: "ubuntu-22.04", ids: ["ubuntu"], versions: ["22.04"] },
|
||||
{ name: "ubuntu-20.04", ids: ["ubuntu"], versions: ["20.04"] },
|
||||
{ name: "debian-12", ids: ["debian"], versions: ["12"] },
|
||||
{ name: "debian-11", ids: ["debian"], versions: ["11"] },
|
||||
{
|
||||
name: "centos-9",
|
||||
ids: ["centos", "rhel", "rocky", "almalinux"],
|
||||
versions: ["9"],
|
||||
},
|
||||
];
|
||||
|
||||
export const DARWIN_BASH_VARIANTS: ReadonlyArray<DarwinBashVariant> = [
|
||||
{ name: "macos-15", minDarwin: 24 },
|
||||
{ name: "macos-14", minDarwin: 23 },
|
||||
{ name: "macos-13", minDarwin: 22 },
|
||||
];
|
||||
101
shell-tool-mcp/src/index.ts
Normal file
101
shell-tool-mcp/src/index.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
// Launches the codex-exec-mcp-server binary bundled in this package.
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import { accessSync, constants } from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { resolveBashPath } from "./bashSelection";
|
||||
import { readOsRelease } from "./osRelease";
|
||||
import { resolveTargetTriple } from "./platform";
|
||||
|
||||
const scriptPath = process.argv[1]
|
||||
? path.resolve(process.argv[1])
|
||||
: process.cwd();
|
||||
const __dirname = path.dirname(scriptPath);
|
||||
|
||||
async function main(): Promise<void> {
|
||||
const targetTriple = resolveTargetTriple(process.platform, process.arch);
|
||||
const vendorRoot = path.join(__dirname, "..", "vendor");
|
||||
const targetRoot = path.join(vendorRoot, targetTriple);
|
||||
const execveWrapperPath = path.join(targetRoot, "codex-execve-wrapper");
|
||||
const serverPath = path.join(targetRoot, "codex-exec-mcp-server");
|
||||
|
||||
const osInfo = process.platform === "linux" ? readOsRelease() : null;
|
||||
const { path: bashPath } = resolveBashPath(
|
||||
targetRoot,
|
||||
process.platform,
|
||||
os.release(),
|
||||
osInfo,
|
||||
);
|
||||
|
||||
[execveWrapperPath, serverPath, bashPath].forEach((checkPath) => {
|
||||
try {
|
||||
accessSync(checkPath, constants.F_OK);
|
||||
} catch {
|
||||
throw new Error(`Required binary missing: ${checkPath}`);
|
||||
}
|
||||
});
|
||||
|
||||
const args = [
|
||||
"--execve",
|
||||
execveWrapperPath,
|
||||
"--bash",
|
||||
bashPath,
|
||||
...process.argv.slice(2),
|
||||
];
|
||||
const child = spawn(serverPath, args, {
|
||||
stdio: "inherit",
|
||||
});
|
||||
|
||||
const forwardSignal = (signal: NodeJS.Signals) => {
|
||||
if (child.killed) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
child.kill(signal);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
};
|
||||
|
||||
(["SIGINT", "SIGTERM", "SIGHUP"] as const).forEach((sig) => {
|
||||
process.on(sig, () => forwardSignal(sig));
|
||||
});
|
||||
|
||||
child.on("error", (err) => {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
});
|
||||
|
||||
const childResult = await new Promise<
|
||||
| { type: "signal"; signal: NodeJS.Signals }
|
||||
| { type: "code"; exitCode: number }
|
||||
>((resolve) => {
|
||||
child.on("exit", (code, signal) => {
|
||||
if (signal) {
|
||||
resolve({ type: "signal", signal });
|
||||
} else {
|
||||
resolve({ type: "code", exitCode: code ?? 1 });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
if (childResult.type === "signal") {
|
||||
// This environment running under `node --test` may not allow rethrowing a signal.
|
||||
// Wrap in a try to avoid masking the original termination reason.
|
||||
try {
|
||||
process.kill(process.pid, childResult.signal);
|
||||
} catch {
|
||||
process.exit(1);
|
||||
}
|
||||
} else {
|
||||
process.exit(childResult.exitCode);
|
||||
}
|
||||
}
|
||||
|
||||
void main().catch((err) => {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
});
|
||||
34
shell-tool-mcp/src/osRelease.ts
Normal file
34
shell-tool-mcp/src/osRelease.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
import { readFileSync } from "node:fs";
|
||||
import { OsReleaseInfo } from "./types";
|
||||
|
||||
export function parseOsRelease(contents: string): OsReleaseInfo {
|
||||
const lines = contents.split("\n").filter(Boolean);
|
||||
const info: Record<string, string> = {};
|
||||
for (const line of lines) {
|
||||
const [rawKey, rawValue] = line.split("=", 2);
|
||||
if (!rawKey || rawValue === undefined) {
|
||||
continue;
|
||||
}
|
||||
const key = rawKey.toLowerCase();
|
||||
const value = rawValue.replace(/^"/, "").replace(/"$/, "");
|
||||
info[key] = value;
|
||||
}
|
||||
const idLike = (info.id_like || "")
|
||||
.split(/\s+/)
|
||||
.map((item) => item.trim().toLowerCase())
|
||||
.filter(Boolean);
|
||||
return {
|
||||
id: (info.id || "").toLowerCase(),
|
||||
idLike,
|
||||
versionId: info.version_id || "",
|
||||
};
|
||||
}
|
||||
|
||||
export function readOsRelease(pathname = "/etc/os-release"): OsReleaseInfo {
|
||||
try {
|
||||
const contents = readFileSync(pathname, "utf8");
|
||||
return parseOsRelease(contents);
|
||||
} catch {
|
||||
return { id: "", idLike: [], versionId: "" };
|
||||
}
|
||||
}
|
||||
21
shell-tool-mcp/src/platform.ts
Normal file
21
shell-tool-mcp/src/platform.ts
Normal file
@@ -0,0 +1,21 @@
|
||||
export function resolveTargetTriple(
|
||||
platform: NodeJS.Platform,
|
||||
arch: NodeJS.Architecture,
|
||||
): string {
|
||||
if (platform === "linux") {
|
||||
if (arch === "x64") {
|
||||
return "x86_64-unknown-linux-musl";
|
||||
}
|
||||
if (arch === "arm64") {
|
||||
return "aarch64-unknown-linux-musl";
|
||||
}
|
||||
} else if (platform === "darwin") {
|
||||
if (arch === "x64") {
|
||||
return "x86_64-apple-darwin";
|
||||
}
|
||||
if (arch === "arm64") {
|
||||
return "aarch64-apple-darwin";
|
||||
}
|
||||
}
|
||||
throw new Error(`Unsupported platform: ${platform} (${arch})`);
|
||||
}
|
||||
21
shell-tool-mcp/src/types.ts
Normal file
21
shell-tool-mcp/src/types.ts
Normal file
@@ -0,0 +1,21 @@
|
||||
export type LinuxBashVariant = {
|
||||
name: string;
|
||||
ids: Array<string>;
|
||||
versions: Array<string>;
|
||||
};
|
||||
|
||||
export type DarwinBashVariant = {
|
||||
name: string;
|
||||
minDarwin: number;
|
||||
};
|
||||
|
||||
export type OsReleaseInfo = {
|
||||
id: string;
|
||||
idLike: Array<string>;
|
||||
versionId: string;
|
||||
};
|
||||
|
||||
export type BashSelection = {
|
||||
path: string;
|
||||
variant: string;
|
||||
};
|
||||
41
shell-tool-mcp/tests/bashSelection.test.ts
Normal file
41
shell-tool-mcp/tests/bashSelection.test.ts
Normal file
@@ -0,0 +1,41 @@
|
||||
import { selectDarwinBash, selectLinuxBash } from "../src/bashSelection";
|
||||
import { DARWIN_BASH_VARIANTS, LINUX_BASH_VARIANTS } from "../src/constants";
|
||||
import { OsReleaseInfo } from "../src/types";
|
||||
import path from "node:path";
|
||||
|
||||
describe("selectLinuxBash", () => {
|
||||
const bashRoot = "/vendor/bash";
|
||||
|
||||
it("prefers exact version match when id is present", () => {
|
||||
const info: OsReleaseInfo = {
|
||||
id: "ubuntu",
|
||||
idLike: ["debian"],
|
||||
versionId: "24.04.1",
|
||||
};
|
||||
const selection = selectLinuxBash(bashRoot, info);
|
||||
expect(selection.variant).toBe("ubuntu-24.04");
|
||||
expect(selection.path).toBe(path.join(bashRoot, "ubuntu-24.04", "bash"));
|
||||
});
|
||||
|
||||
it("falls back to first supported variant when no matches", () => {
|
||||
const info: OsReleaseInfo = { id: "unknown", idLike: [], versionId: "1.0" };
|
||||
const selection = selectLinuxBash(bashRoot, info);
|
||||
expect(selection.variant).toBe(LINUX_BASH_VARIANTS[0].name);
|
||||
});
|
||||
});
|
||||
|
||||
describe("selectDarwinBash", () => {
|
||||
const bashRoot = "/vendor/bash";
|
||||
|
||||
it("selects compatible darwin version", () => {
|
||||
const darwinRelease = "24.0.0";
|
||||
const selection = selectDarwinBash(bashRoot, darwinRelease);
|
||||
expect(selection.variant).toBe("macos-15");
|
||||
});
|
||||
|
||||
it("falls back to first darwin variant when release too old", () => {
|
||||
const darwinRelease = "20.0.0";
|
||||
const selection = selectDarwinBash(bashRoot, darwinRelease);
|
||||
expect(selection.variant).toBe(DARWIN_BASH_VARIANTS[0].name);
|
||||
});
|
||||
});
|
||||
30
shell-tool-mcp/tests/osRelease.test.ts
Normal file
30
shell-tool-mcp/tests/osRelease.test.ts
Normal file
@@ -0,0 +1,30 @@
|
||||
import { parseOsRelease } from "../src/osRelease";
|
||||
|
||||
describe("parseOsRelease", () => {
|
||||
it("parses basic fields", () => {
|
||||
const contents = `ID="ubuntu"
|
||||
ID_LIKE="debian"
|
||||
VERSION_ID=24.04
|
||||
OTHER=ignored`;
|
||||
|
||||
const info = parseOsRelease(contents);
|
||||
expect(info).toEqual({
|
||||
id: "ubuntu",
|
||||
idLike: ["debian"],
|
||||
versionId: "24.04",
|
||||
});
|
||||
});
|
||||
|
||||
it("handles missing fields", () => {
|
||||
const contents = "SOMETHING=else";
|
||||
const info = parseOsRelease(contents);
|
||||
expect(info).toEqual({ id: "", idLike: [], versionId: "" });
|
||||
});
|
||||
|
||||
it("normalizes id_like entries", () => {
|
||||
const contents = `ID="rhel"
|
||||
ID_LIKE="CentOS Rocky"`;
|
||||
const info = parseOsRelease(contents);
|
||||
expect(info.idLike).toEqual(["centos", "rocky"]);
|
||||
});
|
||||
});
|
||||
13
shell-tool-mcp/tsconfig.json
Normal file
13
shell-tool-mcp/tsconfig.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "CommonJS",
|
||||
"moduleResolution": "Node",
|
||||
"noEmit": true,
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"skipLibCheck": true
|
||||
},
|
||||
"include": ["src", "tests"]
|
||||
}
|
||||
15
shell-tool-mcp/tsup.config.ts
Normal file
15
shell-tool-mcp/tsup.config.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import { defineConfig } from "tsup";
|
||||
|
||||
export default defineConfig({
|
||||
entry: {
|
||||
"mcp-server": "src/index.ts",
|
||||
},
|
||||
outDir: "bin",
|
||||
format: ["cjs"],
|
||||
target: "node18",
|
||||
clean: true,
|
||||
sourcemap: false,
|
||||
banner: {
|
||||
js: "#!/usr/bin/env node",
|
||||
},
|
||||
});
|
||||
Reference in New Issue
Block a user