mirror of
https://github.com/openai/codex.git
synced 2026-02-01 22:47:52 +00:00
Compare commits
107 Commits
pr1291
...
pakrym/tes
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f915166299 | ||
|
|
173386eeac | ||
|
|
4a57afaaf2 | ||
|
|
9f645353e9 | ||
|
|
db84722080 | ||
|
|
6e1838e0d8 | ||
|
|
4fc4e410bd | ||
|
|
6dd62ffa3b | ||
|
|
b4ab7c1b73 | ||
|
|
084236f717 | ||
|
|
bc944e77f5 | ||
|
|
591cb6149a | ||
|
|
d6c4083f98 | ||
|
|
3ef544fb95 | ||
|
|
01c0896f0f | ||
|
|
4082246f6a | ||
|
|
6d82907082 | ||
|
|
ed206d5687 | ||
|
|
d51654822f | ||
|
|
710f728124 | ||
|
|
6cf4b96f9d | ||
|
|
18b2b30841 | ||
|
|
d49d802b06 | ||
|
|
8a6c6cee88 | ||
|
|
8b590105de | ||
|
|
018003e52f | ||
|
|
11fd3123be | ||
|
|
e78ec00e73 | ||
|
|
a06d4f58e4 | ||
|
|
83eefb55fb | ||
|
|
9846adeabf | ||
|
|
d5a2148deb | ||
|
|
cc874c9205 | ||
|
|
6f2b01bb6b | ||
|
|
9cedeadf6a | ||
|
|
327e2254f6 | ||
|
|
e16657ca45 | ||
|
|
bb30ab9e96 | ||
|
|
6949329a7f | ||
|
|
b95a010e86 | ||
|
|
fcbcc40f51 | ||
|
|
643ab1f582 | ||
|
|
d3dbc10479 | ||
|
|
0bc7ee9193 | ||
|
|
2bd3314886 | ||
|
|
5b820c5ce7 | ||
|
|
f14b5adabf | ||
|
|
9c0b413fd1 | ||
|
|
3777e18243 | ||
|
|
0f8ac92390 | ||
|
|
c46bb67d77 | ||
|
|
94f5cad895 | ||
|
|
72504f1d9c | ||
|
|
fa6d507c51 | ||
|
|
a52a2fe7a9 | ||
|
|
bfeb8c92a5 | ||
|
|
9e58076cf5 | ||
|
|
8a424fcfa3 | ||
|
|
341c091c5b | ||
|
|
6b1e4a6846 | ||
|
|
75fa65e054 | ||
|
|
16eafd02ad | ||
|
|
c8051b906f | ||
|
|
82b0cebe8b | ||
|
|
3a23a86f4b | ||
|
|
268267b59e | ||
|
|
4a15ebc1ca | ||
|
|
8d35ad0ef7 | ||
|
|
cc58f1086d | ||
|
|
e444a50cf0 | ||
|
|
f80fc86f18 | ||
|
|
0b9cb2b9e7 | ||
|
|
e0c08cea4f | ||
|
|
0a44c42533 | ||
|
|
a9bed68947 | ||
|
|
fd67a0086c | ||
|
|
c221eab0b5 | ||
|
|
bd5a9e8ba9 | ||
|
|
abcca30d93 | ||
|
|
4cb3c76798 | ||
|
|
6dad5c3b17 | ||
|
|
cd2d84d496 | ||
|
|
688100f7f4 | ||
|
|
f30bf4bbcf | ||
|
|
1b7c8d2569 | ||
|
|
4a341efe92 | ||
|
|
e2efe8da9c | ||
|
|
5a0f236ca4 | ||
|
|
ff8ae1ffa1 | ||
|
|
b3ad764532 | ||
|
|
a331a67b3e | ||
|
|
2e293ce903 | ||
|
|
64feeb3803 | ||
|
|
fa0e17f83a | ||
|
|
a339a7bcce | ||
|
|
fcfe43c7df | ||
|
|
296996d74e | ||
|
|
50924101d2 | ||
|
|
72082164c1 | ||
|
|
e09691337d | ||
|
|
86d5a9d80d | ||
|
|
531ce7626f | ||
|
|
63363a54e5 | ||
|
|
6d65010aad | ||
|
|
0776d78357 | ||
|
|
ed5e848f3e | ||
|
|
5aafe190e2 |
@@ -1,4 +1,4 @@
|
||||
FROM ubuntu:22.04
|
||||
FROM ubuntu:24.04
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
# enable 'universe' because musl-tools & clang live there
|
||||
@@ -11,19 +11,17 @@ RUN apt-get update && \
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential curl git ca-certificates \
|
||||
pkg-config clang musl-tools libssl-dev && \
|
||||
pkg-config clang musl-tools libssl-dev just && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# non-root dev user
|
||||
ARG USER=dev
|
||||
ARG UID=1000
|
||||
RUN useradd -m -u $UID $USER
|
||||
USER $USER
|
||||
# Ubuntu 24.04 ships with user 'ubuntu' already created with UID 1000.
|
||||
USER ubuntu
|
||||
|
||||
# install Rust + musl target as dev user
|
||||
RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal && \
|
||||
~/.cargo/bin/rustup target add aarch64-unknown-linux-musl
|
||||
~/.cargo/bin/rustup target add aarch64-unknown-linux-musl && \
|
||||
~/.cargo/bin/rustup component add clippy rustfmt
|
||||
|
||||
ENV PATH="/home/${USER}/.cargo/bin:${PATH}"
|
||||
ENV PATH="/home/ubuntu/.cargo/bin:${PATH}"
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
@@ -15,15 +15,13 @@
|
||||
"CARGO_TARGET_DIR": "${containerWorkspaceFolder}/codex-rs/target-arm64"
|
||||
},
|
||||
|
||||
"remoteUser": "dev",
|
||||
"remoteUser": "ubuntu",
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"settings": {
|
||||
"terminal.integrated.defaultProfile.linux": "bash"
|
||||
"terminal.integrated.defaultProfile.linux": "bash"
|
||||
},
|
||||
"extensions": [
|
||||
"rust-lang.rust-analyzer"
|
||||
],
|
||||
"extensions": ["rust-lang.rust-analyzer", "tamasfe.even-better-toml"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
9
.github/actions/codex/action.yml
vendored
9
.github/actions/codex/action.yml
vendored
@@ -20,9 +20,9 @@ inputs:
|
||||
description: "Value to use as the CODEX_HOME environment variable when running Codex."
|
||||
required: false
|
||||
codex_release_tag:
|
||||
description: "The release tag of the Codex model to run."
|
||||
description: "The release tag of the Codex model to run, e.g., 'rust-v0.3.0'. Defaults to the latest release."
|
||||
required: false
|
||||
default: "codex-rs-ca8e97fcbcb991e542b8689f2d4eab9d30c399d6-1-rust-v0.0.2505302325"
|
||||
default: ""
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
@@ -84,7 +84,10 @@ runs:
|
||||
# we will need to update this action.yml file to match.
|
||||
artifact="codex-exec-${triple}.tar.gz"
|
||||
|
||||
gh release download ${{ inputs.codex_release_tag }} --repo openai/codex \
|
||||
TAG_ARG="${{ inputs.codex_release_tag }}"
|
||||
# The usage is `gh release download [<tag>] [flags]`, so if TAG_ARG
|
||||
# is empty, we do not pass it so we can default to the latest release.
|
||||
gh release download ${TAG_ARG:+$TAG_ARG} --repo openai/codex \
|
||||
--pattern "$artifact" --output - \
|
||||
| tar xzO > /usr/local/bin/codex-exec
|
||||
chmod +x /usr/local/bin/codex-exec
|
||||
|
||||
22
.github/actions/codex/bun.lock
vendored
22
.github/actions/codex/bun.lock
vendored
@@ -8,9 +8,9 @@
|
||||
"@actions/github": "^6.0.1",
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "^1.2.11",
|
||||
"@types/node": "^22.15.21",
|
||||
"prettier": "^3.5.3",
|
||||
"@types/bun": "^1.2.19",
|
||||
"@types/node": "^24.1.0",
|
||||
"prettier": "^3.6.2",
|
||||
"typescript": "^5.8.3",
|
||||
},
|
||||
},
|
||||
@@ -48,19 +48,23 @@
|
||||
|
||||
"@octokit/types": ["@octokit/types@13.10.0", "", { "dependencies": { "@octokit/openapi-types": "^24.2.0" } }, "sha512-ifLaO34EbbPj0Xgro4G5lP5asESjwHracYJvVaPIyXMuiuXLlhic3S47cBdTb+jfODkTE5YtGCLt3Ay3+J97sA=="],
|
||||
|
||||
"@types/bun": ["@types/bun@1.2.13", "", { "dependencies": { "bun-types": "1.2.13" } }, "sha512-u6vXep/i9VBxoJl3GjZsl/BFIsvML8DfVDO0RYLEwtSZSp981kEO1V5NwRcO1CPJ7AmvpbnDCiMKo3JvbDEjAg=="],
|
||||
"@types/bun": ["@types/bun@1.2.19", "", { "dependencies": { "bun-types": "1.2.19" } }, "sha512-d9ZCmrH3CJ2uYKXQIUuZ/pUnTqIvLDS0SK7pFmbx8ma+ziH/FRMoAq5bYpRG7y+w1gl+HgyNZbtqgMq4W4e2Lg=="],
|
||||
|
||||
"@types/node": ["@types/node@22.15.21", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-EV/37Td6c+MgKAbkcLG6vqZ2zEYHD7bvSrzqqs2RIhbA6w3x+Dqz8MZM3sP6kGTeLrdoOgKZe+Xja7tUB2DNkQ=="],
|
||||
"@types/node": ["@types/node@24.1.0", "", { "dependencies": { "undici-types": "~7.8.0" } }, "sha512-ut5FthK5moxFKH2T1CUOC6ctR67rQRvvHdFLCD2Ql6KXmMuCrjsSsRI9UsLCm9M18BMwClv4pn327UvB7eeO1w=="],
|
||||
|
||||
"@types/react": ["@types/react@19.1.8", "", { "dependencies": { "csstype": "^3.0.2" } }, "sha512-AwAfQ2Wa5bCx9WP8nZL2uMZWod7J7/JSplxbTmBQ5ms6QpqNYm672H0Vu9ZVKVngQ+ii4R/byguVEUZQyeg44g=="],
|
||||
|
||||
"before-after-hook": ["before-after-hook@2.2.3", "", {}, "sha512-NzUnlZexiaH/46WDhANlyR2bXRopNg4F/zuSA3OpZnllCUgRaOF2znDioDWrmbNVsuZk6l9pMquQB38cfBZwkQ=="],
|
||||
|
||||
"bun-types": ["bun-types@1.2.13", "", { "dependencies": { "@types/node": "*" } }, "sha512-rRjA1T6n7wto4gxhAO/ErZEtOXyEZEmnIHQfl0Dt1QQSB4QV0iP6BZ9/YB5fZaHFQ2dwHFrmPaRQ9GGMX01k9Q=="],
|
||||
"bun-types": ["bun-types@1.2.19", "", { "dependencies": { "@types/node": "*" }, "peerDependencies": { "@types/react": "^19" } }, "sha512-uAOTaZSPuYsWIXRpj7o56Let0g/wjihKCkeRqUBhlLVM/Bt+Fj9xTo+LhC1OV1XDaGkz4hNC80et5xgy+9KTHQ=="],
|
||||
|
||||
"csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],
|
||||
|
||||
"deprecation": ["deprecation@2.3.1", "", {}, "sha512-xmHIy4F3scKVwMsQ4WnVaS8bHOx0DmVwRywosKhaILI0ywMDWPtBSku2HNxRvF7jtwDRsoEwYQSfbxj8b7RlJQ=="],
|
||||
|
||||
"once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="],
|
||||
|
||||
"prettier": ["prettier@3.5.3", "", { "bin": { "prettier": "bin/prettier.cjs" } }, "sha512-QQtaxnoDJeAkDvDKWCLiwIXkTgRhwYDEQCghU9Z6q03iyek/rxRh/2lC3HB7P8sWT2xC/y5JDctPLBIGzHKbhw=="],
|
||||
"prettier": ["prettier@3.6.2", "", { "bin": { "prettier": "bin/prettier.cjs" } }, "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ=="],
|
||||
|
||||
"tunnel": ["tunnel@0.0.6", "", {}, "sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg=="],
|
||||
|
||||
@@ -68,7 +72,7 @@
|
||||
|
||||
"undici": ["undici@5.29.0", "", { "dependencies": { "@fastify/busboy": "^2.0.0" } }, "sha512-raqeBD6NQK4SkWhQzeYKd1KmIG6dllBOTt55Rmkt4HtI9mwdWtJljnrXjAFUBLTSN67HWrOIZ3EPF4kjUw80Bg=="],
|
||||
|
||||
"undici-types": ["undici-types@6.21.0", "", {}, "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="],
|
||||
"undici-types": ["undici-types@7.8.0", "", {}, "sha512-9UJ2xGDvQ43tYyVMpuHlsgApydB8ZKfVYTsLDhXkFL/6gfkp+U8xTGdh8pMJv1SpZna0zxG1DwsKZsreLbXBxw=="],
|
||||
|
||||
"universal-user-agent": ["universal-user-agent@6.0.1", "", {}, "sha512-yCzhz6FN2wU1NiiQRogkTQszlQSlpWaw8SvVegAc+bDxbzHgh1vX8uIe8OYyMH6DwH+sdTJsgMl36+mSMdRJIQ=="],
|
||||
|
||||
@@ -78,6 +82,8 @@
|
||||
|
||||
"@octokit/plugin-rest-endpoint-methods/@octokit/types": ["@octokit/types@12.6.0", "", { "dependencies": { "@octokit/openapi-types": "^20.0.0" } }, "sha512-1rhSOfRa6H9w4YwK0yrf5faDaDTb+yLyBUKOCV4xtCDB5VmIPqd/v9yr9o6SAzOAlRxMiRiCic6JVM1/kunVkw=="],
|
||||
|
||||
"bun-types/@types/node": ["@types/node@24.0.13", "", { "dependencies": { "undici-types": "~7.8.0" } }, "sha512-Qm9OYVOFHFYg3wJoTSrz80hoec5Lia/dPp84do3X7dZvLikQvM1YpmvTBEdIr/e+U8HTkFjLHLnl78K/qjf+jQ=="],
|
||||
|
||||
"@octokit/plugin-paginate-rest/@octokit/types/@octokit/openapi-types": ["@octokit/openapi-types@20.0.0", "", {}, "sha512-EtqRBEjp1dL/15V7WiX5LJMIxxkdiGJnabzYx5Apx4FkQIFgAfKumXeYAqqJCj1s+BMX4cPFIFC4OLCR6stlnA=="],
|
||||
|
||||
"@octokit/plugin-rest-endpoint-methods/@octokit/types/@octokit/openapi-types": ["@octokit/openapi-types@20.0.0", "", {}, "sha512-EtqRBEjp1dL/15V7WiX5LJMIxxkdiGJnabzYx5Apx4FkQIFgAfKumXeYAqqJCj1s+BMX4cPFIFC4OLCR6stlnA=="],
|
||||
|
||||
6
.github/actions/codex/package.json
vendored
6
.github/actions/codex/package.json
vendored
@@ -13,9 +13,9 @@
|
||||
"@actions/github": "^6.0.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "^1.2.11",
|
||||
"@types/node": "^22.15.21",
|
||||
"prettier": "^3.5.3",
|
||||
"@types/bun": "^1.2.19",
|
||||
"@types/node": "^24.1.0",
|
||||
"prettier": "^3.6.2",
|
||||
"typescript": "^5.8.3"
|
||||
}
|
||||
}
|
||||
|
||||
26
.github/dependabot.yaml
vendored
Normal file
26
.github/dependabot.yaml
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
# https://docs.github.com/en/code-security/dependabot/working-with-dependabot/dependabot-options-reference#package-ecosystem-
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: bun
|
||||
directory: .github/actions/codex
|
||||
schedule:
|
||||
interval: weekly
|
||||
- package-ecosystem: cargo
|
||||
directories:
|
||||
- codex-rs
|
||||
- codex-rs/*
|
||||
schedule:
|
||||
interval: weekly
|
||||
- package-ecosystem: devcontainers
|
||||
directory: /
|
||||
schedule:
|
||||
interval: weekly
|
||||
- package-ecosystem: docker
|
||||
directory: codex-cli
|
||||
schedule:
|
||||
interval: weekly
|
||||
- package-ecosystem: github-actions
|
||||
directory: /
|
||||
schedule:
|
||||
interval: weekly
|
||||
9
.github/workflows/ci.yml
vendored
9
.github/workflows/ci.yml
vendored
@@ -74,7 +74,12 @@ jobs:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
run: pnpm stage-release
|
||||
|
||||
- name: Ensure README.md contains only ASCII and certain Unicode code points
|
||||
- name: Ensure root README.md contains only ASCII and certain Unicode code points
|
||||
run: ./scripts/asciicheck.py README.md
|
||||
- name: Check README ToC
|
||||
- name: Check root README ToC
|
||||
run: python3 scripts/readme_toc.py README.md
|
||||
|
||||
- name: Ensure codex-cli/README.md contains only ASCII and certain Unicode code points
|
||||
run: ./scripts/asciicheck.py codex-cli/README.md
|
||||
- name: Check codex-cli/README ToC
|
||||
run: python3 scripts/readme_toc.py codex-cli/README.md
|
||||
|
||||
2
.github/workflows/codex.yml
vendored
2
.github/workflows/codex.yml
vendored
@@ -70,7 +70,7 @@ jobs:
|
||||
- name: Install dependencies
|
||||
run: pnpm install
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.87
|
||||
- uses: dtolnay/rust-toolchain@1.88
|
||||
with:
|
||||
targets: x86_64-unknown-linux-gnu
|
||||
components: clippy
|
||||
|
||||
4
.github/workflows/rust-ci.yml
vendored
4
.github/workflows/rust-ci.yml
vendored
@@ -26,7 +26,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dtolnay/rust-toolchain@1.87
|
||||
- uses: dtolnay/rust-toolchain@1.88
|
||||
with:
|
||||
components: rustfmt
|
||||
- name: cargo fmt
|
||||
@@ -64,7 +64,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dtolnay/rust-toolchain@1.87
|
||||
- uses: dtolnay/rust-toolchain@1.88
|
||||
with:
|
||||
targets: ${{ matrix.target }}
|
||||
components: clippy
|
||||
|
||||
31
.github/workflows/rust-release.yml
vendored
31
.github/workflows/rust-release.yml
vendored
@@ -15,9 +15,6 @@ concurrency:
|
||||
group: ${{ github.workflow }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
TAG_REGEX: '^rust-v[0-9]+\.[0-9]+\.[0-9]+$'
|
||||
|
||||
jobs:
|
||||
tag-check:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -33,8 +30,8 @@ jobs:
|
||||
# 1. Must be a tag and match the regex
|
||||
[[ "${GITHUB_REF_TYPE}" == "tag" ]] \
|
||||
|| { echo "❌ Not a tag push"; exit 1; }
|
||||
[[ "${GITHUB_REF_NAME}" =~ ${TAG_REGEX} ]] \
|
||||
|| { echo "❌ Tag '${GITHUB_REF_NAME}' != ${TAG_REGEX}"; exit 1; }
|
||||
[[ "${GITHUB_REF_NAME}" =~ ^rust-v[0-9]+\.[0-9]+\.[0-9]+(-(alpha|beta)(\.[0-9]+)?)?$ ]] \
|
||||
|| { echo "❌ Tag '${GITHUB_REF_NAME}' doesn't match expected format"; exit 1; }
|
||||
|
||||
# 2. Extract versions
|
||||
tag_ver="${GITHUB_REF_NAME#rust-v}"
|
||||
@@ -76,7 +73,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dtolnay/rust-toolchain@1.87
|
||||
- uses: dtolnay/rust-toolchain@1.88
|
||||
with:
|
||||
targets: ${{ matrix.target }}
|
||||
|
||||
@@ -96,7 +93,7 @@ jobs:
|
||||
sudo apt install -y musl-tools pkg-config
|
||||
|
||||
- name: Cargo build
|
||||
run: cargo build --target ${{ matrix.target }} --release --all-targets --all-features
|
||||
run: cargo build --target ${{ matrix.target }} --release --bin codex --bin codex-exec --bin codex-linux-sandbox
|
||||
|
||||
- name: Stage artifacts
|
||||
shell: bash
|
||||
@@ -160,9 +157,7 @@ jobs:
|
||||
release:
|
||||
needs: build
|
||||
name: release
|
||||
runs-on: ubuntu-24.04
|
||||
env:
|
||||
RELEASE_TAG: codex-rs-${{ github.sha }}-${{ github.run_attempt }}-${{ github.ref_name }}
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4
|
||||
@@ -172,9 +167,19 @@ jobs:
|
||||
- name: List
|
||||
run: ls -R dist/
|
||||
|
||||
- uses: softprops/action-gh-release@v2
|
||||
- name: Define release name
|
||||
id: release_name
|
||||
run: |
|
||||
# Extract the version from the tag name, which is in the format
|
||||
# "rust-v0.1.0".
|
||||
version="${GITHUB_REF_NAME#rust-v}"
|
||||
echo "name=${version}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Create GitHub Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: ${{ env.RELEASE_TAG }}
|
||||
name: ${{ steps.release_name.outputs.name }}
|
||||
tag_name: ${{ github.ref_name }}
|
||||
files: dist/**
|
||||
# For now, tag releases as "prerelease" because we are not claiming
|
||||
# the Rust CLI is stable yet.
|
||||
@@ -184,5 +189,5 @@ jobs:
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
tag: ${{ env.RELEASE_TAG }}
|
||||
tag: ${{ github.ref_name }}
|
||||
config: .github/dotslash-config.json
|
||||
|
||||
18
.vscode/launch.json
vendored
Normal file
18
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "Cargo launch",
|
||||
"cargo": {
|
||||
"cwd": "${workspaceFolder}/codex-rs",
|
||||
"args": [
|
||||
"build",
|
||||
"--bin=codex-tui"
|
||||
]
|
||||
},
|
||||
"args": []
|
||||
}
|
||||
]
|
||||
}
|
||||
10
.vscode/settings.json
vendored
Normal file
10
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"rust-analyzer.checkOnSave": true,
|
||||
"rust-analyzer.check.command": "clippy",
|
||||
"rust-analyzer.check.extraArgs": ["--all-features", "--tests"],
|
||||
"rust-analyzer.rustfmt.extraArgs": ["--config", "imports_granularity=Item"],
|
||||
"[rust]": {
|
||||
"editor.defaultFormatter": "rust-lang.rust-analyzer",
|
||||
"editor.formatOnSave": true,
|
||||
}
|
||||
}
|
||||
@@ -3,3 +3,7 @@
|
||||
In the codex-rs folder where the rust code lives:
|
||||
|
||||
- Never add or modify any code related to `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR`. You operate in a sandbox where `CODEX_SANDBOX_NETWORK_DISABLED=1` will be set whenever you use the `shell` tool. Any existing code that uses `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` was authored with this fact in mind. It is often used to early exit out of tests that the author knew you would not be able to run given your sandbox limitations.
|
||||
|
||||
Before creating a pull request with changes to `codex-rs`, run `just fmt` (in `codex-rs` directory) to format the code and `just fix` (in `codex-rs` directory) to fix any linter issues in the code, ensure the test suite passes by running `cargo test --all-features` in the `codex-rs` directory.
|
||||
|
||||
When making individual changes prefer running tests on individual files or projects first.
|
||||
|
||||
574
README.md
574
README.md
@@ -1,9 +1,11 @@
|
||||
<h1 align="center">OpenAI Codex CLI</h1>
|
||||
<p align="center">Lightweight coding agent that runs in your terminal</p>
|
||||
|
||||
<p align="center"><code>npm i -g @openai/codex</code></p>
|
||||
<p align="center"><code>npm i -g @openai/codex</code><br />or <code>brew install codex</code></p>
|
||||
|
||||

|
||||
This is the home of the **Codex CLI**, which is a coding agent from OpenAI that runs locally on your computer. If you are looking for the _cloud-based agent_ from OpenAI, **Codex [Web]**, see <https://chatgpt.com/codex>.
|
||||
|
||||
<!--  -->
|
||||
|
||||
---
|
||||
|
||||
@@ -13,7 +15,9 @@
|
||||
<!-- Begin ToC -->
|
||||
|
||||
- [Experimental technology disclaimer](#experimental-technology-disclaimer)
|
||||
- [Quickstart](#quickstart)
|
||||
- [Quick start](#quickstart)
|
||||
- [OpenAI API Users](#openai-api-users)
|
||||
- [OpenAI Plus/Pro Users](#openai-pluspro-users)
|
||||
- [Why Codex?](#why-codex)
|
||||
- [Security model & permissions](#security-model--permissions)
|
||||
- [Platform sandboxing details](#platform-sandboxing-details)
|
||||
@@ -21,24 +25,17 @@
|
||||
- [CLI reference](#cli-reference)
|
||||
- [Memory & project docs](#memory--project-docs)
|
||||
- [Non-interactive / CI mode](#non-interactive--ci-mode)
|
||||
- [Model Context Protocol (MCP)](#model-context-protocol-mcp)
|
||||
- [Tracing / verbose logging](#tracing--verbose-logging)
|
||||
- [Recipes](#recipes)
|
||||
- [Installation](#installation)
|
||||
- [Configuration guide](#configuration-guide)
|
||||
- [Basic configuration parameters](#basic-configuration-parameters)
|
||||
- [Custom AI provider configuration](#custom-ai-provider-configuration)
|
||||
- [History configuration](#history-configuration)
|
||||
- [Configuration examples](#configuration-examples)
|
||||
- [Full configuration example](#full-configuration-example)
|
||||
- [Custom instructions](#custom-instructions)
|
||||
- [Environment variables setup](#environment-variables-setup)
|
||||
- [DotSlash](#dotslash)
|
||||
- [Configuration](#configuration)
|
||||
- [FAQ](#faq)
|
||||
- [Zero data retention (ZDR) usage](#zero-data-retention-zdr-usage)
|
||||
- [Codex open source fund](#codex-open-source-fund)
|
||||
- [Contributing](#contributing)
|
||||
- [Development workflow](#development-workflow)
|
||||
- [Git hooks with Husky](#git-hooks-with-husky)
|
||||
- [Debugging](#debugging)
|
||||
- [Writing high-impact code changes](#writing-high-impact-code-changes)
|
||||
- [Opening a pull request](#opening-a-pull-request)
|
||||
- [Review process](#review-process)
|
||||
@@ -47,8 +44,6 @@
|
||||
- [Contributor license agreement (CLA)](#contributor-license-agreement-cla)
|
||||
- [Quick fixes](#quick-fixes)
|
||||
- [Releasing `codex`](#releasing-codex)
|
||||
- [Alternative build options](#alternative-build-options)
|
||||
- [Nix flake development](#nix-flake-development)
|
||||
- [Security & responsible AI](#security--responsible-ai)
|
||||
- [License](#license)
|
||||
|
||||
@@ -71,54 +66,94 @@ Help us improve by filing issues or submitting PRs (see the section below for ho
|
||||
|
||||
## Quickstart
|
||||
|
||||
Install globally:
|
||||
Install globally with your preferred package manager:
|
||||
|
||||
```shell
|
||||
npm install -g @openai/codex
|
||||
npm install -g @openai/codex # Alternatively: `brew install codex`
|
||||
```
|
||||
|
||||
Or go to the [latest GitHub Release](https://github.com/openai/codex/releases/latest) and download the appropriate binary for your platform.
|
||||
|
||||
### OpenAI API Users
|
||||
|
||||
Next, set your OpenAI API key as an environment variable:
|
||||
|
||||
```shell
|
||||
export OPENAI_API_KEY="your-api-key-here"
|
||||
```
|
||||
|
||||
> **Note:** This command sets the key only for your current terminal session. You can add the `export` line to your shell's configuration file (e.g., `~/.zshrc`) but we recommend setting for the session. **Tip:** You can also place your API key into a `.env` file at the root of your project:
|
||||
>
|
||||
> ```env
|
||||
> OPENAI_API_KEY=your-api-key-here
|
||||
> ```
|
||||
>
|
||||
> The CLI will automatically load variables from `.env` (via `dotenv/config`).
|
||||
> [!NOTE]
|
||||
> This command sets the key only for your current terminal session. You can add the `export` line to your shell's configuration file (e.g., `~/.zshrc`), but we recommend setting it for the session.
|
||||
|
||||
### OpenAI Plus/Pro Users
|
||||
|
||||
If you have a paid OpenAI account, run the following to start the login process:
|
||||
|
||||
```
|
||||
codex login
|
||||
```
|
||||
|
||||
If you complete the process successfully, you should have a `~/.codex/auth.json` file that contains the credentials that Codex will use.
|
||||
|
||||
If you encounter problems with the login flow, please comment on <https://github.com/openai/codex/issues/1243>.
|
||||
|
||||
<details>
|
||||
<summary><strong>Use <code>--provider</code> to use other models</strong></summary>
|
||||
<summary><strong>Use <code>--profile</code> to use other models</strong></summary>
|
||||
|
||||
> Codex also allows you to use other providers that support the OpenAI Chat Completions API. You can set the provider in the config file or use the `--provider` flag. The possible options for `--provider` are:
|
||||
>
|
||||
> - openai (default)
|
||||
> - openrouter
|
||||
> - azure
|
||||
> - gemini
|
||||
> - ollama
|
||||
> - mistral
|
||||
> - deepseek
|
||||
> - xai
|
||||
> - groq
|
||||
> - arceeai
|
||||
> - any other provider that is compatible with the OpenAI API
|
||||
>
|
||||
> If you use a provider other than OpenAI, you will need to set the API key for the provider in the config file or in the environment variable as:
|
||||
>
|
||||
> ```shell
|
||||
> export <provider>_API_KEY="your-api-key-here"
|
||||
> ```
|
||||
>
|
||||
> If you use a provider not listed above, you must also set the base URL for the provider:
|
||||
>
|
||||
> ```shell
|
||||
> export <provider>_BASE_URL="https://your-provider-api-base-url"
|
||||
> ```
|
||||
Codex also allows you to use other providers that support the OpenAI Chat Completions (or Responses) API.
|
||||
|
||||
To do so, you must first define custom [providers](./config.md#model_providers) in `~/.codex/config.toml`. For example, the provider for a standard Ollama setup would be defined as follows:
|
||||
|
||||
```toml
|
||||
[model_providers.ollama]
|
||||
name = "Ollama"
|
||||
base_url = "http://localhost:11434/v1"
|
||||
```
|
||||
|
||||
The `base_url` will have `/chat/completions` appended to it to build the full URL for the request.
|
||||
|
||||
For providers that also require an `Authorization` header of the form `Bearer: SECRET`, an `env_key` can be specified, which indicates the environment variable to read to use as the value of `SECRET` when making a request:
|
||||
|
||||
```toml
|
||||
[model_providers.openrouter]
|
||||
name = "OpenRouter"
|
||||
base_url = "https://openrouter.ai/api/v1"
|
||||
env_key = "OPENROUTER_API_KEY"
|
||||
```
|
||||
|
||||
Providers that speak the Responses API are also supported by adding `wire_api = "responses"` as part of the definition. Accessing OpenAI models via Azure is an example of such a provider, though it also requires specifying additional `query_params` that need to be appended to the request URL:
|
||||
|
||||
```toml
|
||||
[model_providers.azure]
|
||||
name = "Azure"
|
||||
# Make sure you set the appropriate subdomain for this URL.
|
||||
base_url = "https://YOUR_PROJECT_NAME.openai.azure.com/openai"
|
||||
env_key = "AZURE_OPENAI_API_KEY" # Or "OPENAI_API_KEY", whichever you use.
|
||||
# Newer versions appear to support the responses API, see https://github.com/openai/codex/pull/1321
|
||||
query_params = { api-version = "2025-04-01-preview" }
|
||||
wire_api = "responses"
|
||||
```
|
||||
|
||||
Once you have defined a provider you wish to use, you can configure it as your default provider as follows:
|
||||
|
||||
```toml
|
||||
model_provider = "azure"
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> If you find yourself experimenting with a variety of models and providers, then you likely want to invest in defining a _profile_ for each configuration like so:
|
||||
|
||||
```toml
|
||||
[profiles.o3]
|
||||
model_provider = "azure"
|
||||
model = "o3"
|
||||
|
||||
[profiles.mistral]
|
||||
model_provider = "ollama"
|
||||
model = "mistral"
|
||||
```
|
||||
|
||||
This way, you can specify one command-line argument (.e.g., `--profile o3`, `--profile mistral`) to override multiple settings together.
|
||||
|
||||
</details>
|
||||
<br />
|
||||
@@ -136,7 +171,7 @@ codex "explain this codebase to me"
|
||||
```
|
||||
|
||||
```shell
|
||||
codex --approval-mode full-auto "create the fanciest todo-list app"
|
||||
codex --full-auto "create the fanciest todo-list app"
|
||||
```
|
||||
|
||||
That's it - Codex will scaffold a file, run it inside a sandbox, install any
|
||||
@@ -162,41 +197,35 @@ And it's **fully open-source** so you can see and contribute to how it develops!
|
||||
|
||||
## Security model & permissions
|
||||
|
||||
Codex lets you decide _how much autonomy_ the agent receives and auto-approval policy via the
|
||||
`--approval-mode` flag (or the interactive onboarding prompt):
|
||||
Codex lets you decide _how much autonomy_ you want to grant the agent. The following options can be configured independently:
|
||||
|
||||
| Mode | What the agent may do without asking | Still requires approval |
|
||||
| ------------------------- | --------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- |
|
||||
| **Suggest** <br>(default) | <li>Read any file in the repo | <li>**All** file writes/patches<li> **Any** arbitrary shell commands (aside from reading files) |
|
||||
| **Auto Edit** | <li>Read **and** apply-patch writes to files | <li>**All** shell commands |
|
||||
| **Full Auto** | <li>Read/write files <li> Execute shell commands (network disabled, writes limited to your workdir) | - |
|
||||
- [`approval_policy`](./codex-rs/config.md#approval_policy) determines when you should be prompted to approve whether Codex can execute a command
|
||||
- [`sandbox`](./codex-rs/config.md#sandbox) determines the _sandbox policy_ that Codex uses to execute untrusted commands
|
||||
|
||||
In **Full Auto** every command is run **network-disabled** and confined to the
|
||||
current working directory (plus temporary files) for defense-in-depth. Codex
|
||||
will also show a warning/confirmation if you start in **auto-edit** or
|
||||
**full-auto** while the directory is _not_ tracked by Git, so you always have a
|
||||
safety net.
|
||||
By default, Codex runs with `--ask-for-approval untrusted` and `--sandbox read-only`, which means that:
|
||||
|
||||
Coming soon: you'll be able to whitelist specific commands to auto-execute with
|
||||
the network enabled, once we're confident in additional safeguards.
|
||||
- The user is prompted to approve every command not on the set of "trusted" commands built into Codex (`cat`, `ls`, etc.)
|
||||
- Approved commands are run outside of a sandbox because user approval implies "trust," in this case.
|
||||
|
||||
Running Codex with the `--full-auto` convenience flag changes the configuration to `--ask-for-approval on-failure` and `--sandbox workspace-write`, which means that:
|
||||
|
||||
- Codex does not initially ask for user approval before running an individual command.
|
||||
- Though when it runs a command, it is run under a sandbox in which:
|
||||
- It can read any file on the system.
|
||||
- It can only write files under the current directory (or the directory specified via `--cd`).
|
||||
- Network requests are completely disabled.
|
||||
- Only if the command exits with a non-zero exit code will it ask the user for approval. If granted, it will re-attempt the command outside of the sandbox. (A common case is when Codex cannot `npm install` a dependency because that requires network access.)
|
||||
|
||||
Again, these two options can be configured independently. For example, if you want Codex to perform an "exploration" where you are happy for it to read anything it wants but you never want to be prompted, you could run Codex with `--ask-for-approval never` and `--sandbox read-only`.
|
||||
|
||||
### Platform sandboxing details
|
||||
|
||||
The hardening mechanism Codex uses depends on your OS:
|
||||
The mechanism Codex uses to implement the sandbox policy depends on your OS:
|
||||
|
||||
- **macOS 12+** - commands are wrapped with **Apple Seatbelt** (`sandbox-exec`).
|
||||
- **macOS 12+** uses **Apple Seatbelt** and runs commands using `sandbox-exec` with a profile (`-p`) that corresponds to the `--sandbox` that was specified.
|
||||
- **Linux** uses a combination of Landlock/seccomp APIs to enforce the `sandbox` configuration.
|
||||
|
||||
- Everything is placed in a read-only jail except for a small set of
|
||||
writable roots (`$PWD`, `$TMPDIR`, `~/.codex`, etc.).
|
||||
- Outbound network is _fully blocked_ by default - even if a child process
|
||||
tries to `curl` somewhere it will fail.
|
||||
|
||||
- **Linux** - there is no sandboxing by default.
|
||||
We recommend using Docker for sandboxing, where Codex launches itself inside a **minimal
|
||||
container image** and mounts your repo _read/write_ at the same path. A
|
||||
custom `iptables`/`ipset` firewall script denies all egress except the
|
||||
OpenAI API. This gives you deterministic, reproducible runs without needing
|
||||
root on the host. You can use the [`run_in_container.sh`](./codex-cli/scripts/run_in_container.sh) script to set up the sandbox.
|
||||
Note that when running Linux in a containerized environment such as Docker, sandboxing may not work if the host/container configuration does not support the necessary Landlock/seccomp APIs. In such cases, we recommend configuring your Docker container so that it provides the sandbox guarantees you are looking for and then running `codex` with `--sandbox danger-full-access` (or, more simply, the `--dangerously-bypass-approvals-and-sandbox` flag) within your container.
|
||||
|
||||
---
|
||||
|
||||
@@ -205,24 +234,20 @@ The hardening mechanism Codex uses depends on your OS:
|
||||
| Requirement | Details |
|
||||
| --------------------------- | --------------------------------------------------------------- |
|
||||
| Operating systems | macOS 12+, Ubuntu 20.04+/Debian 10+, or Windows 11 **via WSL2** |
|
||||
| Node.js | **22 or newer** (LTS recommended) |
|
||||
| Git (optional, recommended) | 2.23+ for built-in PR helpers |
|
||||
| RAM | 4-GB minimum (8-GB recommended) |
|
||||
|
||||
> Never run `sudo npm install -g`; fix npm permissions instead.
|
||||
|
||||
---
|
||||
|
||||
## CLI reference
|
||||
|
||||
| Command | Purpose | Example |
|
||||
| ------------------------------------ | ----------------------------------- | ------------------------------------ |
|
||||
| `codex` | Interactive REPL | `codex` |
|
||||
| `codex "..."` | Initial prompt for interactive REPL | `codex "fix lint errors"` |
|
||||
| `codex -q "..."` | Non-interactive "quiet mode" | `codex -q --json "explain utils.ts"` |
|
||||
| `codex completion <bash\|zsh\|fish>` | Print shell completion script | `codex completion bash` |
|
||||
| Command | Purpose | Example |
|
||||
| ------------------ | ---------------------------------- | ------------------------------- |
|
||||
| `codex` | Interactive TUI | `codex` |
|
||||
| `codex "..."` | Initial prompt for interactive TUI | `codex "fix lint errors"` |
|
||||
| `codex exec "..."` | Non-interactive "automation mode" | `codex exec "explain utils.ts"` |
|
||||
|
||||
Key flags: `--model/-m`, `--approval-mode/-a`, `--quiet/-q`, and `--notify`.
|
||||
Key flags: `--model/-m`, `--ask-for-approval/-a`.
|
||||
|
||||
---
|
||||
|
||||
@@ -234,8 +259,6 @@ You can give Codex extra instructions and guidance using `AGENTS.md` files. Code
|
||||
2. `AGENTS.md` at repo root - shared project notes
|
||||
3. `AGENTS.md` in the current working directory - sub-folder/feature specifics
|
||||
|
||||
Disable loading of these files with `--no-project-doc` or the environment variable `CODEX_DISABLE_PROJECT_DOC=1`.
|
||||
|
||||
---
|
||||
|
||||
## Non-interactive / CI mode
|
||||
@@ -247,18 +270,37 @@ Run Codex head-less in pipelines. Example GitHub Action step:
|
||||
run: |
|
||||
npm install -g @openai/codex
|
||||
export OPENAI_API_KEY="${{ secrets.OPENAI_KEY }}"
|
||||
codex -a auto-edit --quiet "update CHANGELOG for next release"
|
||||
codex exec --full-auto "update CHANGELOG for next release"
|
||||
```
|
||||
|
||||
Set `CODEX_QUIET_MODE=1` to silence interactive UI noise.
|
||||
## Model Context Protocol (MCP)
|
||||
|
||||
The Codex CLI can be configured to leverage MCP servers by defining an [`mcp_servers`](./codex-rs/config.md#mcp_servers) section in `~/.codex/config.toml`. It is intended to mirror how tools such as Claude and Cursor define `mcpServers` in their respective JSON config files, though the Codex format is slightly different since it uses TOML rather than JSON, e.g.:
|
||||
|
||||
```toml
|
||||
# IMPORTANT: the top-level key is `mcp_servers` rather than `mcpServers`.
|
||||
[mcp_servers.server-name]
|
||||
command = "npx"
|
||||
args = ["-y", "mcp-server"]
|
||||
env = { "API_KEY" = "value" }
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> It is somewhat experimental, but the Codex CLI can also be run as an MCP _server_ via `codex mcp`. If you launch it with an MCP client such as `npx @modelcontextprotocol/inspector codex mcp` and send it a `tools/list` request, you will see that there is only one tool, `codex`, that accepts a grab-bag of inputs, including a catch-all `config` map for anything you might want to override. Feel free to play around with it and provide feedback via GitHub issues.
|
||||
|
||||
## Tracing / verbose logging
|
||||
|
||||
Setting the environment variable `DEBUG=true` prints full API request and response details:
|
||||
Because Codex is written in Rust, it honors the `RUST_LOG` environment variable to configure its logging behavior.
|
||||
|
||||
The TUI defaults to `RUST_LOG=codex_core=info,codex_tui=info` and log messages are written to `~/.codex/log/codex-tui.log`, so you can leave the following running in a separate terminal to monitor log messages as they are written:
|
||||
|
||||
```shell
|
||||
DEBUG=true codex
|
||||
```
|
||||
tail -F ~/.codex/log/codex-tui.log
|
||||
```
|
||||
|
||||
By comparison, the non-interactive mode (`codex exec`) defaults to `RUST_LOG=error`, but messages are printed inline, so there is no need to monitor a separate file.
|
||||
|
||||
See the Rust documentation on [`RUST_LOG`](https://docs.rs/env_logger/latest/env_logger/#enabling-logging) for more information on the configuration options.
|
||||
|
||||
---
|
||||
|
||||
@@ -281,201 +323,78 @@ Below are a few bite-size examples you can copy-paste. Replace the text in quote
|
||||
## Installation
|
||||
|
||||
<details open>
|
||||
<summary><strong>From npm (Recommended)</strong></summary>
|
||||
<summary><strong>Install Codex CLI using your preferred package manager.</strong></summary>
|
||||
|
||||
From `brew` (recommended, downloads only the binary for your platform):
|
||||
|
||||
```bash
|
||||
npm install -g @openai/codex
|
||||
# or
|
||||
yarn global add @openai/codex
|
||||
# or
|
||||
bun install -g @openai/codex
|
||||
# or
|
||||
pnpm add -g @openai/codex
|
||||
brew install codex
|
||||
```
|
||||
|
||||
From `npm` (generally more readily available, but downloads binaries for all supported platforms):
|
||||
|
||||
```bash
|
||||
npm i -g @openai/codex
|
||||
```
|
||||
|
||||
Or go to the [latest GitHub Release](https://github.com/openai/codex/releases/latest) and download the appropriate binary for your platform.
|
||||
|
||||
Admittedly, each GitHub Release contains many executables, but in practice, you likely want one of these:
|
||||
|
||||
- macOS
|
||||
- Apple Silicon/arm64: `codex-aarch64-apple-darwin.tar.gz`
|
||||
- x86_64 (older Mac hardware): `codex-x86_64-apple-darwin.tar.gz`
|
||||
- Linux
|
||||
- x86_64: `codex-x86_64-unknown-linux-musl.tar.gz`
|
||||
- arm64: `codex-aarch64-unknown-linux-musl.tar.gz`
|
||||
|
||||
Each archive contains a single entry with the platform baked into the name (e.g., `codex-x86_64-unknown-linux-musl`), so you likely want to rename it to `codex` after extracting it.
|
||||
|
||||
### DotSlash
|
||||
|
||||
The GitHub Release also contains a [DotSlash](https://dotslash-cli.com/) file for the Codex CLI named `codex`. Using a DotSlash file makes it possible to make a lightweight commit to source control to ensure all contributors use the same version of an executable, regardless of what platform they use for development.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>Build from source</strong></summary>
|
||||
|
||||
```bash
|
||||
# Clone the repository and navigate to the CLI package
|
||||
# Clone the repository and navigate to the root of the Cargo workspace.
|
||||
git clone https://github.com/openai/codex.git
|
||||
cd codex/codex-cli
|
||||
cd codex/codex-rs
|
||||
|
||||
# Enable corepack
|
||||
corepack enable
|
||||
# Install the Rust toolchain, if necessary.
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
source "$HOME/.cargo/env"
|
||||
rustup component add rustfmt
|
||||
rustup component add clippy
|
||||
|
||||
# Install dependencies and build
|
||||
pnpm install
|
||||
pnpm build
|
||||
# Build Codex.
|
||||
cargo build
|
||||
|
||||
# Linux-only: download prebuilt sandboxing binaries (requires gh and zstd).
|
||||
./scripts/install_native_deps.sh
|
||||
# Launch the TUI with a sample prompt.
|
||||
cargo run --bin codex -- "explain this codebase to me"
|
||||
|
||||
# Get the usage and the options
|
||||
node ./dist/cli.js --help
|
||||
# After making changes, ensure the code is clean.
|
||||
cargo fmt -- --config imports_granularity=Item
|
||||
cargo clippy --tests
|
||||
|
||||
# Run the locally-built CLI directly
|
||||
node ./dist/cli.js
|
||||
|
||||
# Or link the command globally for convenience
|
||||
pnpm link
|
||||
# Run the tests.
|
||||
cargo test
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## Configuration guide
|
||||
## Configuration
|
||||
|
||||
Codex configuration files can be placed in the `~/.codex/` directory, supporting both YAML and JSON formats.
|
||||
Codex supports a rich set of configuration options documented in [`codex-rs/config.md`](./codex-rs/config.md).
|
||||
|
||||
### Basic configuration parameters
|
||||
By default, Codex loads its configuration from `~/.codex/config.toml`.
|
||||
|
||||
| Parameter | Type | Default | Description | Available Options |
|
||||
| ------------------- | ------- | ---------- | -------------------------------- | ---------------------------------------------------------------------------------------------- |
|
||||
| `model` | string | `o4-mini` | AI model to use | Any model name supporting OpenAI API |
|
||||
| `approvalMode` | string | `suggest` | AI assistant's permission mode | `suggest` (suggestions only)<br>`auto-edit` (automatic edits)<br>`full-auto` (fully automatic) |
|
||||
| `fullAutoErrorMode` | string | `ask-user` | Error handling in full-auto mode | `ask-user` (prompt for user input)<br>`ignore-and-continue` (ignore and proceed) |
|
||||
| `notify` | boolean | `true` | Enable desktop notifications | `true`/`false` |
|
||||
|
||||
### Custom AI provider configuration
|
||||
|
||||
In the `providers` object, you can configure multiple AI service providers. Each provider requires the following parameters:
|
||||
|
||||
| Parameter | Type | Description | Example |
|
||||
| --------- | ------ | --------------------------------------- | ----------------------------- |
|
||||
| `name` | string | Display name of the provider | `"OpenAI"` |
|
||||
| `baseURL` | string | API service URL | `"https://api.openai.com/v1"` |
|
||||
| `envKey` | string | Environment variable name (for API key) | `"OPENAI_API_KEY"` |
|
||||
|
||||
### History configuration
|
||||
|
||||
In the `history` object, you can configure conversation history settings:
|
||||
|
||||
| Parameter | Type | Description | Example Value |
|
||||
| ------------------- | ------- | ------------------------------------------------------ | ------------- |
|
||||
| `maxSize` | number | Maximum number of history entries to save | `1000` |
|
||||
| `saveHistory` | boolean | Whether to save history | `true` |
|
||||
| `sensitivePatterns` | array | Patterns of sensitive information to filter in history | `[]` |
|
||||
|
||||
### Configuration examples
|
||||
|
||||
1. YAML format (save as `~/.codex/config.yaml`):
|
||||
|
||||
```yaml
|
||||
model: o4-mini
|
||||
approvalMode: suggest
|
||||
fullAutoErrorMode: ask-user
|
||||
notify: true
|
||||
```
|
||||
|
||||
2. JSON format (save as `~/.codex/config.json`):
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "o4-mini",
|
||||
"approvalMode": "suggest",
|
||||
"fullAutoErrorMode": "ask-user",
|
||||
"notify": true
|
||||
}
|
||||
```
|
||||
|
||||
### Full configuration example
|
||||
|
||||
Below is a comprehensive example of `config.json` with multiple custom providers:
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "o4-mini",
|
||||
"provider": "openai",
|
||||
"providers": {
|
||||
"openai": {
|
||||
"name": "OpenAI",
|
||||
"baseURL": "https://api.openai.com/v1",
|
||||
"envKey": "OPENAI_API_KEY"
|
||||
},
|
||||
"azure": {
|
||||
"name": "AzureOpenAI",
|
||||
"baseURL": "https://YOUR_PROJECT_NAME.openai.azure.com/openai",
|
||||
"envKey": "AZURE_OPENAI_API_KEY"
|
||||
},
|
||||
"openrouter": {
|
||||
"name": "OpenRouter",
|
||||
"baseURL": "https://openrouter.ai/api/v1",
|
||||
"envKey": "OPENROUTER_API_KEY"
|
||||
},
|
||||
"gemini": {
|
||||
"name": "Gemini",
|
||||
"baseURL": "https://generativelanguage.googleapis.com/v1beta/openai",
|
||||
"envKey": "GEMINI_API_KEY"
|
||||
},
|
||||
"ollama": {
|
||||
"name": "Ollama",
|
||||
"baseURL": "http://localhost:11434/v1",
|
||||
"envKey": "OLLAMA_API_KEY"
|
||||
},
|
||||
"mistral": {
|
||||
"name": "Mistral",
|
||||
"baseURL": "https://api.mistral.ai/v1",
|
||||
"envKey": "MISTRAL_API_KEY"
|
||||
},
|
||||
"deepseek": {
|
||||
"name": "DeepSeek",
|
||||
"baseURL": "https://api.deepseek.com",
|
||||
"envKey": "DEEPSEEK_API_KEY"
|
||||
},
|
||||
"xai": {
|
||||
"name": "xAI",
|
||||
"baseURL": "https://api.x.ai/v1",
|
||||
"envKey": "XAI_API_KEY"
|
||||
},
|
||||
"groq": {
|
||||
"name": "Groq",
|
||||
"baseURL": "https://api.groq.com/openai/v1",
|
||||
"envKey": "GROQ_API_KEY"
|
||||
},
|
||||
"arceeai": {
|
||||
"name": "ArceeAI",
|
||||
"baseURL": "https://conductor.arcee.ai/v1",
|
||||
"envKey": "ARCEEAI_API_KEY"
|
||||
}
|
||||
},
|
||||
"history": {
|
||||
"maxSize": 1000,
|
||||
"saveHistory": true,
|
||||
"sensitivePatterns": []
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Custom instructions
|
||||
|
||||
You can create a `~/.codex/AGENTS.md` file to define custom guidance for the agent:
|
||||
|
||||
```markdown
|
||||
- Always respond with emojis
|
||||
- Only use git commands when explicitly requested
|
||||
```
|
||||
|
||||
### Environment variables setup
|
||||
|
||||
For each AI provider, you need to set the corresponding API key in your environment variables. For example:
|
||||
|
||||
```bash
|
||||
# OpenAI
|
||||
export OPENAI_API_KEY="your-api-key-here"
|
||||
|
||||
# Azure OpenAI
|
||||
export AZURE_OPENAI_API_KEY="your-azure-api-key-here"
|
||||
export AZURE_OPENAI_API_VERSION="2025-03-01-preview" (Optional)
|
||||
|
||||
# OpenRouter
|
||||
export OPENROUTER_API_KEY="your-openrouter-key-here"
|
||||
|
||||
# Similarly for other providers
|
||||
```
|
||||
Though `--config` can be used to set/override ad-hoc config values for individual invocations of `codex`.
|
||||
|
||||
---
|
||||
|
||||
@@ -524,7 +443,13 @@ Codex CLI **does** support OpenAI organizations with [Zero Data Retention (ZDR)]
|
||||
OpenAI rejected the request. Error details: Status: 400, Code: unsupported_parameter, Type: invalid_request_error, Message: 400 Previous response cannot be used for this organization due to Zero Data Retention.
|
||||
```
|
||||
|
||||
You may need to upgrade to a more recent version with: `npm i -g @openai/codex@latest`
|
||||
Ensure you are running `codex` with `--config disable_response_storage=true` or add this line to `~/.codex/config.toml` to avoid specifying the command line option each time:
|
||||
|
||||
```toml
|
||||
disable_response_storage = true
|
||||
```
|
||||
|
||||
See [the configuration documentation on `disable_response_storage`](./codex-rs/config.md#disable_response_storage) for details.
|
||||
|
||||
---
|
||||
|
||||
@@ -549,51 +474,7 @@ More broadly we welcome contributions - whether you are opening your very first
|
||||
|
||||
- Create a _topic branch_ from `main` - e.g. `feat/interactive-prompt`.
|
||||
- Keep your changes focused. Multiple unrelated fixes should be opened as separate PRs.
|
||||
- Use `pnpm test:watch` during development for super-fast feedback.
|
||||
- We use **Vitest** for unit tests, **ESLint** + **Prettier** for style, and **TypeScript** for type-checking.
|
||||
- Before pushing, run the full test/type/lint suite:
|
||||
|
||||
### Git hooks with Husky
|
||||
|
||||
This project uses [Husky](https://typicode.github.io/husky/) to enforce code quality checks:
|
||||
|
||||
- **Pre-commit hook**: Automatically runs lint-staged to format and lint files before committing
|
||||
- **Pre-push hook**: Runs tests and type checking before pushing to the remote
|
||||
|
||||
These hooks help maintain code quality and prevent pushing code with failing tests. For more details, see [HUSKY.md](./codex-cli/HUSKY.md).
|
||||
|
||||
```bash
|
||||
pnpm test && pnpm run lint && pnpm run typecheck
|
||||
```
|
||||
|
||||
- If you have **not** yet signed the Contributor License Agreement (CLA), add a PR comment containing the exact text
|
||||
|
||||
```text
|
||||
I have read the CLA Document and I hereby sign the CLA
|
||||
```
|
||||
|
||||
The CLA-Assistant bot will turn the PR status green once all authors have signed.
|
||||
|
||||
```bash
|
||||
# Watch mode (tests rerun on change)
|
||||
pnpm test:watch
|
||||
|
||||
# Type-check without emitting files
|
||||
pnpm typecheck
|
||||
|
||||
# Automatically fix lint + prettier issues
|
||||
pnpm lint:fix
|
||||
pnpm format:fix
|
||||
```
|
||||
|
||||
### Debugging
|
||||
|
||||
To debug the CLI with a visual debugger, do the following in the `codex-cli` folder:
|
||||
|
||||
- Run `pnpm run build` to build the CLI, which will generate `cli.js.map` alongside `cli.js` in the `dist` folder.
|
||||
- Run the CLI with `node --inspect-brk ./dist/cli.js` The program then waits until a debugger is attached before proceeding. Options:
|
||||
- In VS Code, choose **Debug: Attach to Node Process** from the command palette and choose the option in the dropdown with debug port `9229` (likely the first option)
|
||||
- Go to <chrome://inspect> in Chrome and find **localhost:9229** and click **trace**
|
||||
- Following the [development setup](#development-workflow) instructions above, ensure your change is free of lint warnings and test failures.
|
||||
|
||||
### Writing high-impact code changes
|
||||
|
||||
@@ -605,7 +486,7 @@ To debug the CLI with a visual debugger, do the following in the `codex-cli` fol
|
||||
### Opening a pull request
|
||||
|
||||
- Fill in the PR template (or include similar information) - **What? Why? How?**
|
||||
- Run **all** checks locally (`npm test && npm run lint && npm run typecheck`). CI failures that could have been caught locally slow down the process.
|
||||
- Run **all** checks locally (`cargo test && cargo clippy --tests && cargo fmt -- --config imports_granularity=Item`). CI failures that could have been caught locally slow down the process.
|
||||
- Make sure your branch is up-to-date with `main` and that you have resolved merge conflicts.
|
||||
- Mark the PR as **Ready for review** only when you believe it is in a merge-able state.
|
||||
|
||||
@@ -652,73 +533,22 @@ The **DCO check** blocks merges until every commit in the PR carries the footer
|
||||
|
||||
### Releasing `codex`
|
||||
|
||||
To publish a new version of the CLI you first need to stage the npm package. A
|
||||
helper script in `codex-cli/scripts/` does all the heavy lifting. Inside the
|
||||
`codex-cli` folder run:
|
||||
_For admins only._
|
||||
|
||||
```bash
|
||||
# Classic, JS implementation that includes small, native binaries for Linux sandboxing.
|
||||
pnpm stage-release
|
||||
Make sure you are on `main` and have no local changes. Then run:
|
||||
|
||||
# Optionally specify the temp directory to reuse between runs.
|
||||
RELEASE_DIR=$(mktemp -d)
|
||||
pnpm stage-release --tmp "$RELEASE_DIR"
|
||||
|
||||
# "Fat" package that additionally bundles the native Rust CLI binaries for
|
||||
# Linux. End-users can then opt-in at runtime by setting CODEX_RUST=1.
|
||||
pnpm stage-release --native
|
||||
```shell
|
||||
VERSION=0.2.0 # Can also be 0.2.0-alpha.1 or any valid Rust version.
|
||||
./codex-rs/scripts/create_github_release.sh "$VERSION"
|
||||
```
|
||||
|
||||
Go to the folder where the release is staged and verify that it works as intended. If so, run the following from the temp folder:
|
||||
This will make a local commit on top of `main` with `version` set to `$VERSION` in `codex-rs/Cargo.toml` (note that on `main`, we leave the version as `version = "0.0.0"`).
|
||||
|
||||
```
|
||||
cd "$RELEASE_DIR"
|
||||
npm publish
|
||||
```
|
||||
This will push the commit using the tag `rust-v${VERSION}`, which in turn kicks off [the release workflow](.github/workflows/rust-release.yml). This will create a new GitHub Release named `$VERSION`.
|
||||
|
||||
### Alternative build options
|
||||
If everything looks good in the generated GitHub Release, uncheck the **pre-release** box so it is the latest release.
|
||||
|
||||
#### Nix flake development
|
||||
|
||||
Prerequisite: Nix >= 2.4 with flakes enabled (`experimental-features = nix-command flakes` in `~/.config/nix/nix.conf`).
|
||||
|
||||
Enter a Nix development shell:
|
||||
|
||||
```bash
|
||||
# Use either one of the commands according to which implementation you want to work with
|
||||
nix develop .#codex-cli # For entering codex-cli specific shell
|
||||
nix develop .#codex-rs # For entering codex-rs specific shell
|
||||
```
|
||||
|
||||
This shell includes Node.js, installs dependencies, builds the CLI, and provides a `codex` command alias.
|
||||
|
||||
Build and run the CLI directly:
|
||||
|
||||
```bash
|
||||
# Use either one of the commands according to which implementation you want to work with
|
||||
nix build .#codex-cli # For building codex-cli
|
||||
nix build .#codex-rs # For building codex-rs
|
||||
./result/bin/codex --help
|
||||
```
|
||||
|
||||
Run the CLI via the flake app:
|
||||
|
||||
```bash
|
||||
# Use either one of the commands according to which implementation you want to work with
|
||||
nix run .#codex-cli # For running codex-cli
|
||||
nix run .#codex-rs # For running codex-rs
|
||||
```
|
||||
|
||||
Use direnv with flakes
|
||||
|
||||
If you have direnv installed, you can use the following `.envrc` to automatically enter the Nix shell when you `cd` into the project directory:
|
||||
|
||||
```bash
|
||||
cd codex-rs
|
||||
echo "use flake ../flake.nix#codex-cli" >> .envrc && direnv allow
|
||||
cd codex-cli
|
||||
echo "use flake ../flake.nix#codex-rs" >> .envrc && direnv allow
|
||||
```
|
||||
Create a PR to update [`Formula/c/codex.rb`](https://github.com/Homebrew/homebrew-core/blob/main/Formula/c/codex.rb) on Homebrew.
|
||||
|
||||
---
|
||||
|
||||
|
||||
4
codex-cli/.gitignore
vendored
4
codex-cli/.gitignore
vendored
@@ -1,3 +1,7 @@
|
||||
# Added by ./scripts/install_native_deps.sh
|
||||
/bin/codex-aarch64-apple-darwin
|
||||
/bin/codex-aarch64-unknown-linux-musl
|
||||
/bin/codex-linux-sandbox-arm64
|
||||
/bin/codex-linux-sandbox-x64
|
||||
/bin/codex-x86_64-apple-darwin
|
||||
/bin/codex-x86_64-unknown-linux-musl
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM node:20-slim
|
||||
FROM node:24-slim
|
||||
|
||||
ARG TZ
|
||||
ENV TZ="$TZ"
|
||||
|
||||
736
codex-cli/README.md
Normal file
736
codex-cli/README.md
Normal file
@@ -0,0 +1,736 @@
|
||||
<h1 align="center">OpenAI Codex CLI</h1>
|
||||
<p align="center">Lightweight coding agent that runs in your terminal</p>
|
||||
|
||||
<p align="center"><code>npm i -g @openai/codex</code></p>
|
||||
|
||||
> [!IMPORTANT]
|
||||
> This is the documentation for the _legacy_ TypeScript implementation of the Codex CLI. It has been superseded by the _Rust_ implementation. See the [README in the root of the Codex repository](https://github.com/openai/codex/blob/main/README.md) for details.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
<details>
|
||||
<summary><strong>Table of contents</strong></summary>
|
||||
|
||||
<!-- Begin ToC -->
|
||||
|
||||
- [Experimental technology disclaimer](#experimental-technology-disclaimer)
|
||||
- [Quickstart](#quickstart)
|
||||
- [Why Codex?](#why-codex)
|
||||
- [Security model & permissions](#security-model--permissions)
|
||||
- [Platform sandboxing details](#platform-sandboxing-details)
|
||||
- [System requirements](#system-requirements)
|
||||
- [CLI reference](#cli-reference)
|
||||
- [Memory & project docs](#memory--project-docs)
|
||||
- [Non-interactive / CI mode](#non-interactive--ci-mode)
|
||||
- [Tracing / verbose logging](#tracing--verbose-logging)
|
||||
- [Recipes](#recipes)
|
||||
- [Installation](#installation)
|
||||
- [Configuration guide](#configuration-guide)
|
||||
- [Basic configuration parameters](#basic-configuration-parameters)
|
||||
- [Custom AI provider configuration](#custom-ai-provider-configuration)
|
||||
- [History configuration](#history-configuration)
|
||||
- [Configuration examples](#configuration-examples)
|
||||
- [Full configuration example](#full-configuration-example)
|
||||
- [Custom instructions](#custom-instructions)
|
||||
- [Environment variables setup](#environment-variables-setup)
|
||||
- [FAQ](#faq)
|
||||
- [Zero data retention (ZDR) usage](#zero-data-retention-zdr-usage)
|
||||
- [Codex open source fund](#codex-open-source-fund)
|
||||
- [Contributing](#contributing)
|
||||
- [Development workflow](#development-workflow)
|
||||
- [Git hooks with Husky](#git-hooks-with-husky)
|
||||
- [Debugging](#debugging)
|
||||
- [Writing high-impact code changes](#writing-high-impact-code-changes)
|
||||
- [Opening a pull request](#opening-a-pull-request)
|
||||
- [Review process](#review-process)
|
||||
- [Community values](#community-values)
|
||||
- [Getting help](#getting-help)
|
||||
- [Contributor license agreement (CLA)](#contributor-license-agreement-cla)
|
||||
- [Quick fixes](#quick-fixes)
|
||||
- [Releasing `codex`](#releasing-codex)
|
||||
- [Alternative build options](#alternative-build-options)
|
||||
- [Nix flake development](#nix-flake-development)
|
||||
- [Security & responsible AI](#security--responsible-ai)
|
||||
- [License](#license)
|
||||
|
||||
<!-- End ToC -->
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## Experimental technology disclaimer
|
||||
|
||||
Codex CLI is an experimental project under active development. It is not yet stable, may contain bugs, incomplete features, or undergo breaking changes. We're building it in the open with the community and welcome:
|
||||
|
||||
- Bug reports
|
||||
- Feature requests
|
||||
- Pull requests
|
||||
- Good vibes
|
||||
|
||||
Help us improve by filing issues or submitting PRs (see the section below for how to contribute)!
|
||||
|
||||
## Quickstart
|
||||
|
||||
Install globally:
|
||||
|
||||
```shell
|
||||
npm install -g @openai/codex
|
||||
```
|
||||
|
||||
Next, set your OpenAI API key as an environment variable:
|
||||
|
||||
```shell
|
||||
export OPENAI_API_KEY="your-api-key-here"
|
||||
```
|
||||
|
||||
> **Note:** This command sets the key only for your current terminal session. You can add the `export` line to your shell's configuration file (e.g., `~/.zshrc`) but we recommend setting for the session. **Tip:** You can also place your API key into a `.env` file at the root of your project:
|
||||
>
|
||||
> ```env
|
||||
> OPENAI_API_KEY=your-api-key-here
|
||||
> ```
|
||||
>
|
||||
> The CLI will automatically load variables from `.env` (via `dotenv/config`).
|
||||
|
||||
<details>
|
||||
<summary><strong>Use <code>--provider</code> to use other models</strong></summary>
|
||||
|
||||
> Codex also allows you to use other providers that support the OpenAI Chat Completions API. You can set the provider in the config file or use the `--provider` flag. The possible options for `--provider` are:
|
||||
>
|
||||
> - openai (default)
|
||||
> - openrouter
|
||||
> - azure
|
||||
> - gemini
|
||||
> - ollama
|
||||
> - mistral
|
||||
> - deepseek
|
||||
> - xai
|
||||
> - groq
|
||||
> - arceeai
|
||||
> - any other provider that is compatible with the OpenAI API
|
||||
>
|
||||
> If you use a provider other than OpenAI, you will need to set the API key for the provider in the config file or in the environment variable as:
|
||||
>
|
||||
> ```shell
|
||||
> export <provider>_API_KEY="your-api-key-here"
|
||||
> ```
|
||||
>
|
||||
> If you use a provider not listed above, you must also set the base URL for the provider:
|
||||
>
|
||||
> ```shell
|
||||
> export <provider>_BASE_URL="https://your-provider-api-base-url"
|
||||
> ```
|
||||
|
||||
</details>
|
||||
<br />
|
||||
|
||||
Run interactively:
|
||||
|
||||
```shell
|
||||
codex
|
||||
```
|
||||
|
||||
Or, run with a prompt as input (and optionally in `Full Auto` mode):
|
||||
|
||||
```shell
|
||||
codex "explain this codebase to me"
|
||||
```
|
||||
|
||||
```shell
|
||||
codex --approval-mode full-auto "create the fanciest todo-list app"
|
||||
```
|
||||
|
||||
That's it - Codex will scaffold a file, run it inside a sandbox, install any
|
||||
missing dependencies, and show you the live result. Approve the changes and
|
||||
they'll be committed to your working directory.
|
||||
|
||||
---
|
||||
|
||||
## Why Codex?
|
||||
|
||||
Codex CLI is built for developers who already **live in the terminal** and want
|
||||
ChatGPT-level reasoning **plus** the power to actually run code, manipulate
|
||||
files, and iterate - all under version control. In short, it's _chat-driven
|
||||
development_ that understands and executes your repo.
|
||||
|
||||
- **Zero setup** - bring your OpenAI API key and it just works!
|
||||
- **Full auto-approval, while safe + secure** by running network-disabled and directory-sandboxed
|
||||
- **Multimodal** - pass in screenshots or diagrams to implement features ✨
|
||||
|
||||
And it's **fully open-source** so you can see and contribute to how it develops!
|
||||
|
||||
---
|
||||
|
||||
## Security model & permissions
|
||||
|
||||
Codex lets you decide _how much autonomy_ the agent receives and auto-approval policy via the
|
||||
`--approval-mode` flag (or the interactive onboarding prompt):
|
||||
|
||||
| Mode | What the agent may do without asking | Still requires approval |
|
||||
| ------------------------- | --------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- |
|
||||
| **Suggest** <br>(default) | <li>Read any file in the repo | <li>**All** file writes/patches<li> **Any** arbitrary shell commands (aside from reading files) |
|
||||
| **Auto Edit** | <li>Read **and** apply-patch writes to files | <li>**All** shell commands |
|
||||
| **Full Auto** | <li>Read/write files <li> Execute shell commands (network disabled, writes limited to your workdir) | - |
|
||||
|
||||
In **Full Auto** every command is run **network-disabled** and confined to the
|
||||
current working directory (plus temporary files) for defense-in-depth. Codex
|
||||
will also show a warning/confirmation if you start in **auto-edit** or
|
||||
**full-auto** while the directory is _not_ tracked by Git, so you always have a
|
||||
safety net.
|
||||
|
||||
Coming soon: you'll be able to whitelist specific commands to auto-execute with
|
||||
the network enabled, once we're confident in additional safeguards.
|
||||
|
||||
### Platform sandboxing details
|
||||
|
||||
The hardening mechanism Codex uses depends on your OS:
|
||||
|
||||
- **macOS 12+** - commands are wrapped with **Apple Seatbelt** (`sandbox-exec`).
|
||||
|
||||
- Everything is placed in a read-only jail except for a small set of
|
||||
writable roots (`$PWD`, `$TMPDIR`, `~/.codex`, etc.).
|
||||
- Outbound network is _fully blocked_ by default - even if a child process
|
||||
tries to `curl` somewhere it will fail.
|
||||
|
||||
- **Linux** - there is no sandboxing by default.
|
||||
We recommend using Docker for sandboxing, where Codex launches itself inside a **minimal
|
||||
container image** and mounts your repo _read/write_ at the same path. A
|
||||
custom `iptables`/`ipset` firewall script denies all egress except the
|
||||
OpenAI API. This gives you deterministic, reproducible runs without needing
|
||||
root on the host. You can use the [`run_in_container.sh`](../codex-cli/scripts/run_in_container.sh) script to set up the sandbox.
|
||||
|
||||
---
|
||||
|
||||
## System requirements
|
||||
|
||||
| Requirement | Details |
|
||||
| --------------------------- | --------------------------------------------------------------- |
|
||||
| Operating systems | macOS 12+, Ubuntu 20.04+/Debian 10+, or Windows 11 **via WSL2** |
|
||||
| Node.js | **22 or newer** (LTS recommended) |
|
||||
| Git (optional, recommended) | 2.23+ for built-in PR helpers |
|
||||
| RAM | 4-GB minimum (8-GB recommended) |
|
||||
|
||||
> Never run `sudo npm install -g`; fix npm permissions instead.
|
||||
|
||||
---
|
||||
|
||||
## CLI reference
|
||||
|
||||
| Command | Purpose | Example |
|
||||
| ------------------------------------ | ----------------------------------- | ------------------------------------ |
|
||||
| `codex` | Interactive REPL | `codex` |
|
||||
| `codex "..."` | Initial prompt for interactive REPL | `codex "fix lint errors"` |
|
||||
| `codex -q "..."` | Non-interactive "quiet mode" | `codex -q --json "explain utils.ts"` |
|
||||
| `codex completion <bash\|zsh\|fish>` | Print shell completion script | `codex completion bash` |
|
||||
|
||||
Key flags: `--model/-m`, `--approval-mode/-a`, `--quiet/-q`, and `--notify`.
|
||||
|
||||
---
|
||||
|
||||
## Memory & project docs
|
||||
|
||||
You can give Codex extra instructions and guidance using `AGENTS.md` files. Codex looks for `AGENTS.md` files in the following places, and merges them top-down:
|
||||
|
||||
1. `~/.codex/AGENTS.md` - personal global guidance
|
||||
2. `AGENTS.md` at repo root - shared project notes
|
||||
3. `AGENTS.md` in the current working directory - sub-folder/feature specifics
|
||||
|
||||
Disable loading of these files with `--no-project-doc` or the environment variable `CODEX_DISABLE_PROJECT_DOC=1`.
|
||||
|
||||
---
|
||||
|
||||
## Non-interactive / CI mode
|
||||
|
||||
Run Codex head-less in pipelines. Example GitHub Action step:
|
||||
|
||||
```yaml
|
||||
- name: Update changelog via Codex
|
||||
run: |
|
||||
npm install -g @openai/codex
|
||||
export OPENAI_API_KEY="${{ secrets.OPENAI_KEY }}"
|
||||
codex -a auto-edit --quiet "update CHANGELOG for next release"
|
||||
```
|
||||
|
||||
Set `CODEX_QUIET_MODE=1` to silence interactive UI noise.
|
||||
|
||||
## Tracing / verbose logging
|
||||
|
||||
Setting the environment variable `DEBUG=true` prints full API request and response details:
|
||||
|
||||
```shell
|
||||
DEBUG=true codex
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Recipes
|
||||
|
||||
Below are a few bite-size examples you can copy-paste. Replace the text in quotes with your own task. See the [prompting guide](https://github.com/openai/codex/blob/main/codex-cli/examples/prompting_guide.md) for more tips and usage patterns.
|
||||
|
||||
| ✨ | What you type | What happens |
|
||||
| --- | ------------------------------------------------------------------------------- | -------------------------------------------------------------------------- |
|
||||
| 1 | `codex "Refactor the Dashboard component to React Hooks"` | Codex rewrites the class component, runs `npm test`, and shows the diff. |
|
||||
| 2 | `codex "Generate SQL migrations for adding a users table"` | Infers your ORM, creates migration files, and runs them in a sandboxed DB. |
|
||||
| 3 | `codex "Write unit tests for utils/date.ts"` | Generates tests, executes them, and iterates until they pass. |
|
||||
| 4 | `codex "Bulk-rename *.jpeg -> *.jpg with git mv"` | Safely renames files and updates imports/usages. |
|
||||
| 5 | `codex "Explain what this regex does: ^(?=.*[A-Z]).{8,}$"` | Outputs a step-by-step human explanation. |
|
||||
| 6 | `codex "Carefully review this repo, and propose 3 high impact well-scoped PRs"` | Suggests impactful PRs in the current codebase. |
|
||||
| 7 | `codex "Look for vulnerabilities and create a security review report"` | Finds and explains security bugs. |
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
<details open>
|
||||
<summary><strong>From npm (Recommended)</strong></summary>
|
||||
|
||||
```bash
|
||||
npm install -g @openai/codex
|
||||
# or
|
||||
yarn global add @openai/codex
|
||||
# or
|
||||
bun install -g @openai/codex
|
||||
# or
|
||||
pnpm add -g @openai/codex
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>Build from source</strong></summary>
|
||||
|
||||
```bash
|
||||
# Clone the repository and navigate to the CLI package
|
||||
git clone https://github.com/openai/codex.git
|
||||
cd codex/codex-cli
|
||||
|
||||
# Enable corepack
|
||||
corepack enable
|
||||
|
||||
# Install dependencies and build
|
||||
pnpm install
|
||||
pnpm build
|
||||
|
||||
# Linux-only: download prebuilt sandboxing binaries (requires gh and zstd).
|
||||
./scripts/install_native_deps.sh
|
||||
|
||||
# Get the usage and the options
|
||||
node ./dist/cli.js --help
|
||||
|
||||
# Run the locally-built CLI directly
|
||||
node ./dist/cli.js
|
||||
|
||||
# Or link the command globally for convenience
|
||||
pnpm link
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## Configuration guide
|
||||
|
||||
Codex configuration files can be placed in the `~/.codex/` directory, supporting both YAML and JSON formats.
|
||||
|
||||
### Basic configuration parameters
|
||||
|
||||
| Parameter | Type | Default | Description | Available Options |
|
||||
| ------------------- | ------- | ---------- | -------------------------------- | ---------------------------------------------------------------------------------------------- |
|
||||
| `model` | string | `o4-mini` | AI model to use | Any model name supporting OpenAI API |
|
||||
| `approvalMode` | string | `suggest` | AI assistant's permission mode | `suggest` (suggestions only)<br>`auto-edit` (automatic edits)<br>`full-auto` (fully automatic) |
|
||||
| `fullAutoErrorMode` | string | `ask-user` | Error handling in full-auto mode | `ask-user` (prompt for user input)<br>`ignore-and-continue` (ignore and proceed) |
|
||||
| `notify` | boolean | `true` | Enable desktop notifications | `true`/`false` |
|
||||
|
||||
### Custom AI provider configuration
|
||||
|
||||
In the `providers` object, you can configure multiple AI service providers. Each provider requires the following parameters:
|
||||
|
||||
| Parameter | Type | Description | Example |
|
||||
| --------- | ------ | --------------------------------------- | ----------------------------- |
|
||||
| `name` | string | Display name of the provider | `"OpenAI"` |
|
||||
| `baseURL` | string | API service URL | `"https://api.openai.com/v1"` |
|
||||
| `envKey` | string | Environment variable name (for API key) | `"OPENAI_API_KEY"` |
|
||||
|
||||
### History configuration
|
||||
|
||||
In the `history` object, you can configure conversation history settings:
|
||||
|
||||
| Parameter | Type | Description | Example Value |
|
||||
| ------------------- | ------- | ------------------------------------------------------ | ------------- |
|
||||
| `maxSize` | number | Maximum number of history entries to save | `1000` |
|
||||
| `saveHistory` | boolean | Whether to save history | `true` |
|
||||
| `sensitivePatterns` | array | Patterns of sensitive information to filter in history | `[]` |
|
||||
|
||||
### Configuration examples
|
||||
|
||||
1. YAML format (save as `~/.codex/config.yaml`):
|
||||
|
||||
```yaml
|
||||
model: o4-mini
|
||||
approvalMode: suggest
|
||||
fullAutoErrorMode: ask-user
|
||||
notify: true
|
||||
```
|
||||
|
||||
2. JSON format (save as `~/.codex/config.json`):
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "o4-mini",
|
||||
"approvalMode": "suggest",
|
||||
"fullAutoErrorMode": "ask-user",
|
||||
"notify": true
|
||||
}
|
||||
```
|
||||
|
||||
### Full configuration example
|
||||
|
||||
Below is a comprehensive example of `config.json` with multiple custom providers:
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "o4-mini",
|
||||
"provider": "openai",
|
||||
"providers": {
|
||||
"openai": {
|
||||
"name": "OpenAI",
|
||||
"baseURL": "https://api.openai.com/v1",
|
||||
"envKey": "OPENAI_API_KEY"
|
||||
},
|
||||
"azure": {
|
||||
"name": "AzureOpenAI",
|
||||
"baseURL": "https://YOUR_PROJECT_NAME.openai.azure.com/openai",
|
||||
"envKey": "AZURE_OPENAI_API_KEY"
|
||||
},
|
||||
"openrouter": {
|
||||
"name": "OpenRouter",
|
||||
"baseURL": "https://openrouter.ai/api/v1",
|
||||
"envKey": "OPENROUTER_API_KEY"
|
||||
},
|
||||
"gemini": {
|
||||
"name": "Gemini",
|
||||
"baseURL": "https://generativelanguage.googleapis.com/v1beta/openai",
|
||||
"envKey": "GEMINI_API_KEY"
|
||||
},
|
||||
"ollama": {
|
||||
"name": "Ollama",
|
||||
"baseURL": "http://localhost:11434/v1",
|
||||
"envKey": "OLLAMA_API_KEY"
|
||||
},
|
||||
"mistral": {
|
||||
"name": "Mistral",
|
||||
"baseURL": "https://api.mistral.ai/v1",
|
||||
"envKey": "MISTRAL_API_KEY"
|
||||
},
|
||||
"deepseek": {
|
||||
"name": "DeepSeek",
|
||||
"baseURL": "https://api.deepseek.com",
|
||||
"envKey": "DEEPSEEK_API_KEY"
|
||||
},
|
||||
"xai": {
|
||||
"name": "xAI",
|
||||
"baseURL": "https://api.x.ai/v1",
|
||||
"envKey": "XAI_API_KEY"
|
||||
},
|
||||
"groq": {
|
||||
"name": "Groq",
|
||||
"baseURL": "https://api.groq.com/openai/v1",
|
||||
"envKey": "GROQ_API_KEY"
|
||||
},
|
||||
"arceeai": {
|
||||
"name": "ArceeAI",
|
||||
"baseURL": "https://conductor.arcee.ai/v1",
|
||||
"envKey": "ARCEEAI_API_KEY"
|
||||
}
|
||||
},
|
||||
"history": {
|
||||
"maxSize": 1000,
|
||||
"saveHistory": true,
|
||||
"sensitivePatterns": []
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Custom instructions
|
||||
|
||||
You can create a `~/.codex/AGENTS.md` file to define custom guidance for the agent:
|
||||
|
||||
```markdown
|
||||
- Always respond with emojis
|
||||
- Only use git commands when explicitly requested
|
||||
```
|
||||
|
||||
### Environment variables setup
|
||||
|
||||
For each AI provider, you need to set the corresponding API key in your environment variables. For example:
|
||||
|
||||
```bash
|
||||
# OpenAI
|
||||
export OPENAI_API_KEY="your-api-key-here"
|
||||
|
||||
# Azure OpenAI
|
||||
export AZURE_OPENAI_API_KEY="your-azure-api-key-here"
|
||||
export AZURE_OPENAI_API_VERSION="2025-04-01-preview" (Optional)
|
||||
|
||||
# OpenRouter
|
||||
export OPENROUTER_API_KEY="your-openrouter-key-here"
|
||||
|
||||
# Similarly for other providers
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FAQ
|
||||
|
||||
<details>
|
||||
<summary>OpenAI released a model called Codex in 2021 - is this related?</summary>
|
||||
|
||||
In 2021, OpenAI released Codex, an AI system designed to generate code from natural language prompts. That original Codex model was deprecated as of March 2023 and is separate from the CLI tool.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>Which models are supported?</summary>
|
||||
|
||||
Any model available with [Responses API](https://platform.openai.com/docs/api-reference/responses). The default is `o4-mini`, but pass `--model gpt-4.1` or set `model: gpt-4.1` in your config file to override.
|
||||
|
||||
</details>
|
||||
<details>
|
||||
<summary>Why does <code>o3</code> or <code>o4-mini</code> not work for me?</summary>
|
||||
|
||||
It's possible that your [API account needs to be verified](https://help.openai.com/en/articles/10910291-api-organization-verification) in order to start streaming responses and seeing chain of thought summaries from the API. If you're still running into issues, please let us know!
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>How do I stop Codex from editing my files?</summary>
|
||||
|
||||
Codex runs model-generated commands in a sandbox. If a proposed command or file change doesn't look right, you can simply type **n** to deny the command or give the model feedback.
|
||||
|
||||
</details>
|
||||
<details>
|
||||
<summary>Does it work on Windows?</summary>
|
||||
|
||||
Not directly. It requires [Windows Subsystem for Linux (WSL2)](https://learn.microsoft.com/en-us/windows/wsl/install) - Codex has been tested on macOS and Linux with Node 22.
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## Zero data retention (ZDR) usage
|
||||
|
||||
Codex CLI **does** support OpenAI organizations with [Zero Data Retention (ZDR)](https://platform.openai.com/docs/guides/your-data#zero-data-retention) enabled. If your OpenAI organization has Zero Data Retention enabled and you still encounter errors such as:
|
||||
|
||||
```
|
||||
OpenAI rejected the request. Error details: Status: 400, Code: unsupported_parameter, Type: invalid_request_error, Message: 400 Previous response cannot be used for this organization due to Zero Data Retention.
|
||||
```
|
||||
|
||||
You may need to upgrade to a more recent version with: `npm i -g @openai/codex@latest`
|
||||
|
||||
---
|
||||
|
||||
## Codex open source fund
|
||||
|
||||
We're excited to launch a **$1 million initiative** supporting open source projects that use Codex CLI and other OpenAI models.
|
||||
|
||||
- Grants are awarded up to **$25,000** API credits.
|
||||
- Applications are reviewed **on a rolling basis**.
|
||||
|
||||
**Interested? [Apply here](https://openai.com/form/codex-open-source-fund/).**
|
||||
|
||||
---
|
||||
|
||||
## Contributing
|
||||
|
||||
This project is under active development and the code will likely change pretty significantly. We'll update this message once that's complete!
|
||||
|
||||
More broadly we welcome contributions - whether you are opening your very first pull request or you're a seasoned maintainer. At the same time we care about reliability and long-term maintainability, so the bar for merging code is intentionally **high**. The guidelines below spell out what "high-quality" means in practice and should make the whole process transparent and friendly.
|
||||
|
||||
### Development workflow
|
||||
|
||||
- Create a _topic branch_ from `main` - e.g. `feat/interactive-prompt`.
|
||||
- Keep your changes focused. Multiple unrelated fixes should be opened as separate PRs.
|
||||
- Use `pnpm test:watch` during development for super-fast feedback.
|
||||
- We use **Vitest** for unit tests, **ESLint** + **Prettier** for style, and **TypeScript** for type-checking.
|
||||
- Before pushing, run the full test/type/lint suite:
|
||||
|
||||
### Git hooks with Husky
|
||||
|
||||
This project uses [Husky](https://typicode.github.io/husky/) to enforce code quality checks:
|
||||
|
||||
- **Pre-commit hook**: Automatically runs lint-staged to format and lint files before committing
|
||||
- **Pre-push hook**: Runs tests and type checking before pushing to the remote
|
||||
|
||||
These hooks help maintain code quality and prevent pushing code with failing tests. For more details, see [HUSKY.md](./HUSKY.md).
|
||||
|
||||
```bash
|
||||
pnpm test && pnpm run lint && pnpm run typecheck
|
||||
```
|
||||
|
||||
- If you have **not** yet signed the Contributor License Agreement (CLA), add a PR comment containing the exact text
|
||||
|
||||
```text
|
||||
I have read the CLA Document and I hereby sign the CLA
|
||||
```
|
||||
|
||||
The CLA-Assistant bot will turn the PR status green once all authors have signed.
|
||||
|
||||
```bash
|
||||
# Watch mode (tests rerun on change)
|
||||
pnpm test:watch
|
||||
|
||||
# Type-check without emitting files
|
||||
pnpm typecheck
|
||||
|
||||
# Automatically fix lint + prettier issues
|
||||
pnpm lint:fix
|
||||
pnpm format:fix
|
||||
```
|
||||
|
||||
### Debugging
|
||||
|
||||
To debug the CLI with a visual debugger, do the following in the `codex-cli` folder:
|
||||
|
||||
- Run `pnpm run build` to build the CLI, which will generate `cli.js.map` alongside `cli.js` in the `dist` folder.
|
||||
- Run the CLI with `node --inspect-brk ./dist/cli.js` The program then waits until a debugger is attached before proceeding. Options:
|
||||
- In VS Code, choose **Debug: Attach to Node Process** from the command palette and choose the option in the dropdown with debug port `9229` (likely the first option)
|
||||
- Go to <chrome://inspect> in Chrome and find **localhost:9229** and click **trace**
|
||||
|
||||
### Writing high-impact code changes
|
||||
|
||||
1. **Start with an issue.** Open a new one or comment on an existing discussion so we can agree on the solution before code is written.
|
||||
2. **Add or update tests.** Every new feature or bug-fix should come with test coverage that fails before your change and passes afterwards. 100% coverage is not required, but aim for meaningful assertions.
|
||||
3. **Document behaviour.** If your change affects user-facing behaviour, update the README, inline help (`codex --help`), or relevant example projects.
|
||||
4. **Keep commits atomic.** Each commit should compile and the tests should pass. This makes reviews and potential rollbacks easier.
|
||||
|
||||
### Opening a pull request
|
||||
|
||||
- Fill in the PR template (or include similar information) - **What? Why? How?**
|
||||
- Run **all** checks locally (`npm test && npm run lint && npm run typecheck`). CI failures that could have been caught locally slow down the process.
|
||||
- Make sure your branch is up-to-date with `main` and that you have resolved merge conflicts.
|
||||
- Mark the PR as **Ready for review** only when you believe it is in a merge-able state.
|
||||
|
||||
### Review process
|
||||
|
||||
1. One maintainer will be assigned as a primary reviewer.
|
||||
2. We may ask for changes - please do not take this personally. We value the work, we just also value consistency and long-term maintainability.
|
||||
3. When there is consensus that the PR meets the bar, a maintainer will squash-and-merge.
|
||||
|
||||
### Community values
|
||||
|
||||
- **Be kind and inclusive.** Treat others with respect; we follow the [Contributor Covenant](https://www.contributor-covenant.org/).
|
||||
- **Assume good intent.** Written communication is hard - err on the side of generosity.
|
||||
- **Teach & learn.** If you spot something confusing, open an issue or PR with improvements.
|
||||
|
||||
### Getting help
|
||||
|
||||
If you run into problems setting up the project, would like feedback on an idea, or just want to say _hi_ - please open a Discussion or jump into the relevant issue. We are happy to help.
|
||||
|
||||
Together we can make Codex CLI an incredible tool. **Happy hacking!** :rocket:
|
||||
|
||||
### Contributor license agreement (CLA)
|
||||
|
||||
All contributors **must** accept the CLA. The process is lightweight:
|
||||
|
||||
1. Open your pull request.
|
||||
2. Paste the following comment (or reply `recheck` if you've signed before):
|
||||
|
||||
```text
|
||||
I have read the CLA Document and I hereby sign the CLA
|
||||
```
|
||||
|
||||
3. The CLA-Assistant bot records your signature in the repo and marks the status check as passed.
|
||||
|
||||
No special Git commands, email attachments, or commit footers required.
|
||||
|
||||
#### Quick fixes
|
||||
|
||||
| Scenario | Command |
|
||||
| ----------------- | ------------------------------------------------ |
|
||||
| Amend last commit | `git commit --amend -s --no-edit && git push -f` |
|
||||
|
||||
The **DCO check** blocks merges until every commit in the PR carries the footer (with squash this is just the one).
|
||||
|
||||
### Releasing `codex`
|
||||
|
||||
To publish a new version of the CLI you first need to stage the npm package. A
|
||||
helper script in `codex-cli/scripts/` does all the heavy lifting. Inside the
|
||||
`codex-cli` folder run:
|
||||
|
||||
```bash
|
||||
# Classic, JS implementation that includes small, native binaries for Linux sandboxing.
|
||||
pnpm stage-release
|
||||
|
||||
# Optionally specify the temp directory to reuse between runs.
|
||||
RELEASE_DIR=$(mktemp -d)
|
||||
pnpm stage-release --tmp "$RELEASE_DIR"
|
||||
|
||||
# "Fat" package that additionally bundles the native Rust CLI binaries for
|
||||
# Linux. End-users can then opt-in at runtime by setting CODEX_RUST=1.
|
||||
pnpm stage-release --native
|
||||
```
|
||||
|
||||
Go to the folder where the release is staged and verify that it works as intended. If so, run the following from the temp folder:
|
||||
|
||||
```
|
||||
cd "$RELEASE_DIR"
|
||||
npm publish
|
||||
```
|
||||
|
||||
### Alternative build options
|
||||
|
||||
#### Nix flake development
|
||||
|
||||
Prerequisite: Nix >= 2.4 with flakes enabled (`experimental-features = nix-command flakes` in `~/.config/nix/nix.conf`).
|
||||
|
||||
Enter a Nix development shell:
|
||||
|
||||
```bash
|
||||
# Use either one of the commands according to which implementation you want to work with
|
||||
nix develop .#codex-cli # For entering codex-cli specific shell
|
||||
nix develop .#codex-rs # For entering codex-rs specific shell
|
||||
```
|
||||
|
||||
This shell includes Node.js, installs dependencies, builds the CLI, and provides a `codex` command alias.
|
||||
|
||||
Build and run the CLI directly:
|
||||
|
||||
```bash
|
||||
# Use either one of the commands according to which implementation you want to work with
|
||||
nix build .#codex-cli # For building codex-cli
|
||||
nix build .#codex-rs # For building codex-rs
|
||||
./result/bin/codex --help
|
||||
```
|
||||
|
||||
Run the CLI via the flake app:
|
||||
|
||||
```bash
|
||||
# Use either one of the commands according to which implementation you want to work with
|
||||
nix run .#codex-cli # For running codex-cli
|
||||
nix run .#codex-rs # For running codex-rs
|
||||
```
|
||||
|
||||
Use direnv with flakes
|
||||
|
||||
If you have direnv installed, you can use the following `.envrc` to automatically enter the Nix shell when you `cd` into the project directory:
|
||||
|
||||
```bash
|
||||
cd codex-rs
|
||||
echo "use flake ../flake.nix#codex-cli" >> .envrc && direnv allow
|
||||
cd codex-cli
|
||||
echo "use flake ../flake.nix#codex-rs" >> .envrc && direnv allow
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Security & responsible AI
|
||||
|
||||
Have you discovered a vulnerability or have concerns about model output? Please e-mail **security@openai.com** and we will respond promptly.
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This repository is licensed under the [Apache-2.0 License](LICENSE).
|
||||
@@ -15,7 +15,6 @@
|
||||
* current platform / architecture, an error is thrown.
|
||||
*/
|
||||
|
||||
import { spawnSync } from "child_process";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import { fileURLToPath, pathToFileURL } from "url";
|
||||
@@ -35,12 +34,13 @@ const wantsNative = fs.existsSync(path.join(__dirname, "use-native")) ||
|
||||
: false);
|
||||
|
||||
// Try native binary if requested.
|
||||
if (wantsNative) {
|
||||
if (wantsNative && process.platform !== 'win32') {
|
||||
const { platform, arch } = process;
|
||||
|
||||
let targetTriple = null;
|
||||
switch (platform) {
|
||||
case "linux":
|
||||
case "android":
|
||||
switch (arch) {
|
||||
case "x64":
|
||||
targetTriple = "x86_64-unknown-linux-musl";
|
||||
@@ -73,22 +73,76 @@ if (wantsNative) {
|
||||
}
|
||||
|
||||
const binaryPath = path.join(__dirname, "..", "bin", `codex-${targetTriple}`);
|
||||
const result = spawnSync(binaryPath, process.argv.slice(2), {
|
||||
|
||||
// Use an asynchronous spawn instead of spawnSync so that Node is able to
|
||||
// respond to signals (e.g. Ctrl-C / SIGINT) while the native binary is
|
||||
// executing. This allows us to forward those signals to the child process
|
||||
// and guarantees that when either the child terminates or the parent
|
||||
// receives a fatal signal, both processes exit in a predictable manner.
|
||||
const { spawn } = await import("child_process");
|
||||
|
||||
const child = spawn(binaryPath, process.argv.slice(2), {
|
||||
stdio: "inherit",
|
||||
});
|
||||
|
||||
const exitCode = typeof result.status === "number" ? result.status : 1;
|
||||
process.exit(exitCode);
|
||||
}
|
||||
child.on("error", (err) => {
|
||||
// Typically triggered when the binary is missing or not executable.
|
||||
// Re-throwing here will terminate the parent with a non-zero exit code
|
||||
// while still printing a helpful stack trace.
|
||||
// eslint-disable-next-line no-console
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
});
|
||||
|
||||
// Fallback: execute the original JavaScript CLI.
|
||||
// Forward common termination signals to the child so that it shuts down
|
||||
// gracefully. In the handler we temporarily disable the default behavior of
|
||||
// exiting immediately; once the child has been signaled we simply wait for
|
||||
// its exit event which will in turn terminate the parent (see below).
|
||||
const forwardSignal = (signal) => {
|
||||
if (child.killed) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
child.kill(signal);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
};
|
||||
|
||||
// Resolve the path to the compiled CLI bundle
|
||||
const cliPath = path.resolve(__dirname, "../dist/cli.js");
|
||||
const cliUrl = pathToFileURL(cliPath).href;
|
||||
["SIGINT", "SIGTERM", "SIGHUP"].forEach((sig) => {
|
||||
process.on(sig, () => forwardSignal(sig));
|
||||
});
|
||||
|
||||
// Load and execute the CLI
|
||||
(async () => {
|
||||
// When the child exits, mirror its termination reason in the parent so that
|
||||
// shell scripts and other tooling observe the correct exit status.
|
||||
// Wrap the lifetime of the child process in a Promise so that we can await
|
||||
// its termination in a structured way. The Promise resolves with an object
|
||||
// describing how the child exited: either via exit code or due to a signal.
|
||||
const childResult = await new Promise((resolve) => {
|
||||
child.on("exit", (code, signal) => {
|
||||
if (signal) {
|
||||
resolve({ type: "signal", signal });
|
||||
} else {
|
||||
resolve({ type: "code", exitCode: code ?? 1 });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
if (childResult.type === "signal") {
|
||||
// Re-emit the same signal so that the parent terminates with the expected
|
||||
// semantics (this also sets the correct exit code of 128 + n).
|
||||
process.kill(process.pid, childResult.signal);
|
||||
} else {
|
||||
process.exit(childResult.exitCode);
|
||||
}
|
||||
} else {
|
||||
// Fallback: execute the original JavaScript CLI.
|
||||
|
||||
// Resolve the path to the compiled CLI bundle
|
||||
const cliPath = path.resolve(__dirname, "../dist/cli.js");
|
||||
const cliUrl = pathToFileURL(cliPath).href;
|
||||
|
||||
// Load and execute the CLI
|
||||
try {
|
||||
await import(cliUrl);
|
||||
} catch (err) {
|
||||
@@ -96,4 +150,4 @@ const cliUrl = pathToFileURL(cliPath).href;
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
}
|
||||
})();
|
||||
}
|
||||
|
||||
@@ -84,6 +84,6 @@
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/openai/codex"
|
||||
"url": "git+https://github.com/openai/codex.git"
|
||||
}
|
||||
}
|
||||
|
||||
9
codex-cli/scripts/README.md
Normal file
9
codex-cli/scripts/README.md
Normal file
@@ -0,0 +1,9 @@
|
||||
# npm releases
|
||||
|
||||
Run the following:
|
||||
|
||||
To build the 0.2.x or later version of the npm module, which runs the Rust version of the CLI, build it as follows:
|
||||
|
||||
```bash
|
||||
./codex-cli/scripts/stage_rust_release.py --release-version 0.6.0
|
||||
```
|
||||
@@ -8,7 +8,7 @@
|
||||
# the native implementation when users set CODEX_RUST=1.
|
||||
#
|
||||
# Usage
|
||||
# install_native_deps.sh [RELEASE_ROOT] [--full-native]
|
||||
# install_native_deps.sh [--full-native] [--workflow-url URL] [CODEX_CLI_ROOT]
|
||||
#
|
||||
# The optional RELEASE_ROOT is the path that contains package.json. Omitting
|
||||
# it installs the binaries into the repository's own bin/ folder to support
|
||||
@@ -20,32 +20,43 @@ set -euo pipefail
|
||||
# Parse arguments
|
||||
# ------------------
|
||||
|
||||
DEST_DIR=""
|
||||
CODEX_CLI_ROOT=""
|
||||
INCLUDE_RUST=0
|
||||
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
# Until we start publishing stable GitHub releases, we have to grab the binaries
|
||||
# from the GitHub Action that created them. Update the URL below to point to the
|
||||
# appropriate workflow run:
|
||||
WORKFLOW_URL="https://github.com/openai/codex/actions/runs/15981617627"
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--full-native)
|
||||
INCLUDE_RUST=1
|
||||
;;
|
||||
--workflow-url)
|
||||
shift || { echo "--workflow-url requires an argument"; exit 1; }
|
||||
if [ -n "$1" ]; then
|
||||
WORKFLOW_URL="$1"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
if [[ -z "$DEST_DIR" ]]; then
|
||||
DEST_DIR="$arg"
|
||||
if [[ -z "$CODEX_CLI_ROOT" ]]; then
|
||||
CODEX_CLI_ROOT="$1"
|
||||
else
|
||||
echo "Unexpected argument: $arg" >&2
|
||||
echo "Unexpected argument: $1" >&2
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Determine where the binaries should be installed.
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
if [[ $# -gt 0 ]]; then
|
||||
if [ -n "$CODEX_CLI_ROOT" ]; then
|
||||
# The caller supplied a release root directory.
|
||||
CODEX_CLI_ROOT="$1"
|
||||
BIN_DIR="$CODEX_CLI_ROOT/bin"
|
||||
else
|
||||
# No argument; fall back to the repo’s own bin directory.
|
||||
@@ -62,10 +73,6 @@ mkdir -p "$BIN_DIR"
|
||||
# Download and decompress the artifacts from the GitHub Actions workflow.
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
# Until we start publishing stable GitHub releases, we have to grab the binaries
|
||||
# from the GitHub Action that created them. Update the URL below to point to the
|
||||
# appropriate workflow run:
|
||||
WORKFLOW_URL="https://github.com/openai/codex/actions/runs/15483730027"
|
||||
WORKFLOW_ID="${WORKFLOW_URL##*/}"
|
||||
|
||||
ARTIFACTS_DIR="$(mktemp -d)"
|
||||
|
||||
@@ -4,10 +4,7 @@
|
||||
# -----------------------------------------------------------------------------
|
||||
# Stages an npm release for @openai/codex.
|
||||
#
|
||||
# The script used to accept a single optional positional argument that indicated
|
||||
# the temporary directory in which to stage the package. We now support a
|
||||
# flag-based interface so that we can extend the command with further options
|
||||
# without breaking the call-site contract.
|
||||
# Usage:
|
||||
#
|
||||
# --tmp <dir> : Use <dir> instead of a freshly created temp directory.
|
||||
# --native : Bundle the pre-built Rust CLI binaries for Linux alongside
|
||||
@@ -30,11 +27,12 @@ set -euo pipefail
|
||||
|
||||
usage() {
|
||||
cat <<EOF
|
||||
Usage: $(basename "$0") [--tmp DIR] [--native]
|
||||
Usage: $(basename "$0") [--tmp DIR] [--native] [--version VERSION]
|
||||
|
||||
Options
|
||||
--tmp DIR Use DIR to stage the release (defaults to a fresh mktemp dir)
|
||||
--native Bundle Rust binaries for Linux (fat package)
|
||||
--version Specify the version to release (defaults to a timestamp-based version)
|
||||
-h, --help Show this help
|
||||
|
||||
Legacy positional argument: the first non-flag argument is still interpreted
|
||||
@@ -45,6 +43,9 @@ EOF
|
||||
|
||||
TMPDIR=""
|
||||
INCLUDE_NATIVE=0
|
||||
# Default to a timestamp-based version (keep same scheme as before)
|
||||
VERSION="$(printf '0.1.%d' "$(date +%y%m%d%H%M)")"
|
||||
WORKFLOW_URL=""
|
||||
|
||||
# Manual flag parser - Bash getopts does not handle GNU long options well.
|
||||
while [[ $# -gt 0 ]]; do
|
||||
@@ -59,6 +60,14 @@ while [[ $# -gt 0 ]]; do
|
||||
--native)
|
||||
INCLUDE_NATIVE=1
|
||||
;;
|
||||
--version)
|
||||
shift || { echo "--version requires an argument"; usage 1; }
|
||||
VERSION="$1"
|
||||
;;
|
||||
--workflow-url)
|
||||
shift || { echo "--workflow-url requires an argument"; exit 1; }
|
||||
WORKFLOW_URL="$1"
|
||||
;;
|
||||
-h|--help)
|
||||
usage 0
|
||||
;;
|
||||
@@ -108,9 +117,6 @@ cp -r dist "$TMPDIR/dist"
|
||||
cp -r src "$TMPDIR/src" # keep source for TS sourcemaps
|
||||
cp ../README.md "$TMPDIR" || true # README is one level up - ignore if missing
|
||||
|
||||
# Derive a timestamp-based version (keep same scheme as before)
|
||||
VERSION="$(printf '0.1.%d' "$(date +%y%m%d%H%M)")"
|
||||
|
||||
# Modify package.json - bump version and optionally add the native directory to
|
||||
# the files array so that the binaries are published to npm.
|
||||
|
||||
@@ -121,7 +127,7 @@ jq --arg version "$VERSION" \
|
||||
# 2. Native runtime deps (sandbox plus optional Rust binaries)
|
||||
|
||||
if [[ "$INCLUDE_NATIVE" -eq 1 ]]; then
|
||||
./scripts/install_native_deps.sh "$TMPDIR" --full-native
|
||||
./scripts/install_native_deps.sh --full-native --workflow-url "$WORKFLOW_URL" "$TMPDIR"
|
||||
touch "${TMPDIR}/bin/use-native"
|
||||
else
|
||||
./scripts/install_native_deps.sh "$TMPDIR"
|
||||
@@ -132,7 +138,8 @@ popd >/dev/null
|
||||
echo "Staged version $VERSION for release in $TMPDIR"
|
||||
|
||||
if [[ "$INCLUDE_NATIVE" -eq 1 ]]; then
|
||||
echo "Test Rust:"
|
||||
echo "Verify the CLI:"
|
||||
echo " node ${TMPDIR}/bin/codex.js --version"
|
||||
echo " node ${TMPDIR}/bin/codex.js --help"
|
||||
else
|
||||
echo "Test Node:"
|
||||
|
||||
62
codex-cli/scripts/stage_rust_release.py
Executable file
62
codex-cli/scripts/stage_rust_release.py
Executable file
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="""Stage a release for the npm module.
|
||||
|
||||
Run this after the GitHub Release has been created and use
|
||||
`--release-version` to specify the version to release.
|
||||
"""
|
||||
)
|
||||
parser.add_argument(
|
||||
"--release-version", required=True, help="Version to release, e.g., 0.3.0"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
version = args.release_version
|
||||
|
||||
gh_run = subprocess.run(
|
||||
[
|
||||
"gh",
|
||||
"run",
|
||||
"list",
|
||||
"--branch",
|
||||
f"rust-v{version}",
|
||||
"--json",
|
||||
"workflowName,url,headSha",
|
||||
"--jq",
|
||||
'first(.[] | select(.workflowName == "rust-release"))',
|
||||
],
|
||||
stdout=subprocess.PIPE,
|
||||
check=True,
|
||||
)
|
||||
gh_run.check_returncode()
|
||||
workflow = json.loads(gh_run.stdout)
|
||||
sha = workflow["headSha"]
|
||||
|
||||
print(f"should `git checkout {sha}`")
|
||||
|
||||
current_dir = Path(__file__).parent.resolve()
|
||||
stage_release = subprocess.run(
|
||||
[
|
||||
current_dir / "stage_release.sh",
|
||||
"--version",
|
||||
version,
|
||||
"--workflow-url",
|
||||
workflow["url"],
|
||||
"--native",
|
||||
]
|
||||
)
|
||||
stage_release.check_returncode()
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -370,11 +370,26 @@ export function isSafeCommand(
|
||||
reason: "View file with line numbers",
|
||||
group: "Reading files",
|
||||
};
|
||||
case "rg":
|
||||
case "rg": {
|
||||
// Certain ripgrep options execute external commands or invoke other
|
||||
// processes, so we must reject them.
|
||||
const isUnsafe = command.some(
|
||||
(arg: string) =>
|
||||
UNSAFE_OPTIONS_FOR_RIPGREP_WITHOUT_ARGS.has(arg) ||
|
||||
[...UNSAFE_OPTIONS_FOR_RIPGREP_WITH_ARGS].some(
|
||||
(opt) => arg === opt || arg.startsWith(`${opt}=`),
|
||||
),
|
||||
);
|
||||
|
||||
if (isUnsafe) {
|
||||
break;
|
||||
}
|
||||
|
||||
return {
|
||||
reason: "Ripgrep search",
|
||||
group: "Searching",
|
||||
};
|
||||
}
|
||||
case "find": {
|
||||
// Certain options to `find` allow executing arbitrary processes, so we
|
||||
// cannot auto-approve them.
|
||||
@@ -495,6 +510,22 @@ const UNSAFE_OPTIONS_FOR_FIND_COMMAND: ReadonlySet<string> = new Set([
|
||||
"-fprintf",
|
||||
]);
|
||||
|
||||
// Ripgrep options that are considered unsafe because they may execute
|
||||
// arbitrary commands or spawn auxiliary processes.
|
||||
const UNSAFE_OPTIONS_FOR_RIPGREP_WITH_ARGS: ReadonlySet<string> = new Set([
|
||||
// Executes an arbitrary command for each matching file.
|
||||
"--pre",
|
||||
// Allows custom hostname command which could leak environment details.
|
||||
"--hostname-bin",
|
||||
]);
|
||||
|
||||
const UNSAFE_OPTIONS_FOR_RIPGREP_WITHOUT_ARGS: ReadonlySet<string> = new Set([
|
||||
// Enables searching inside archives which triggers external decompression
|
||||
// utilities – reject out of an abundance of caution.
|
||||
"--search-zip",
|
||||
"-z",
|
||||
]);
|
||||
|
||||
// ---------------- Helper utilities for complex shell expressions -----------------
|
||||
|
||||
// A conservative allow-list of bash operators that do not, on their own, cause
|
||||
|
||||
@@ -45,6 +45,7 @@ import { createInputItem } from "./utils/input-utils";
|
||||
import { initLogger } from "./utils/logger/log";
|
||||
import { isModelSupportedForResponses } from "./utils/model-utils.js";
|
||||
import { parseToolCall } from "./utils/parsers";
|
||||
import { providers } from "./utils/providers";
|
||||
import { onExit, setInkRenderer } from "./utils/terminal";
|
||||
import chalk from "chalk";
|
||||
import { spawnSync } from "child_process";
|
||||
@@ -327,26 +328,44 @@ try {
|
||||
// ignore errors
|
||||
}
|
||||
|
||||
if (cli.flags.login) {
|
||||
apiKey = await fetchApiKey(client.issuer, client.client_id);
|
||||
try {
|
||||
const home = os.homedir();
|
||||
const authDir = path.join(home, ".codex");
|
||||
const authFile = path.join(authDir, "auth.json");
|
||||
if (fs.existsSync(authFile)) {
|
||||
const data = JSON.parse(fs.readFileSync(authFile, "utf-8"));
|
||||
savedTokens = data.tokens;
|
||||
// Get provider-specific API key if not OpenAI
|
||||
if (provider.toLowerCase() !== "openai") {
|
||||
const providerInfo = providers[provider.toLowerCase()];
|
||||
if (providerInfo) {
|
||||
const providerApiKey = process.env[providerInfo.envKey];
|
||||
if (providerApiKey) {
|
||||
apiKey = providerApiKey;
|
||||
}
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
} else if (!apiKey) {
|
||||
apiKey = await fetchApiKey(client.issuer, client.client_id);
|
||||
}
|
||||
|
||||
// Only proceed with OpenAI auth flow if:
|
||||
// 1. Provider is OpenAI and no API key is set, or
|
||||
// 2. Login flag is explicitly set
|
||||
if (provider.toLowerCase() === "openai" && !apiKey) {
|
||||
if (cli.flags.login) {
|
||||
apiKey = await fetchApiKey(client.issuer, client.client_id);
|
||||
try {
|
||||
const home = os.homedir();
|
||||
const authDir = path.join(home, ".codex");
|
||||
const authFile = path.join(authDir, "auth.json");
|
||||
if (fs.existsSync(authFile)) {
|
||||
const data = JSON.parse(fs.readFileSync(authFile, "utf-8"));
|
||||
savedTokens = data.tokens;
|
||||
}
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
} else {
|
||||
apiKey = await fetchApiKey(client.issuer, client.client_id);
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure the API key is available as an environment variable for legacy code
|
||||
process.env["OPENAI_API_KEY"] = apiKey;
|
||||
|
||||
if (cli.flags.free) {
|
||||
// Only attempt credit redemption for OpenAI provider
|
||||
if (cli.flags.free && provider.toLowerCase() === "openai") {
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(`${chalk.bold("codex --free")} attempting to redeem credits...`);
|
||||
if (!savedTokens?.refresh_token) {
|
||||
@@ -379,13 +398,18 @@ if (!apiKey && !NO_API_KEY_REQUIRED.has(provider.toLowerCase())) {
|
||||
? `You can create a key here: ${chalk.bold(
|
||||
chalk.underline("https://platform.openai.com/account/api-keys"),
|
||||
)}\n`
|
||||
: provider.toLowerCase() === "gemini"
|
||||
: provider.toLowerCase() === "azure"
|
||||
? `You can create a ${chalk.bold(
|
||||
`${provider.toUpperCase()}_API_KEY`,
|
||||
)} ` + `in the ${chalk.bold(`Google AI Studio`)}.\n`
|
||||
: `You can create a ${chalk.bold(
|
||||
`${provider.toUpperCase()}_API_KEY`,
|
||||
)} ` + `in the ${chalk.bold(`${provider}`)} dashboard.\n`
|
||||
`${provider.toUpperCase()}_OPENAI_API_KEY`,
|
||||
)} ` +
|
||||
`in Azure AI Foundry portal at ${chalk.bold(chalk.underline("https://ai.azure.com"))}.\n`
|
||||
: provider.toLowerCase() === "gemini"
|
||||
? `You can create a ${chalk.bold(
|
||||
`${provider.toUpperCase()}_API_KEY`,
|
||||
)} ` + `in the ${chalk.bold(`Google AI Studio`)}.\n`
|
||||
: `You can create a ${chalk.bold(
|
||||
`${provider.toUpperCase()}_API_KEY`,
|
||||
)} ` + `in the ${chalk.bold(`${provider}`)} dashboard.\n`
|
||||
}`,
|
||||
);
|
||||
process.exit(1);
|
||||
|
||||
@@ -800,7 +800,8 @@ export class AgentLoop {
|
||||
|
||||
const responseCall =
|
||||
!this.config.provider ||
|
||||
this.config.provider?.toLowerCase() === "openai"
|
||||
this.config.provider?.toLowerCase() === "openai" ||
|
||||
this.config.provider?.toLowerCase() === "azure"
|
||||
? (params: ResponseCreateParams) =>
|
||||
this.oai.responses.create(params)
|
||||
: (params: ResponseCreateParams) =>
|
||||
@@ -1188,7 +1189,8 @@ export class AgentLoop {
|
||||
|
||||
const responseCall =
|
||||
!this.config.provider ||
|
||||
this.config.provider?.toLowerCase() === "openai"
|
||||
this.config.provider?.toLowerCase() === "openai" ||
|
||||
this.config.provider?.toLowerCase() === "azure"
|
||||
? (params: ResponseCreateParams) =>
|
||||
this.oai.responses.create(params)
|
||||
: (params: ResponseCreateParams) =>
|
||||
|
||||
@@ -69,7 +69,7 @@ export const OPENAI_BASE_URL = process.env["OPENAI_BASE_URL"] || "";
|
||||
export let OPENAI_API_KEY = process.env["OPENAI_API_KEY"] || "";
|
||||
|
||||
export const AZURE_OPENAI_API_VERSION =
|
||||
process.env["AZURE_OPENAI_API_VERSION"] || "2025-03-01-preview";
|
||||
process.env["AZURE_OPENAI_API_VERSION"] || "2025-04-01-preview";
|
||||
|
||||
export const DEFAULT_REASONING_EFFORT = "high";
|
||||
export const OPENAI_ORGANIZATION = process.env["OPENAI_ORGANIZATION"] || "";
|
||||
|
||||
107
codex-cli/tests/agent-azure-responses-endpoint.test.ts
Normal file
107
codex-cli/tests/agent-azure-responses-endpoint.test.ts
Normal file
@@ -0,0 +1,107 @@
|
||||
/**
|
||||
* tests/agent-azure-responses-endpoint.test.ts
|
||||
*
|
||||
* Verifies that AgentLoop calls the `/responses` endpoint when provider is set to Azure.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
|
||||
// Fake stream that yields a completed response event
|
||||
class FakeStream {
|
||||
async *[Symbol.asyncIterator]() {
|
||||
yield {
|
||||
type: "response.completed",
|
||||
response: { id: "azure_resp", status: "completed", output: [] },
|
||||
} as any;
|
||||
}
|
||||
}
|
||||
|
||||
let lastCreateParams: any = null;
|
||||
|
||||
vi.mock("openai", () => {
|
||||
class FakeDefaultClient {
|
||||
public responses = {
|
||||
create: async (params: any) => {
|
||||
lastCreateParams = params;
|
||||
return new FakeStream();
|
||||
},
|
||||
};
|
||||
}
|
||||
class FakeAzureClient {
|
||||
public responses = {
|
||||
create: async (params: any) => {
|
||||
lastCreateParams = params;
|
||||
return new FakeStream();
|
||||
},
|
||||
};
|
||||
}
|
||||
class APIConnectionTimeoutError extends Error {}
|
||||
return {
|
||||
__esModule: true,
|
||||
default: FakeDefaultClient,
|
||||
AzureOpenAI: FakeAzureClient,
|
||||
APIConnectionTimeoutError,
|
||||
};
|
||||
});
|
||||
|
||||
// Stub approvals to bypass command approval logic
|
||||
vi.mock("../src/approvals.js", () => ({
|
||||
__esModule: true,
|
||||
alwaysApprovedCommands: new Set<string>(),
|
||||
canAutoApprove: () => ({ type: "auto-approve", runInSandbox: false }),
|
||||
isSafeCommand: () => null,
|
||||
}));
|
||||
|
||||
// Stub format-command to avoid formatting side effects
|
||||
vi.mock("../src/format-command.js", () => ({
|
||||
__esModule: true,
|
||||
formatCommandForDisplay: (cmd: Array<string>) => cmd.join(" "),
|
||||
}));
|
||||
|
||||
// Stub internal logging to keep output clean
|
||||
vi.mock("../src/utils/agent/log.js", () => ({
|
||||
__esModule: true,
|
||||
log: () => {},
|
||||
isLoggingEnabled: () => false,
|
||||
}));
|
||||
|
||||
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
|
||||
|
||||
describe("AgentLoop Azure provider responses endpoint", () => {
|
||||
beforeEach(() => {
|
||||
lastCreateParams = null;
|
||||
});
|
||||
|
||||
it("calls the /responses endpoint when provider is azure", async () => {
|
||||
const cfg: any = {
|
||||
model: "test-model",
|
||||
provider: "azure",
|
||||
instructions: "",
|
||||
disableResponseStorage: false,
|
||||
notify: false,
|
||||
};
|
||||
const loop = new AgentLoop({
|
||||
additionalWritableRoots: [],
|
||||
model: cfg.model,
|
||||
config: cfg,
|
||||
instructions: cfg.instructions,
|
||||
approvalPolicy: { mode: "suggest" } as any,
|
||||
onItem: () => {},
|
||||
onLoading: () => {},
|
||||
getCommandConfirmation: async () => ({ review: "yes" }) as any,
|
||||
onLastResponseId: () => {},
|
||||
});
|
||||
|
||||
await loop.run([
|
||||
{
|
||||
type: "message",
|
||||
role: "user",
|
||||
content: [{ type: "input_text", text: "hello" }],
|
||||
},
|
||||
]);
|
||||
|
||||
expect(lastCreateParams).not.toBeNull();
|
||||
expect(lastCreateParams.model).toBe(cfg.model);
|
||||
expect(Array.isArray(lastCreateParams.input)).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -44,6 +44,14 @@ describe("canAutoApprove()", () => {
|
||||
group: "Navigating",
|
||||
runInSandbox: false,
|
||||
});
|
||||
|
||||
// Ripgrep safe invocation.
|
||||
expect(check(["rg", "TODO"])).toEqual({
|
||||
type: "auto-approve",
|
||||
reason: "Ripgrep search",
|
||||
group: "Searching",
|
||||
runInSandbox: false,
|
||||
});
|
||||
});
|
||||
|
||||
test("simple safe commands within a `bash -lc` call", () => {
|
||||
@@ -67,6 +75,24 @@ describe("canAutoApprove()", () => {
|
||||
});
|
||||
});
|
||||
|
||||
test("ripgrep unsafe flags", () => {
|
||||
// Flags that do not take arguments
|
||||
expect(check(["rg", "--search-zip", "TODO"])).toEqual({ type: "ask-user" });
|
||||
expect(check(["rg", "-z", "TODO"])).toEqual({ type: "ask-user" });
|
||||
|
||||
// Flags that take arguments (provided separately)
|
||||
expect(check(["rg", "--pre", "cat", "TODO"])).toEqual({ type: "ask-user" });
|
||||
expect(check(["rg", "--hostname-bin", "hostname", "TODO"])).toEqual({
|
||||
type: "ask-user",
|
||||
});
|
||||
|
||||
// Flags that take arguments in = form
|
||||
expect(check(["rg", "--pre=cat", "TODO"])).toEqual({ type: "ask-user" });
|
||||
expect(check(["rg", "--hostname-bin=hostname", "TODO"])).toEqual({
|
||||
type: "ask-user",
|
||||
});
|
||||
});
|
||||
|
||||
test("bash -lc commands with unsafe redirects", () => {
|
||||
expect(check(["bash", "-lc", "echo hello > file.txt"])).toEqual({
|
||||
type: "ask-user",
|
||||
|
||||
1413
codex-rs/Cargo.lock
generated
1413
codex-rs/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -8,6 +8,7 @@ members = [
|
||||
"core",
|
||||
"exec",
|
||||
"execpolicy",
|
||||
"file-search",
|
||||
"linux-sandbox",
|
||||
"login",
|
||||
"mcp-client",
|
||||
@@ -36,3 +37,6 @@ lto = "fat"
|
||||
# Because we bundle some of these executables with the TypeScript CLI, we
|
||||
# remove everything to make the binary as small as possible.
|
||||
strip = "symbols"
|
||||
|
||||
# See https://github.com/openai/codex/issues/1411 for details.
|
||||
codegen-units = 1
|
||||
|
||||
@@ -39,25 +39,52 @@ You can enable notifications by configuring a script that is run whenever the ag
|
||||
|
||||
To run Codex non-interactively, run `codex exec PROMPT` (you can also pass the prompt via `stdin`) and Codex will work on your task until it decides that it is done and exits. Output is printed to the terminal directly. You can set the `RUST_LOG` environment variable to see more about what's going on.
|
||||
|
||||
### Use `@` for file search
|
||||
|
||||
Typing `@` triggers a fuzzy-filename search over the workspace root. Use up/down to select among the results and Tab or Enter to replace the `@` with the selected path. You can use Esc to cancel the search.
|
||||
|
||||
### `--cd`/`-C` flag
|
||||
|
||||
Sometimes it is not convenient to `cd` to the directory you want Codex to use as the "working root" before running Codex. Fortunately, `codex` supports a `--cd` option so you can specify whatever folder you want. You can confirm that Codex is honoring `--cd` by double-checking the **workdir** it reports in the TUI at the start of a new session.
|
||||
|
||||
### Shell completions
|
||||
|
||||
Generate shell completion scripts via:
|
||||
|
||||
```shell
|
||||
codex completion bash
|
||||
codex completion zsh
|
||||
codex completion fish
|
||||
```
|
||||
|
||||
### Experimenting with the Codex Sandbox
|
||||
|
||||
To test to see what happens when a command is run under the sandbox provided by Codex, we provide the following subcommands in Codex CLI:
|
||||
|
||||
```
|
||||
# macOS
|
||||
codex debug seatbelt [-s SANDBOX_PERMISSION]... [COMMAND]...
|
||||
codex debug seatbelt [--full-auto] [COMMAND]...
|
||||
|
||||
# Linux
|
||||
codex debug landlock [-s SANDBOX_PERMISSION]... [COMMAND]...
|
||||
codex debug landlock [--full-auto] [COMMAND]...
|
||||
```
|
||||
|
||||
You can experiment with different values of `-s` to see what permissions the `COMMAND` needs to execute successfully.
|
||||
### Selecting a sandbox policy via `--sandbox`
|
||||
|
||||
Note that the exact API for the `-s` flag is currently in flux. See https://github.com/openai/codex/issues/1248 for details.
|
||||
The Rust CLI exposes a dedicated `--sandbox` (`-s`) flag that lets you pick the sandbox policy **without** having to reach for the generic `-c/--config` option:
|
||||
|
||||
```shell
|
||||
# Run Codex with the default, read-only sandbox
|
||||
codex --sandbox read-only
|
||||
|
||||
# Allow the agent to write within the current workspace while still blocking network access
|
||||
codex --sandbox workspace-write
|
||||
|
||||
# Danger! Disable sandboxing entirely (only do this if you are already running in a container or other isolated env)
|
||||
codex --sandbox danger-full-access
|
||||
```
|
||||
|
||||
The same setting can be persisted in `~/.codex/config.toml` via the top-level `sandbox_mode = "MODE"` key, e.g. `sandbox_mode = "workspace-write"`.
|
||||
|
||||
## Code Organization
|
||||
|
||||
|
||||
@@ -12,11 +12,10 @@ workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
serde_json = "1.0.110"
|
||||
similar = "2.7.0"
|
||||
thiserror = "2.0.12"
|
||||
tree-sitter = "0.25.3"
|
||||
tree-sitter-bash = "0.23.3"
|
||||
tree-sitter = "0.25.8"
|
||||
tree-sitter-bash = "0.25.0"
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = "1.4.1"
|
||||
|
||||
@@ -633,7 +633,7 @@ mod tests {
|
||||
|
||||
/// Helper to construct a patch with the given body.
|
||||
fn wrap_patch(body: &str) -> String {
|
||||
format!("*** Begin Patch\n{}\n*** End Patch", body)
|
||||
format!("*** Begin Patch\n{body}\n*** End Patch")
|
||||
}
|
||||
|
||||
fn strs_to_strings(strs: &[&str]) -> Vec<String> {
|
||||
@@ -661,7 +661,7 @@ mod tests {
|
||||
}]
|
||||
);
|
||||
}
|
||||
result => panic!("expected MaybeApplyPatch::Body got {:?}", result),
|
||||
result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -688,7 +688,7 @@ PATCH"#,
|
||||
}]
|
||||
);
|
||||
}
|
||||
result => panic!("expected MaybeApplyPatch::Body got {:?}", result),
|
||||
result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
21
codex-rs/chatgpt/Cargo.toml
Normal file
21
codex-rs/chatgpt/Cargo.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
[package]
|
||||
name = "codex-chatgpt"
|
||||
version = { workspace = true }
|
||||
edition = "2024"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
codex-common = { path = "../common", features = ["cli"] }
|
||||
codex-core = { path = "../core" }
|
||||
codex-login = { path = "../login" }
|
||||
reqwest = { version = "0.12", features = ["json", "stream"] }
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
5
codex-rs/chatgpt/README.md
Normal file
5
codex-rs/chatgpt/README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# ChatGPT
|
||||
|
||||
This crate pertains to first party ChatGPT APIs and products such as Codex agent.
|
||||
|
||||
This crate should be primarily built and maintained by OpenAI employees. Please reach out to a maintainer before making an external contribution.
|
||||
101
codex-rs/chatgpt/src/apply_command.rs
Normal file
101
codex-rs/chatgpt/src/apply_command.rs
Normal file
@@ -0,0 +1,101 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use clap::Parser;
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
|
||||
use crate::chatgpt_token::init_chatgpt_token_from_auth;
|
||||
use crate::get_task::GetTaskResponse;
|
||||
use crate::get_task::OutputItem;
|
||||
use crate::get_task::PrOutputItem;
|
||||
use crate::get_task::get_task;
|
||||
|
||||
/// Applies the latest diff from a Codex agent task.
|
||||
#[derive(Debug, Parser)]
|
||||
pub struct ApplyCommand {
|
||||
pub task_id: String,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub config_overrides: CliConfigOverrides,
|
||||
}
|
||||
pub async fn run_apply_command(
|
||||
apply_cli: ApplyCommand,
|
||||
cwd: Option<PathBuf>,
|
||||
) -> anyhow::Result<()> {
|
||||
let config = Config::load_with_cli_overrides(
|
||||
apply_cli
|
||||
.config_overrides
|
||||
.parse_overrides()
|
||||
.map_err(anyhow::Error::msg)?,
|
||||
ConfigOverrides::default(),
|
||||
)?;
|
||||
|
||||
init_chatgpt_token_from_auth(&config.codex_home).await?;
|
||||
|
||||
let task_response = get_task(&config, apply_cli.task_id).await?;
|
||||
apply_diff_from_task(task_response, cwd).await
|
||||
}
|
||||
|
||||
pub async fn apply_diff_from_task(
|
||||
task_response: GetTaskResponse,
|
||||
cwd: Option<PathBuf>,
|
||||
) -> anyhow::Result<()> {
|
||||
let diff_turn = match task_response.current_diff_task_turn {
|
||||
Some(turn) => turn,
|
||||
None => anyhow::bail!("No diff turn found"),
|
||||
};
|
||||
let output_diff = diff_turn.output_items.iter().find_map(|item| match item {
|
||||
OutputItem::Pr(PrOutputItem { output_diff }) => Some(output_diff),
|
||||
_ => None,
|
||||
});
|
||||
match output_diff {
|
||||
Some(output_diff) => apply_diff(&output_diff.diff, cwd).await,
|
||||
None => anyhow::bail!("No PR output item found"),
|
||||
}
|
||||
}
|
||||
|
||||
async fn apply_diff(diff: &str, cwd: Option<PathBuf>) -> anyhow::Result<()> {
|
||||
let mut cmd = tokio::process::Command::new("git");
|
||||
if let Some(cwd) = cwd {
|
||||
cmd.current_dir(cwd);
|
||||
}
|
||||
let toplevel_output = cmd
|
||||
.args(vec!["rev-parse", "--show-toplevel"])
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !toplevel_output.status.success() {
|
||||
anyhow::bail!("apply must be run from a git repository.");
|
||||
}
|
||||
|
||||
let repo_root = String::from_utf8(toplevel_output.stdout)?
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
let mut git_apply_cmd = tokio::process::Command::new("git")
|
||||
.args(vec!["apply", "--3way"])
|
||||
.current_dir(&repo_root)
|
||||
.stdin(std::process::Stdio::piped())
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()?;
|
||||
|
||||
if let Some(mut stdin) = git_apply_cmd.stdin.take() {
|
||||
tokio::io::AsyncWriteExt::write_all(&mut stdin, diff.as_bytes()).await?;
|
||||
drop(stdin);
|
||||
}
|
||||
|
||||
let output = git_apply_cmd.wait_with_output().await?;
|
||||
|
||||
if !output.status.success() {
|
||||
anyhow::bail!(
|
||||
"Git apply failed with status {}: {}",
|
||||
output.status,
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
println!("Successfully applied diff");
|
||||
Ok(())
|
||||
}
|
||||
45
codex-rs/chatgpt/src/chatgpt_client.rs
Normal file
45
codex-rs/chatgpt/src/chatgpt_client.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
use codex_core::config::Config;
|
||||
|
||||
use crate::chatgpt_token::get_chatgpt_token_data;
|
||||
use crate::chatgpt_token::init_chatgpt_token_from_auth;
|
||||
|
||||
use anyhow::Context;
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
/// Make a GET request to the ChatGPT backend API.
|
||||
pub(crate) async fn chatgpt_get_request<T: DeserializeOwned>(
|
||||
config: &Config,
|
||||
path: String,
|
||||
) -> anyhow::Result<T> {
|
||||
let chatgpt_base_url = &config.chatgpt_base_url;
|
||||
init_chatgpt_token_from_auth(&config.codex_home).await?;
|
||||
|
||||
// Make direct HTTP request to ChatGPT backend API with the token
|
||||
let client = reqwest::Client::new();
|
||||
let url = format!("{chatgpt_base_url}{path}");
|
||||
|
||||
let token =
|
||||
get_chatgpt_token_data().ok_or_else(|| anyhow::anyhow!("ChatGPT token not available"))?;
|
||||
|
||||
let response = client
|
||||
.get(&url)
|
||||
.bearer_auth(&token.access_token)
|
||||
.header("chatgpt-account-id", &token.account_id)
|
||||
.header("Content-Type", "application/json")
|
||||
.header("User-Agent", "codex-cli")
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to send request")?;
|
||||
|
||||
if response.status().is_success() {
|
||||
let result: T = response
|
||||
.json()
|
||||
.await
|
||||
.context("Failed to parse JSON response")?;
|
||||
Ok(result)
|
||||
} else {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Request failed with status {}: {}", status, body)
|
||||
}
|
||||
}
|
||||
24
codex-rs/chatgpt/src/chatgpt_token.rs
Normal file
24
codex-rs/chatgpt/src/chatgpt_token.rs
Normal file
@@ -0,0 +1,24 @@
|
||||
use std::path::Path;
|
||||
use std::sync::LazyLock;
|
||||
use std::sync::RwLock;
|
||||
|
||||
use codex_login::TokenData;
|
||||
|
||||
static CHATGPT_TOKEN: LazyLock<RwLock<Option<TokenData>>> = LazyLock::new(|| RwLock::new(None));
|
||||
|
||||
pub fn get_chatgpt_token_data() -> Option<TokenData> {
|
||||
CHATGPT_TOKEN.read().ok()?.clone()
|
||||
}
|
||||
|
||||
pub fn set_chatgpt_token_data(value: TokenData) {
|
||||
if let Ok(mut guard) = CHATGPT_TOKEN.write() {
|
||||
*guard = Some(value);
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize the ChatGPT token from auth.json file
|
||||
pub async fn init_chatgpt_token_from_auth(codex_home: &Path) -> std::io::Result<()> {
|
||||
let auth_json = codex_login::try_read_auth_json(codex_home).await?;
|
||||
set_chatgpt_token_data(auth_json.tokens.clone());
|
||||
Ok(())
|
||||
}
|
||||
40
codex-rs/chatgpt/src/get_task.rs
Normal file
40
codex-rs/chatgpt/src/get_task.rs
Normal file
@@ -0,0 +1,40 @@
|
||||
use codex_core::config::Config;
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::chatgpt_client::chatgpt_get_request;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct GetTaskResponse {
|
||||
pub current_diff_task_turn: Option<AssistantTurn>,
|
||||
}
|
||||
|
||||
// Only relevant fields for our extraction
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct AssistantTurn {
|
||||
pub output_items: Vec<OutputItem>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum OutputItem {
|
||||
#[serde(rename = "pr")]
|
||||
Pr(PrOutputItem),
|
||||
|
||||
#[serde(other)]
|
||||
Other,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct PrOutputItem {
|
||||
pub output_diff: OutputDiff,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct OutputDiff {
|
||||
pub diff: String,
|
||||
}
|
||||
|
||||
pub(crate) async fn get_task(config: &Config, task_id: String) -> anyhow::Result<GetTaskResponse> {
|
||||
let path = format!("/wham/tasks/{task_id}");
|
||||
chatgpt_get_request(config, path).await
|
||||
}
|
||||
4
codex-rs/chatgpt/src/lib.rs
Normal file
4
codex-rs/chatgpt/src/lib.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
pub mod apply_command;
|
||||
mod chatgpt_client;
|
||||
mod chatgpt_token;
|
||||
pub mod get_task;
|
||||
181
codex-rs/chatgpt/tests/apply_command_e2e.rs
Normal file
181
codex-rs/chatgpt/tests/apply_command_e2e.rs
Normal file
@@ -0,0 +1,181 @@
|
||||
#![expect(clippy::expect_used)]
|
||||
|
||||
use codex_chatgpt::apply_command::apply_diff_from_task;
|
||||
use codex_chatgpt::get_task::GetTaskResponse;
|
||||
use std::path::Path;
|
||||
use tempfile::TempDir;
|
||||
use tokio::process::Command;
|
||||
|
||||
/// Creates a temporary git repository with initial commit
|
||||
async fn create_temp_git_repo() -> anyhow::Result<TempDir> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let repo_path = temp_dir.path();
|
||||
|
||||
let output = Command::new("git")
|
||||
.args(["init"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !output.status.success() {
|
||||
anyhow::bail!(
|
||||
"Failed to initialize git repo: {}",
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
Command::new("git")
|
||||
.args(["config", "user.email", "test@example.com"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
Command::new("git")
|
||||
.args(["config", "user.name", "Test User"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
std::fs::write(repo_path.join("README.md"), "# Test Repo\n")?;
|
||||
|
||||
Command::new("git")
|
||||
.args(["add", "README.md"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
let output = Command::new("git")
|
||||
.args(["commit", "-m", "Initial commit"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !output.status.success() {
|
||||
anyhow::bail!(
|
||||
"Failed to create initial commit: {}",
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
Ok(temp_dir)
|
||||
}
|
||||
|
||||
async fn mock_get_task_with_fixture() -> anyhow::Result<GetTaskResponse> {
|
||||
let fixture_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/task_turn_fixture.json");
|
||||
let fixture_content = std::fs::read_to_string(fixture_path)?;
|
||||
let response: GetTaskResponse = serde_json::from_str(&fixture_content)?;
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_apply_command_creates_fibonacci_file() {
|
||||
let temp_repo = create_temp_git_repo()
|
||||
.await
|
||||
.expect("Failed to create temp git repo");
|
||||
let repo_path = temp_repo.path();
|
||||
|
||||
let task_response = mock_get_task_with_fixture()
|
||||
.await
|
||||
.expect("Failed to load fixture");
|
||||
|
||||
apply_diff_from_task(task_response, Some(repo_path.to_path_buf()))
|
||||
.await
|
||||
.expect("Failed to apply diff from task");
|
||||
|
||||
// Assert that fibonacci.js was created in scripts/ directory
|
||||
let fibonacci_path = repo_path.join("scripts/fibonacci.js");
|
||||
assert!(fibonacci_path.exists(), "fibonacci.js was not created");
|
||||
|
||||
// Verify the file contents match expected
|
||||
let contents = std::fs::read_to_string(&fibonacci_path).expect("Failed to read fibonacci.js");
|
||||
assert!(
|
||||
contents.contains("function fibonacci(n)"),
|
||||
"fibonacci.js doesn't contain expected function"
|
||||
);
|
||||
assert!(
|
||||
contents.contains("#!/usr/bin/env node"),
|
||||
"fibonacci.js doesn't have shebang"
|
||||
);
|
||||
assert!(
|
||||
contents.contains("module.exports = fibonacci;"),
|
||||
"fibonacci.js doesn't export function"
|
||||
);
|
||||
|
||||
// Verify file has correct number of lines (31 as specified in fixture)
|
||||
let line_count = contents.lines().count();
|
||||
assert_eq!(
|
||||
line_count, 31,
|
||||
"fibonacci.js should have 31 lines, got {line_count}",
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_apply_command_with_merge_conflicts() {
|
||||
let temp_repo = create_temp_git_repo()
|
||||
.await
|
||||
.expect("Failed to create temp git repo");
|
||||
let repo_path = temp_repo.path();
|
||||
|
||||
// Create conflicting fibonacci.js file first
|
||||
let scripts_dir = repo_path.join("scripts");
|
||||
std::fs::create_dir_all(&scripts_dir).expect("Failed to create scripts directory");
|
||||
|
||||
let conflicting_content = r#"#!/usr/bin/env node
|
||||
|
||||
// This is a different fibonacci implementation
|
||||
function fib(num) {
|
||||
if (num <= 1) return num;
|
||||
return fib(num - 1) + fib(num - 2);
|
||||
}
|
||||
|
||||
console.log("Running fibonacci...");
|
||||
console.log(fib(10));
|
||||
"#;
|
||||
|
||||
let fibonacci_path = scripts_dir.join("fibonacci.js");
|
||||
std::fs::write(&fibonacci_path, conflicting_content).expect("Failed to write conflicting file");
|
||||
|
||||
Command::new("git")
|
||||
.args(["add", "scripts/fibonacci.js"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to add fibonacci.js");
|
||||
|
||||
Command::new("git")
|
||||
.args(["commit", "-m", "Add conflicting fibonacci implementation"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to commit conflicting file");
|
||||
|
||||
let original_dir = std::env::current_dir().expect("Failed to get current dir");
|
||||
std::env::set_current_dir(repo_path).expect("Failed to change directory");
|
||||
struct DirGuard(std::path::PathBuf);
|
||||
impl Drop for DirGuard {
|
||||
fn drop(&mut self) {
|
||||
let _ = std::env::set_current_dir(&self.0);
|
||||
}
|
||||
}
|
||||
let _guard = DirGuard(original_dir);
|
||||
|
||||
let task_response = mock_get_task_with_fixture()
|
||||
.await
|
||||
.expect("Failed to load fixture");
|
||||
|
||||
let apply_result = apply_diff_from_task(task_response, Some(repo_path.to_path_buf())).await;
|
||||
|
||||
assert!(
|
||||
apply_result.is_err(),
|
||||
"Expected apply to fail due to merge conflicts"
|
||||
);
|
||||
|
||||
let contents = std::fs::read_to_string(&fibonacci_path).expect("Failed to read fibonacci.js");
|
||||
|
||||
assert!(
|
||||
contents.contains("<<<<<<< HEAD")
|
||||
|| contents.contains("=======")
|
||||
|| contents.contains(">>>>>>> "),
|
||||
"fibonacci.js should contain merge conflict markers, got: {contents}",
|
||||
);
|
||||
}
|
||||
65
codex-rs/chatgpt/tests/task_turn_fixture.json
Normal file
65
codex-rs/chatgpt/tests/task_turn_fixture.json
Normal file
@@ -0,0 +1,65 @@
|
||||
{
|
||||
"current_diff_task_turn": {
|
||||
"output_items": [
|
||||
{
|
||||
"type": "pr",
|
||||
"pr_title": "Add fibonacci script",
|
||||
"pr_message": "## Summary\n- add a basic Fibonacci script under `scripts/`\n\n## Testing\n- `node scripts/fibonacci.js 10`\n- `npm run lint` *(fails: next not found)*",
|
||||
"output_diff": {
|
||||
"type": "output_diff",
|
||||
"repo_id": "/workspace/rddit-vercel",
|
||||
"base_commit_sha": "1a2e9baf2ce2fdd0c126b47b1bcfd512de2a9f7b",
|
||||
"diff": "diff --git a/scripts/fibonacci.js b/scripts/fibonacci.js\nnew file mode 100644\nindex 0000000000000000000000000000000000000000..6c9fdfdbf8669b7968936411050525b995d0a9a6\n--- /dev/null\n+++ b/scripts/fibonacci.js\n@@ -0,0 +1,31 @@\n+#!/usr/bin/env node\n+\n+function fibonacci(n) {\n+ if (n < 0) {\n+ throw new Error(\"n must be non-negative\");\n+ }\n+ let a = 0;\n+ let b = 1;\n+ for (let i = 0; i < n; i++) {\n+ const next = a + b;\n+ a = b;\n+ b = next;\n+ }\n+ return a;\n+}\n+\n+function printUsage() {\n+ console.log(\"Usage: node scripts/fibonacci.js <n>\");\n+}\n+\n+if (require.main === module) {\n+ const arg = process.argv[2];\n+ if (arg === undefined || isNaN(Number(arg))) {\n+ printUsage();\n+ process.exit(1);\n+ }\n+ const n = Number(arg);\n+ console.log(fibonacci(n));\n+}\n+\n+module.exports = fibonacci;\n",
|
||||
"external_storage_diff": {
|
||||
"file_id": "file_00000000114c61f786900f8c2130ace7",
|
||||
"ttl": null
|
||||
},
|
||||
"files_modified": 1,
|
||||
"lines_added": 31,
|
||||
"lines_removed": 0,
|
||||
"commit_message": "Add fibonacci script"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"content_type": "text",
|
||||
"text": "**Summary**\n\n- Created a command-line Fibonacci script that validates input and prints the result when executed with Node"
|
||||
},
|
||||
{
|
||||
"content_type": "repo_file_citation",
|
||||
"path": "scripts/fibonacci.js",
|
||||
"line_range_start": 1,
|
||||
"line_range_end": 31
|
||||
},
|
||||
{
|
||||
"content_type": "text",
|
||||
"text": "\n\n**Testing**\n\n- ❌ `npm run lint` (failed to run `next lint`)"
|
||||
},
|
||||
{
|
||||
"content_type": "terminal_chunk_citation",
|
||||
"terminal_chunk_id": "7dd543",
|
||||
"line_range_start": 1,
|
||||
"line_range_end": 5
|
||||
},
|
||||
{
|
||||
"content_type": "text",
|
||||
"text": "\n- ✅ `node scripts/fibonacci.js 10` produced “55”"
|
||||
},
|
||||
{
|
||||
"content_type": "terminal_chunk_citation",
|
||||
"terminal_chunk_id": "6ee559",
|
||||
"line_range_start": 1,
|
||||
"line_range_end": 3
|
||||
},
|
||||
{
|
||||
"content_type": "text",
|
||||
"text": "\n\nCodex couldn't run certain commands due to environment limitations. Consider configuring a setup script or internet access in your Codex environment to install dependencies."
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -17,6 +17,8 @@ workspace = true
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
clap_complete = "4"
|
||||
codex-chatgpt = { path = "../chatgpt" }
|
||||
codex-core = { path = "../core" }
|
||||
codex-common = { path = "../common", features = ["cli"] }
|
||||
codex-exec = { path = "../exec" }
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_common::SandboxPermissionOption;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::config_types::SandboxMode;
|
||||
use codex_core::exec::StdioPolicy;
|
||||
use codex_core::exec::spawn_command_under_linux_sandbox;
|
||||
use codex_core::exec::spawn_command_under_seatbelt;
|
||||
use codex_core::exec_env::create_env;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
|
||||
use crate::LandlockCommand;
|
||||
use crate::SeatbeltCommand;
|
||||
@@ -20,13 +19,11 @@ pub async fn run_command_under_seatbelt(
|
||||
) -> anyhow::Result<()> {
|
||||
let SeatbeltCommand {
|
||||
full_auto,
|
||||
sandbox,
|
||||
config_overrides,
|
||||
command,
|
||||
} = command;
|
||||
run_command_under_sandbox(
|
||||
full_auto,
|
||||
sandbox,
|
||||
command,
|
||||
config_overrides,
|
||||
codex_linux_sandbox_exe,
|
||||
@@ -41,13 +38,11 @@ pub async fn run_command_under_landlock(
|
||||
) -> anyhow::Result<()> {
|
||||
let LandlockCommand {
|
||||
full_auto,
|
||||
sandbox,
|
||||
config_overrides,
|
||||
command,
|
||||
} = command;
|
||||
run_command_under_sandbox(
|
||||
full_auto,
|
||||
sandbox,
|
||||
command,
|
||||
config_overrides,
|
||||
codex_linux_sandbox_exe,
|
||||
@@ -63,20 +58,19 @@ enum SandboxType {
|
||||
|
||||
async fn run_command_under_sandbox(
|
||||
full_auto: bool,
|
||||
sandbox: SandboxPermissionOption,
|
||||
command: Vec<String>,
|
||||
config_overrides: CliConfigOverrides,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
sandbox_type: SandboxType,
|
||||
) -> anyhow::Result<()> {
|
||||
let sandbox_policy = create_sandbox_policy(full_auto, sandbox);
|
||||
let sandbox_mode = create_sandbox_mode(full_auto);
|
||||
let cwd = std::env::current_dir()?;
|
||||
let config = Config::load_with_cli_overrides(
|
||||
config_overrides
|
||||
.parse_overrides()
|
||||
.map_err(anyhow::Error::msg)?,
|
||||
ConfigOverrides {
|
||||
sandbox_policy: Some(sandbox_policy),
|
||||
sandbox_mode: Some(sandbox_mode),
|
||||
codex_linux_sandbox_exe,
|
||||
..Default::default()
|
||||
},
|
||||
@@ -110,13 +104,10 @@ async fn run_command_under_sandbox(
|
||||
handle_exit_status(status);
|
||||
}
|
||||
|
||||
pub fn create_sandbox_policy(full_auto: bool, sandbox: SandboxPermissionOption) -> SandboxPolicy {
|
||||
pub fn create_sandbox_mode(full_auto: bool) -> SandboxMode {
|
||||
if full_auto {
|
||||
SandboxPolicy::new_full_auto_policy()
|
||||
SandboxMode::WorkspaceWrite
|
||||
} else {
|
||||
match sandbox.permissions.map(Into::into) {
|
||||
Some(sandbox_policy) => sandbox_policy,
|
||||
None => SandboxPolicy::new_read_only_policy(),
|
||||
}
|
||||
SandboxMode::ReadOnly
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ pub mod proto;
|
||||
|
||||
use clap::Parser;
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_common::SandboxPermissionOption;
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
pub struct SeatbeltCommand {
|
||||
@@ -13,9 +12,6 @@ pub struct SeatbeltCommand {
|
||||
#[arg(long = "full-auto", default_value_t = false)]
|
||||
pub full_auto: bool,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub sandbox: SandboxPermissionOption,
|
||||
|
||||
#[clap(skip)]
|
||||
pub config_overrides: CliConfigOverrides,
|
||||
|
||||
@@ -30,9 +26,6 @@ pub struct LandlockCommand {
|
||||
#[arg(long = "full-auto", default_value_t = false)]
|
||||
pub full_auto: bool,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub sandbox: SandboxPermissionOption,
|
||||
|
||||
#[clap(skip)]
|
||||
pub config_overrides: CliConfigOverrides,
|
||||
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
use clap::CommandFactory;
|
||||
use clap::Parser;
|
||||
use clap_complete::Shell;
|
||||
use clap_complete::generate;
|
||||
use codex_chatgpt::apply_command::ApplyCommand;
|
||||
use codex_chatgpt::apply_command::run_apply_command;
|
||||
use codex_cli::LandlockCommand;
|
||||
use codex_cli::SeatbeltCommand;
|
||||
use codex_cli::login::run_login_with_chatgpt;
|
||||
@@ -47,8 +52,22 @@ enum Subcommand {
|
||||
#[clap(visible_alias = "p")]
|
||||
Proto(ProtoCli),
|
||||
|
||||
/// Generate shell completion scripts.
|
||||
Completion(CompletionCommand),
|
||||
|
||||
/// Internal debugging commands.
|
||||
Debug(DebugArgs),
|
||||
|
||||
/// Apply the latest diff produced by Codex agent as a `git apply` to your local working tree.
|
||||
#[clap(visible_alias = "a")]
|
||||
Apply(ApplyCommand),
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
struct CompletionCommand {
|
||||
/// Shell to generate completions for
|
||||
#[clap(value_enum, default_value_t = Shell::Bash)]
|
||||
shell: Shell,
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
@@ -103,6 +122,9 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
|
||||
prepend_config_flags(&mut proto_cli.config_overrides, cli.config_overrides);
|
||||
proto::run_main(proto_cli).await?;
|
||||
}
|
||||
Some(Subcommand::Completion(completion_cli)) => {
|
||||
print_completion(completion_cli);
|
||||
}
|
||||
Some(Subcommand::Debug(debug_args)) => match debug_args.cmd {
|
||||
DebugCommand::Seatbelt(mut seatbelt_cli) => {
|
||||
prepend_config_flags(&mut seatbelt_cli.config_overrides, cli.config_overrides);
|
||||
@@ -121,6 +143,10 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
|
||||
.await?;
|
||||
}
|
||||
},
|
||||
Some(Subcommand::Apply(mut apply_cli)) => {
|
||||
prepend_config_flags(&mut apply_cli.config_overrides, cli.config_overrides);
|
||||
run_apply_command(apply_cli, None).await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -136,3 +162,9 @@ fn prepend_config_flags(
|
||||
.raw_overrides
|
||||
.splice(0..0, cli_config_overrides.raw_overrides);
|
||||
}
|
||||
|
||||
fn print_completion(cmd: CompletionCommand) {
|
||||
let mut app = MultitoolCli::command();
|
||||
let name = "codex";
|
||||
generate(cmd.shell, &mut app, name, &mut std::io::stdout());
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ pub async fn run_main(opts: ProtoCli) -> anyhow::Result<()> {
|
||||
|
||||
let config = Config::load_with_cli_overrides(overrides_vec, ConfigOverrides::default())?;
|
||||
let ctrl_c = notify_on_sigint();
|
||||
let (codex, _init_id) = Codex::spawn(config, ctrl_c.clone()).await?;
|
||||
let (codex, _init_id, _session_id) = Codex::spawn(config, ctrl_c.clone()).await?;
|
||||
let codex = Arc::new(codex);
|
||||
|
||||
// Task that reads JSON lines from stdin and forwards to Submission Queue
|
||||
|
||||
@@ -9,10 +9,11 @@ workspace = true
|
||||
[dependencies]
|
||||
clap = { version = "4", features = ["derive", "wrap_help"], optional = true }
|
||||
codex-core = { path = "../core" }
|
||||
toml = { version = "0.8", optional = true }
|
||||
toml = { version = "0.9", optional = true }
|
||||
serde = { version = "1", optional = true }
|
||||
|
||||
[features]
|
||||
# Separate feature so that `clap` is not a mandatory dependency.
|
||||
cli = ["clap", "toml", "serde"]
|
||||
elapsed = []
|
||||
sandbox_summary = []
|
||||
|
||||
@@ -1,27 +1,23 @@
|
||||
//! Standard type to use with the `--approval-mode` CLI option.
|
||||
//! Available when the `cli` feature is enabled for the crate.
|
||||
|
||||
use clap::ArgAction;
|
||||
use clap::Parser;
|
||||
use clap::ValueEnum;
|
||||
|
||||
use codex_core::config::parse_sandbox_permission_with_base_path;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::SandboxPermission;
|
||||
|
||||
#[derive(Clone, Copy, Debug, ValueEnum)]
|
||||
#[value(rename_all = "kebab-case")]
|
||||
pub enum ApprovalModeCliArg {
|
||||
/// Only run "trusted" commands (e.g. ls, cat, sed) without asking for user
|
||||
/// approval. Will escalate to the user if the model proposes a command that
|
||||
/// is not in the "trusted" set.
|
||||
Untrusted,
|
||||
|
||||
/// Run all commands without asking for user approval.
|
||||
/// Only asks for approval if a command fails to execute, in which case it
|
||||
/// will escalate to the user to ask for un-sandboxed execution.
|
||||
OnFailure,
|
||||
|
||||
/// Only run "known safe" commands (e.g. ls, cat, sed) without
|
||||
/// asking for user approval. Will escalate to the user if the model
|
||||
/// proposes a command that is not allow-listed.
|
||||
UnlessAllowListed,
|
||||
|
||||
/// Never ask for user approval
|
||||
/// Execution failures are immediately returned to the model.
|
||||
Never,
|
||||
@@ -30,44 +26,9 @@ pub enum ApprovalModeCliArg {
|
||||
impl From<ApprovalModeCliArg> for AskForApproval {
|
||||
fn from(value: ApprovalModeCliArg) -> Self {
|
||||
match value {
|
||||
ApprovalModeCliArg::Untrusted => AskForApproval::UnlessTrusted,
|
||||
ApprovalModeCliArg::OnFailure => AskForApproval::OnFailure,
|
||||
ApprovalModeCliArg::UnlessAllowListed => AskForApproval::UnlessAllowListed,
|
||||
ApprovalModeCliArg::Never => AskForApproval::Never,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
pub struct SandboxPermissionOption {
|
||||
/// Specify this flag multiple times to specify the full set of permissions
|
||||
/// to grant to Codex.
|
||||
///
|
||||
/// ```shell
|
||||
/// codex -s disk-full-read-access \
|
||||
/// -s disk-write-cwd \
|
||||
/// -s disk-write-platform-user-temp-folder \
|
||||
/// -s disk-write-platform-global-temp-folder
|
||||
/// ```
|
||||
///
|
||||
/// Note disk-write-folder takes a value:
|
||||
///
|
||||
/// ```shell
|
||||
/// -s disk-write-folder=$HOME/.pyenv/shims
|
||||
/// ```
|
||||
///
|
||||
/// These permissions are quite broad and should be used with caution:
|
||||
///
|
||||
/// ```shell
|
||||
/// -s disk-full-write-access
|
||||
/// -s network-full-access
|
||||
/// ```
|
||||
#[arg(long = "sandbox-permission", short = 's', action = ArgAction::Append, value_parser = parse_sandbox_permission)]
|
||||
pub permissions: Option<Vec<SandboxPermission>>,
|
||||
}
|
||||
|
||||
/// Custom value-parser so we can keep the CLI surface small *and*
|
||||
/// still handle the parameterised `disk-write-folder` case.
|
||||
fn parse_sandbox_permission(raw: &str) -> std::io::Result<SandboxPermission> {
|
||||
let base_path = std::env::current_dir()?;
|
||||
parse_sandbox_permission_with_base_path(raw, base_path)
|
||||
}
|
||||
|
||||
@@ -64,7 +64,11 @@ impl CliConfigOverrides {
|
||||
// `-c model=o3` without the quotes.
|
||||
let value: Value = match parse_toml_value(value_str) {
|
||||
Ok(v) => v,
|
||||
Err(_) => Value::String(value_str.to_string()),
|
||||
Err(_) => {
|
||||
// Strip leading/trailing quotes if present
|
||||
let trimmed = value_str.trim().trim_matches(|c| c == '"' || c == '\'');
|
||||
Value::String(trimmed.to_string())
|
||||
}
|
||||
};
|
||||
|
||||
Ok((key.to_string(), value))
|
||||
|
||||
@@ -20,7 +20,7 @@ pub fn format_duration(duration: Duration) -> String {
|
||||
|
||||
fn format_elapsed_millis(millis: i64) -> String {
|
||||
if millis < 1000 {
|
||||
format!("{}ms", millis)
|
||||
format!("{millis}ms")
|
||||
} else if millis < 60_000 {
|
||||
format!("{:.2}s", millis as f64 / 1000.0)
|
||||
} else {
|
||||
|
||||
@@ -6,11 +6,20 @@ pub mod elapsed;
|
||||
|
||||
#[cfg(feature = "cli")]
|
||||
pub use approval_mode_cli_arg::ApprovalModeCliArg;
|
||||
|
||||
#[cfg(feature = "cli")]
|
||||
pub use approval_mode_cli_arg::SandboxPermissionOption;
|
||||
mod sandbox_mode_cli_arg;
|
||||
|
||||
#[cfg(feature = "cli")]
|
||||
pub use sandbox_mode_cli_arg::SandboxModeCliArg;
|
||||
|
||||
#[cfg(any(feature = "cli", test))]
|
||||
mod config_override;
|
||||
|
||||
#[cfg(feature = "cli")]
|
||||
pub use config_override::CliConfigOverrides;
|
||||
|
||||
mod sandbox_summary;
|
||||
|
||||
#[cfg(feature = "sandbox_summary")]
|
||||
pub use sandbox_summary::summarize_sandbox_policy;
|
||||
|
||||
28
codex-rs/common/src/sandbox_mode_cli_arg.rs
Normal file
28
codex-rs/common/src/sandbox_mode_cli_arg.rs
Normal file
@@ -0,0 +1,28 @@
|
||||
//! Standard type to use with the `--sandbox` (`-s`) CLI option.
|
||||
//!
|
||||
//! This mirrors the variants of [`codex_core::protocol::SandboxPolicy`], but
|
||||
//! without any of the associated data so it can be expressed as a simple flag
|
||||
//! on the command-line. Users that need to tweak the advanced options for
|
||||
//! `workspace-write` can continue to do so via `-c` overrides or their
|
||||
//! `config.toml`.
|
||||
|
||||
use clap::ValueEnum;
|
||||
use codex_core::config_types::SandboxMode;
|
||||
|
||||
#[derive(Clone, Copy, Debug, ValueEnum)]
|
||||
#[value(rename_all = "kebab-case")]
|
||||
pub enum SandboxModeCliArg {
|
||||
ReadOnly,
|
||||
WorkspaceWrite,
|
||||
DangerFullAccess,
|
||||
}
|
||||
|
||||
impl From<SandboxModeCliArg> for SandboxMode {
|
||||
fn from(value: SandboxModeCliArg) -> Self {
|
||||
match value {
|
||||
SandboxModeCliArg::ReadOnly => SandboxMode::ReadOnly,
|
||||
SandboxModeCliArg::WorkspaceWrite => SandboxMode::WorkspaceWrite,
|
||||
SandboxModeCliArg::DangerFullAccess => SandboxMode::DangerFullAccess,
|
||||
}
|
||||
}
|
||||
}
|
||||
28
codex-rs/common/src/sandbox_summary.rs
Normal file
28
codex-rs/common/src/sandbox_summary.rs
Normal file
@@ -0,0 +1,28 @@
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
|
||||
pub fn summarize_sandbox_policy(sandbox_policy: &SandboxPolicy) -> String {
|
||||
match sandbox_policy {
|
||||
SandboxPolicy::DangerFullAccess => "danger-full-access".to_string(),
|
||||
SandboxPolicy::ReadOnly => "read-only".to_string(),
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots,
|
||||
network_access,
|
||||
} => {
|
||||
let mut summary = "workspace-write".to_string();
|
||||
if !writable_roots.is_empty() {
|
||||
summary.push_str(&format!(
|
||||
" [{}]",
|
||||
writable_roots
|
||||
.iter()
|
||||
.map(|p| p.to_string_lossy())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
));
|
||||
}
|
||||
if *network_access {
|
||||
summary.push_str(" (network access enabled)");
|
||||
}
|
||||
summary
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -20,41 +20,11 @@ The model that Codex should use.
|
||||
model = "o3" # overrides the default of "codex-mini-latest"
|
||||
```
|
||||
|
||||
## model_provider
|
||||
|
||||
Codex comes bundled with a number of "model providers" predefined. This config value is a string that indicates which provider to use. You can also define your own providers via `model_providers`.
|
||||
|
||||
For example, if you are running ollama with Mistral locally, then you would need to add the following to your config:
|
||||
|
||||
```toml
|
||||
model = "mistral"
|
||||
model_provider = "ollama"
|
||||
```
|
||||
|
||||
because the following definition for `ollama` is included in Codex:
|
||||
|
||||
```toml
|
||||
[model_providers.ollama]
|
||||
name = "Ollama"
|
||||
base_url = "http://localhost:11434/v1"
|
||||
wire_api = "chat"
|
||||
```
|
||||
|
||||
This option defaults to `"openai"` and the corresponding provider is defined as follows:
|
||||
|
||||
```toml
|
||||
[model_providers.openai]
|
||||
name = "OpenAI"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
env_key = "OPENAI_API_KEY"
|
||||
wire_api = "responses"
|
||||
```
|
||||
|
||||
## model_providers
|
||||
|
||||
This option lets you override and amend the default set of model providers bundled with Codex. This value is a map where the key is the value to use with `model_provider` to select the correspodning provider.
|
||||
This option lets you override and amend the default set of model providers bundled with Codex. This value is a map where the key is the value to use with `model_provider` to select the corresponding provider.
|
||||
|
||||
For example, if you wanted to add a provider that uses the OpenAI 4o model via the chat completions API, then you
|
||||
For example, if you wanted to add a provider that uses the OpenAI 4o model via the chat completions API, then you could add the following configuration:
|
||||
|
||||
```toml
|
||||
# Recall that in TOML, root keys must be listed before tables.
|
||||
@@ -71,8 +41,94 @@ base_url = "https://api.openai.com/v1"
|
||||
# using Codex with this provider. The value of the environment variable must be
|
||||
# non-empty and will be used in the `Bearer TOKEN` HTTP header for the POST request.
|
||||
env_key = "OPENAI_API_KEY"
|
||||
# valid values for wire_api are "chat" and "responses".
|
||||
# Valid values for wire_api are "chat" and "responses". Defaults to "chat" if omitted.
|
||||
wire_api = "chat"
|
||||
# If necessary, extra query params that need to be added to the URL.
|
||||
# See the Azure example below.
|
||||
query_params = {}
|
||||
```
|
||||
|
||||
Note this makes it possible to use Codex CLI with non-OpenAI models, so long as they use a wire API that is compatible with the OpenAI chat completions API. For example, you could define the following provider to use Codex CLI with Ollama running locally:
|
||||
|
||||
```toml
|
||||
[model_providers.ollama]
|
||||
name = "Ollama"
|
||||
base_url = "http://localhost:11434/v1"
|
||||
```
|
||||
|
||||
Or a third-party provider (using a distinct environment variable for the API key):
|
||||
|
||||
```toml
|
||||
[model_providers.mistral]
|
||||
name = "Mistral"
|
||||
base_url = "https://api.mistral.ai/v1"
|
||||
env_key = "MISTRAL_API_KEY"
|
||||
```
|
||||
|
||||
Note that Azure requires `api-version` to be passed as a query parameter, so be sure to specify it as part of `query_params` when defining the Azure provider:
|
||||
|
||||
```toml
|
||||
[model_providers.azure]
|
||||
name = "Azure"
|
||||
# Make sure you set the appropriate subdomain for this URL.
|
||||
base_url = "https://YOUR_PROJECT_NAME.openai.azure.com/openai"
|
||||
env_key = "AZURE_OPENAI_API_KEY" # Or "OPENAI_API_KEY", whichever you use.
|
||||
query_params = { api-version = "2025-04-01-preview" }
|
||||
```
|
||||
|
||||
It is also possible to configure a provider to include extra HTTP headers with a request. These can be hardcoded values (`http_headers`) or values read from environment variables (`env_http_headers`):
|
||||
|
||||
```toml
|
||||
[model_providers.example]
|
||||
# name, base_url, ...
|
||||
|
||||
# This will add the HTTP header `X-Example-Header` with value `example-value`
|
||||
# to each request to the model provider.
|
||||
http_headers = { "X-Example-Header" = "example-value" }
|
||||
|
||||
# This will add the HTTP header `X-Example-Features` with the value of the
|
||||
# `EXAMPLE_FEATURES` environment variable to each request to the model provider
|
||||
# _if_ the environment variable is set and its value is non-empty.
|
||||
env_http_headers = { "X-Example-Features": "EXAMPLE_FEATURES" }
|
||||
```
|
||||
|
||||
### Per-provider network tuning
|
||||
|
||||
The following optional settings control retry behaviour and streaming idle timeouts **per model provider**. They must be specified inside the corresponding `[model_providers.<id>]` block in `config.toml`. (Older releases accepted top‑level keys; those are now ignored.)
|
||||
|
||||
Example:
|
||||
|
||||
```toml
|
||||
[model_providers.openai]
|
||||
name = "OpenAI"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
env_key = "OPENAI_API_KEY"
|
||||
# network tuning overrides (all optional; falls back to built‑in defaults)
|
||||
request_max_retries = 4 # retry failed HTTP requests
|
||||
stream_max_retries = 10 # retry dropped SSE streams
|
||||
stream_idle_timeout_ms = 300000 # 5m idle timeout
|
||||
```
|
||||
|
||||
#### request_max_retries
|
||||
How many times Codex will retry a failed HTTP request to the model provider. Defaults to `4`.
|
||||
|
||||
#### stream_max_retries
|
||||
Number of times Codex will attempt to reconnect when a streaming response is interrupted. Defaults to `10`.
|
||||
|
||||
#### stream_idle_timeout_ms
|
||||
How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to `300_000` (5 minutes).
|
||||
|
||||
## model_provider
|
||||
|
||||
Identifies which provider to use from the `model_providers` map. Defaults to `"openai"`. You can override the `base_url` for the built-in `openai` provider via the `OPENAI_BASE_URL` environment variable.
|
||||
|
||||
Note that if you override `model_provider`, then you likely want to override
|
||||
`model`, as well. For example, if you are running ollama with Mistral locally,
|
||||
then you would need to add the following to your config in addition to the new entry in the `model_providers` map:
|
||||
|
||||
```toml
|
||||
model_provider = "ollama"
|
||||
model = "mistral"
|
||||
```
|
||||
|
||||
## approval_policy
|
||||
@@ -80,8 +136,13 @@ wire_api = "chat"
|
||||
Determines when the user should be prompted to approve whether Codex can execute a command:
|
||||
|
||||
```toml
|
||||
# This is analogous to --suggest in the TypeScript Codex CLI
|
||||
approval_policy = "unless-allow-listed"
|
||||
# Codex has hardcoded logic that defines a set of "trusted" commands.
|
||||
# Setting the approval_policy to `untrusted` means that Codex will prompt the
|
||||
# user before running a command not in the "trusted" set.
|
||||
#
|
||||
# See https://github.com/openai/codex/issues/1260 for the plan to enable
|
||||
# end-users to define their own trusted commands.
|
||||
approval_policy = "untrusted"
|
||||
```
|
||||
|
||||
```toml
|
||||
@@ -106,7 +167,6 @@ Here is an example of a `config.toml` that defines multiple profiles:
|
||||
```toml
|
||||
model = "o3"
|
||||
approval_policy = "unless-allow-listed"
|
||||
sandbox_permissions = ["disk-full-read-access"]
|
||||
disable_response_storage = false
|
||||
|
||||
# Setting `profile` is equivalent to specifying `--profile o3` on the command
|
||||
@@ -123,6 +183,8 @@ wire_api = "chat"
|
||||
model = "o3"
|
||||
model_provider = "openai"
|
||||
approval_policy = "never"
|
||||
model_reasoning_effort = "high"
|
||||
model_reasoning_summary = "detailed"
|
||||
|
||||
[profiles.gpt3]
|
||||
model = "gpt-3.5-turbo"
|
||||
@@ -170,31 +232,55 @@ To disable reasoning summaries, set `model_reasoning_summary` to `"none"` in you
|
||||
model_reasoning_summary = "none" # disable reasoning summaries
|
||||
```
|
||||
|
||||
## sandbox_permissions
|
||||
## model_supports_reasoning_summaries
|
||||
|
||||
List of permissions to grant to the sandbox that Codex uses to execute untrusted commands:
|
||||
By default, `reasoning` is only set on requests to OpenAI models that are known to support them. To force `reasoning` to set on requests to the current model, you can force this behavior by setting the following in `config.toml`:
|
||||
|
||||
```toml
|
||||
# This is comparable to --full-auto in the TypeScript Codex CLI, though
|
||||
# specifying `disk-write-platform-global-temp-folder` adds /tmp as a writable
|
||||
# folder in addition to $TMPDIR.
|
||||
sandbox_permissions = [
|
||||
"disk-full-read-access",
|
||||
"disk-write-platform-user-temp-folder",
|
||||
"disk-write-platform-global-temp-folder",
|
||||
"disk-write-cwd",
|
||||
]
|
||||
model_supports_reasoning_summaries = true
|
||||
```
|
||||
|
||||
To add additional writable folders, use `disk-write-folder`, which takes a parameter (this can be specified multiple times):
|
||||
## sandbox_mode
|
||||
|
||||
Codex executes model-generated shell commands inside an OS-level sandbox.
|
||||
|
||||
In most cases you can pick the desired behaviour with a single option:
|
||||
|
||||
```toml
|
||||
sandbox_permissions = [
|
||||
# ...
|
||||
"disk-write-folder=/Users/mbolin/.pyenv/shims",
|
||||
]
|
||||
# same as `--sandbox read-only`
|
||||
sandbox_mode = "read-only"
|
||||
```
|
||||
|
||||
The default policy is `read-only`, which means commands can read any file on
|
||||
disk, but attempts to write a file or access the network will be blocked.
|
||||
|
||||
A more relaxed policy is `workspace-write`. When specified, the current working directory for the Codex task will be writable (as well as `$TMPDIR` on macOS). Note that the CLI defaults to using the directory where it was spawned as `cwd`, though this can be overridden using `--cwd/-C`.
|
||||
|
||||
```toml
|
||||
# same as `--sandbox workspace-write`
|
||||
sandbox_mode = "workspace-write"
|
||||
|
||||
# Extra settings that only apply when `sandbox = "workspace-write"`.
|
||||
[sandbox_workspace_write]
|
||||
# By default, only the cwd for the Codex session will be writable (and $TMPDIR
|
||||
# on macOS), but you can specify additional writable folders in this array.
|
||||
writable_roots = ["/tmp"]
|
||||
# Allow the command being run inside the sandbox to make outbound network
|
||||
# requests. Disabled by default.
|
||||
network_access = false
|
||||
```
|
||||
|
||||
To disable sandboxing altogether, specify `danger-full-access` like so:
|
||||
|
||||
```toml
|
||||
# same as `--sandbox danger-full-access`
|
||||
sandbox_mode = "danger-full-access"
|
||||
```
|
||||
|
||||
This is reasonable to use if Codex is running in an environment that provides its own sandboxing (such as a Docker container) such that further sandboxing is unnecessary.
|
||||
|
||||
Though using this option may also be necessary if you try to use Codex in environments where its native sandboxing mechanisms are unsupported, such as older Linux kernels or on Windows.
|
||||
|
||||
## mcp_servers
|
||||
|
||||
Defines the list of MCP servers that Codex can consult for tool use. Currently, only servers that are launched by executing a program that communicate over stdio are supported. For servers that use the SSE transport, consider an adapter like [mcp-proxy](https://github.com/sparfenyuk/mcp-proxy).
|
||||
@@ -384,7 +470,7 @@ Currently, `"vscode"` is the default, though Codex does not verify VS Code is in
|
||||
|
||||
## hide_agent_reasoning
|
||||
|
||||
Codex intermittently emits "reasoning" events that show the model’s internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.
|
||||
Codex intermittently emits "reasoning" events that show the model's internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.
|
||||
|
||||
Setting `hide_agent_reasoning` to `true` suppresses these events in **both** the TUI as well as the headless `exec` sub-command:
|
||||
|
||||
@@ -392,6 +478,16 @@ Setting `hide_agent_reasoning` to `true` suppresses these events in **both** the
|
||||
hide_agent_reasoning = true # defaults to false
|
||||
```
|
||||
|
||||
## model_context_window
|
||||
|
||||
The size of the context window for the model, in tokens.
|
||||
|
||||
In general, Codex knows the context window for the most common OpenAI models, but if you are using a new model with an old version of the Codex CLI, then you can use `model_context_window` to tell Codex what value to use to determine how much context is left during a conversation.
|
||||
|
||||
## model_max_output_tokens
|
||||
|
||||
This is analogous to `model_context_window`, but for the maximum number of output tokens for the model.
|
||||
|
||||
## project_doc_max_bytes
|
||||
|
||||
Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
|
||||
|
||||
@@ -13,27 +13,24 @@ workspace = true
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
async-channel = "2.3.1"
|
||||
base64 = "0.21"
|
||||
base64 = "0.22"
|
||||
bytes = "1.10.1"
|
||||
codex-apply-patch = { path = "../apply-patch" }
|
||||
codex-login = { path = "../login" }
|
||||
codex-mcp-client = { path = "../mcp-client" }
|
||||
dirs = "6"
|
||||
env-flags = "0.1.1"
|
||||
eventsource-stream = "0.2.3"
|
||||
fs2 = "0.4.3"
|
||||
fs-err = "3.1.0"
|
||||
futures = "0.3"
|
||||
libc = "0.2.174"
|
||||
mcp-types = { path = "../mcp-types" }
|
||||
mime_guess = "2.0"
|
||||
patch = "0.7"
|
||||
path-absolutize = "3.1.1"
|
||||
rand = "0.9"
|
||||
reqwest = { version = "0.12", features = ["json", "stream"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
strum = "0.27.1"
|
||||
strum_macros = "0.27.1"
|
||||
sha1 = "0.10.6"
|
||||
strum_macros = "0.27.2"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3", features = ["formatting", "local-offset", "macros"] }
|
||||
tokio = { version = "1", features = [
|
||||
@@ -44,10 +41,10 @@ tokio = { version = "1", features = [
|
||||
"signal",
|
||||
] }
|
||||
tokio-util = "0.7.14"
|
||||
toml = "0.8.20"
|
||||
toml = "0.9.1"
|
||||
tracing = { version = "0.1.41", features = ["log"] }
|
||||
tree-sitter = "0.25.3"
|
||||
tree-sitter-bash = "0.23.3"
|
||||
tree-sitter = "0.25.8"
|
||||
tree-sitter-bash = "0.25.0"
|
||||
uuid = { version = "1", features = ["serde", "v4"] }
|
||||
wildmatch = "2.4.0"
|
||||
|
||||
@@ -69,4 +66,6 @@ maplit = "1.0.2"
|
||||
predicates = "3"
|
||||
pretty_assertions = "1.4.1"
|
||||
tempfile = "3"
|
||||
tokio-test = "0.4"
|
||||
walkdir = "2.5.0"
|
||||
wiremock = "0.6"
|
||||
|
||||
@@ -21,8 +21,6 @@ use crate::client_common::ResponseEvent;
|
||||
use crate::client_common::ResponseStream;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result;
|
||||
use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
|
||||
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
use crate::models::ContentItem;
|
||||
use crate::models::ResponseItem;
|
||||
use crate::openai_tools::create_tools_json_for_chat_completions_api;
|
||||
@@ -43,7 +41,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
|
||||
for item in &prompt.input {
|
||||
match item {
|
||||
ResponseItem::Message { role, content } => {
|
||||
ResponseItem::Message { role, content, .. } => {
|
||||
let mut text = String::new();
|
||||
for c in content {
|
||||
match c {
|
||||
@@ -60,6 +58,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
name,
|
||||
arguments,
|
||||
call_id,
|
||||
..
|
||||
} => {
|
||||
messages.push(json!({
|
||||
"role": "assistant",
|
||||
@@ -114,23 +113,19 @@ pub(crate) async fn stream_chat_completions(
|
||||
"tools": tools_json,
|
||||
});
|
||||
|
||||
let base_url = provider.base_url.trim_end_matches('/');
|
||||
let url = format!("{}/chat/completions", base_url);
|
||||
|
||||
debug!(
|
||||
"POST to {url}: {}",
|
||||
"POST to {}: {}",
|
||||
provider.get_full_url(),
|
||||
serde_json::to_string_pretty(&payload).unwrap_or_default()
|
||||
);
|
||||
|
||||
let api_key = provider.api_key()?;
|
||||
let mut attempt = 0;
|
||||
let max_retries = provider.request_max_retries();
|
||||
loop {
|
||||
attempt += 1;
|
||||
|
||||
let mut req_builder = client.post(&url);
|
||||
if let Some(api_key) = &api_key {
|
||||
req_builder = req_builder.bearer_auth(api_key.clone());
|
||||
}
|
||||
let req_builder = provider.create_request_builder(client)?;
|
||||
|
||||
let res = req_builder
|
||||
.header(reqwest::header::ACCEPT, "text/event-stream")
|
||||
.json(&payload)
|
||||
@@ -139,9 +134,13 @@ pub(crate) async fn stream_chat_completions(
|
||||
|
||||
match res {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(16);
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
|
||||
let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
|
||||
tokio::spawn(process_chat_sse(stream, tx_event));
|
||||
tokio::spawn(process_chat_sse(
|
||||
stream,
|
||||
tx_event,
|
||||
provider.stream_idle_timeout(),
|
||||
));
|
||||
return Ok(ResponseStream { rx_event });
|
||||
}
|
||||
Ok(res) => {
|
||||
@@ -151,7 +150,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
return Err(CodexErr::UnexpectedStatus(status, body));
|
||||
}
|
||||
|
||||
if attempt > *OPENAI_REQUEST_MAX_RETRIES {
|
||||
if attempt > max_retries {
|
||||
return Err(CodexErr::RetryLimit(status));
|
||||
}
|
||||
|
||||
@@ -167,7 +166,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
tokio::time::sleep(delay).await;
|
||||
}
|
||||
Err(e) => {
|
||||
if attempt > *OPENAI_REQUEST_MAX_RETRIES {
|
||||
if attempt > max_retries {
|
||||
return Err(e.into());
|
||||
}
|
||||
let delay = backoff(attempt);
|
||||
@@ -180,14 +179,15 @@ pub(crate) async fn stream_chat_completions(
|
||||
/// Lightweight SSE processor for the Chat Completions streaming format. The
|
||||
/// output is mapped onto Codex's internal [`ResponseEvent`] so that the rest
|
||||
/// of the pipeline can stay agnostic of the underlying wire format.
|
||||
async fn process_chat_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
|
||||
where
|
||||
async fn process_chat_sse<S>(
|
||||
stream: S,
|
||||
tx_event: mpsc::Sender<Result<ResponseEvent>>,
|
||||
idle_timeout: Duration,
|
||||
) where
|
||||
S: Stream<Item = Result<Bytes>> + Unpin,
|
||||
{
|
||||
let mut stream = stream.eventsource();
|
||||
|
||||
let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
|
||||
// State to accumulate a function call across streaming chunks.
|
||||
// OpenAI may split the `arguments` string over multiple `delta` events
|
||||
// until the chunk whose `finish_reason` is `tool_calls` is emitted. We
|
||||
@@ -215,6 +215,7 @@ where
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::Completed {
|
||||
response_id: String::new(),
|
||||
token_usage: None,
|
||||
}))
|
||||
.await;
|
||||
return;
|
||||
@@ -232,6 +233,7 @@ where
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::Completed {
|
||||
response_id: String::new(),
|
||||
token_usage: None,
|
||||
}))
|
||||
.await;
|
||||
return;
|
||||
@@ -258,6 +260,7 @@ where
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: content.to_string(),
|
||||
}],
|
||||
id: None,
|
||||
};
|
||||
|
||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||
@@ -299,6 +302,7 @@ where
|
||||
"tool_calls" if fn_call_state.active => {
|
||||
// Build the FunctionCall response item.
|
||||
let item = ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: fn_call_state.name.clone().unwrap_or_else(|| "".to_string()),
|
||||
arguments: fn_call_state.arguments.clone(),
|
||||
call_id: fn_call_state.call_id.clone().unwrap_or_else(String::new),
|
||||
@@ -317,6 +321,7 @@ where
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::Completed {
|
||||
response_id: String::new(),
|
||||
token_usage: None,
|
||||
}))
|
||||
.await;
|
||||
|
||||
@@ -394,9 +399,13 @@ where
|
||||
// Not an assistant message – forward immediately.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id }))) => {
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
}))) => {
|
||||
if !this.cumulative.is_empty() {
|
||||
let aggregated_item = crate::models::ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![crate::models::ContentItem::OutputText {
|
||||
text: std::mem::take(&mut this.cumulative),
|
||||
@@ -404,7 +413,10 @@ where
|
||||
};
|
||||
|
||||
// Buffer Completed so it is returned *after* the aggregated message.
|
||||
this.pending_completed = Some(ResponseEvent::Completed { response_id });
|
||||
this.pending_completed = Some(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
});
|
||||
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(
|
||||
aggregated_item,
|
||||
@@ -412,8 +424,22 @@ where
|
||||
}
|
||||
|
||||
// Nothing aggregated – forward Completed directly.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id })));
|
||||
} // No other `Ok` variants exist at the moment, continue polling.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
})));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Created))) => {
|
||||
// These events are exclusive to the Responses API and
|
||||
// will never appear in a Chat Completions stream.
|
||||
continue;
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(_))))
|
||||
| Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta(_)))) => {
|
||||
// Deltas are ignored here since aggregation waits for the
|
||||
// final OutputItemDone.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -427,7 +453,7 @@ pub(crate) trait AggregateStreamExt: Stream<Item = Result<ResponseEvent>> + Size
|
||||
///
|
||||
/// ```ignore
|
||||
/// OutputItemDone(<full message>)
|
||||
/// Completed { .. }
|
||||
/// Completed
|
||||
/// ```
|
||||
///
|
||||
/// No other `OutputItemDone` events will be seen by the caller.
|
||||
|
||||
@@ -15,6 +15,7 @@ use tokio_util::io::ReaderStream;
|
||||
use tracing::debug;
|
||||
use tracing::trace;
|
||||
use tracing::warn;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::chat_completions::AggregateStreamExt;
|
||||
use crate::chat_completions::stream_chat_completions;
|
||||
@@ -23,40 +24,43 @@ use crate::client_common::ResponseEvent;
|
||||
use crate::client_common::ResponseStream;
|
||||
use crate::client_common::ResponsesApiRequest;
|
||||
use crate::client_common::create_reasoning_param_for_request;
|
||||
use crate::config::Config;
|
||||
use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::EnvVarError;
|
||||
use crate::error::Result;
|
||||
use crate::flags::CODEX_RS_SSE_FIXTURE;
|
||||
use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
|
||||
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
use crate::model_provider_info::ModelProviderInfo;
|
||||
use crate::model_provider_info::WireApi;
|
||||
use crate::models::ResponseItem;
|
||||
use crate::openai_tools::create_tools_json_for_responses_api;
|
||||
use crate::protocol::TokenUsage;
|
||||
use crate::util::backoff;
|
||||
use std::sync::Arc;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ModelClient {
|
||||
model: String,
|
||||
config: Arc<Config>,
|
||||
client: reqwest::Client,
|
||||
provider: ModelProviderInfo,
|
||||
session_id: Uuid,
|
||||
effort: ReasoningEffortConfig,
|
||||
summary: ReasoningSummaryConfig,
|
||||
}
|
||||
|
||||
impl ModelClient {
|
||||
pub fn new(
|
||||
model: impl ToString,
|
||||
config: Arc<Config>,
|
||||
provider: ModelProviderInfo,
|
||||
effort: ReasoningEffortConfig,
|
||||
summary: ReasoningSummaryConfig,
|
||||
session_id: Uuid,
|
||||
) -> Self {
|
||||
Self {
|
||||
model: model.to_string(),
|
||||
config,
|
||||
client: reqwest::Client::new(),
|
||||
provider,
|
||||
session_id,
|
||||
effort,
|
||||
summary,
|
||||
}
|
||||
@@ -70,9 +74,13 @@ impl ModelClient {
|
||||
WireApi::Responses => self.stream_responses(prompt).await,
|
||||
WireApi::Chat => {
|
||||
// Create the raw streaming connection first.
|
||||
let response_stream =
|
||||
stream_chat_completions(prompt, &self.model, &self.client, &self.provider)
|
||||
.await?;
|
||||
let response_stream = stream_chat_completions(
|
||||
prompt,
|
||||
&self.config.model,
|
||||
&self.client,
|
||||
&self.provider,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Wrap it with the aggregation adapter so callers see *only*
|
||||
// the final assistant message per turn (matching the
|
||||
@@ -103,56 +111,77 @@ impl ModelClient {
|
||||
if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
|
||||
// short circuit for tests
|
||||
warn!(path, "Streaming from fixture");
|
||||
return stream_from_fixture(path).await;
|
||||
return stream_from_fixture(path, self.provider.clone()).await;
|
||||
}
|
||||
|
||||
let full_instructions = prompt.get_full_instructions(&self.model);
|
||||
let tools_json = create_tools_json_for_responses_api(prompt, &self.model)?;
|
||||
let reasoning = create_reasoning_param_for_request(&self.model, self.effort, self.summary);
|
||||
let full_instructions = prompt.get_full_instructions(&self.config.model);
|
||||
let tools_json = create_tools_json_for_responses_api(prompt, &self.config.model)?;
|
||||
let reasoning = create_reasoning_param_for_request(&self.config, self.effort, self.summary);
|
||||
|
||||
// Request encrypted COT if we are not storing responses,
|
||||
// otherwise reasoning items will be referenced by ID
|
||||
let include = if !prompt.store && reasoning.is_some() {
|
||||
vec!["reasoning.encrypted_content".to_string()]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
let payload = ResponsesApiRequest {
|
||||
model: &self.model,
|
||||
model: &self.config.model,
|
||||
instructions: &full_instructions,
|
||||
input: &prompt.input,
|
||||
tools: &tools_json,
|
||||
tool_choice: "auto",
|
||||
parallel_tool_calls: false,
|
||||
reasoning,
|
||||
previous_response_id: prompt.prev_id.clone(),
|
||||
store: prompt.store,
|
||||
// TODO: make this configurable
|
||||
stream: true,
|
||||
include,
|
||||
};
|
||||
|
||||
let base_url = self.provider.base_url.clone();
|
||||
let base_url = base_url.trim_end_matches('/');
|
||||
let url = format!("{}/responses", base_url);
|
||||
trace!("POST to {url}: {}", serde_json::to_string(&payload)?);
|
||||
trace!(
|
||||
"POST to {}: {}",
|
||||
self.provider.get_full_url(),
|
||||
serde_json::to_string(&payload)?
|
||||
);
|
||||
|
||||
let mut attempt = 0;
|
||||
let max_retries = self.provider.request_max_retries();
|
||||
loop {
|
||||
attempt += 1;
|
||||
|
||||
let api_key = self.provider.api_key()?.ok_or_else(|| {
|
||||
CodexErr::EnvVar(EnvVarError {
|
||||
var: self.provider.env_key.clone().unwrap_or_default(),
|
||||
instructions: None,
|
||||
})
|
||||
})?;
|
||||
let res = self
|
||||
.client
|
||||
.post(&url)
|
||||
.bearer_auth(api_key)
|
||||
let req_builder = self
|
||||
.provider
|
||||
.create_request_builder(&self.client)?
|
||||
.header("OpenAI-Beta", "responses=experimental")
|
||||
.header("session_id", self.session_id.to_string())
|
||||
.header(reqwest::header::ACCEPT, "text/event-stream")
|
||||
.json(&payload)
|
||||
.send()
|
||||
.await;
|
||||
.json(&payload);
|
||||
|
||||
let res = req_builder.send().await;
|
||||
if let Ok(resp) = &res {
|
||||
trace!(
|
||||
"Response status: {}, request-id: {}",
|
||||
resp.status(),
|
||||
resp.headers()
|
||||
.get("x-request-id")
|
||||
.map(|v| v.to_str().unwrap_or_default())
|
||||
.unwrap_or_default()
|
||||
);
|
||||
}
|
||||
|
||||
match res {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(16);
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
|
||||
|
||||
// spawn task to process SSE
|
||||
let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
|
||||
tokio::spawn(process_sse(stream, tx_event));
|
||||
tokio::spawn(process_sse(
|
||||
stream,
|
||||
tx_event,
|
||||
self.provider.stream_idle_timeout(),
|
||||
));
|
||||
|
||||
return Ok(ResponseStream { rx_event });
|
||||
}
|
||||
@@ -167,11 +196,11 @@ impl ModelClient {
|
||||
// negligible.
|
||||
if !(status == StatusCode::TOO_MANY_REQUESTS || status.is_server_error()) {
|
||||
// Surface the error body to callers. Use `unwrap_or_default` per Clippy.
|
||||
let body = (res.text().await).unwrap_or_default();
|
||||
let body = res.text().await.unwrap_or_default();
|
||||
return Err(CodexErr::UnexpectedStatus(status, body));
|
||||
}
|
||||
|
||||
if attempt > *OPENAI_REQUEST_MAX_RETRIES {
|
||||
if attempt > max_retries {
|
||||
return Err(CodexErr::RetryLimit(status));
|
||||
}
|
||||
|
||||
@@ -188,7 +217,7 @@ impl ModelClient {
|
||||
tokio::time::sleep(delay).await;
|
||||
}
|
||||
Err(e) => {
|
||||
if attempt > *OPENAI_REQUEST_MAX_RETRIES {
|
||||
if attempt > max_retries {
|
||||
return Err(e.into());
|
||||
}
|
||||
let delay = backoff(attempt);
|
||||
@@ -197,6 +226,10 @@ impl ModelClient {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_provider(&self) -> ModelProviderInfo {
|
||||
self.provider.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
@@ -205,23 +238,61 @@ struct SseEvent {
|
||||
kind: String,
|
||||
response: Option<Value>,
|
||||
item: Option<Value>,
|
||||
delta: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCreated {}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCompleted {
|
||||
id: String,
|
||||
usage: Option<ResponseCompletedUsage>,
|
||||
}
|
||||
|
||||
async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
|
||||
where
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCompletedUsage {
|
||||
input_tokens: u64,
|
||||
input_tokens_details: Option<ResponseCompletedInputTokensDetails>,
|
||||
output_tokens: u64,
|
||||
output_tokens_details: Option<ResponseCompletedOutputTokensDetails>,
|
||||
total_tokens: u64,
|
||||
}
|
||||
|
||||
impl From<ResponseCompletedUsage> for TokenUsage {
|
||||
fn from(val: ResponseCompletedUsage) -> Self {
|
||||
TokenUsage {
|
||||
input_tokens: val.input_tokens,
|
||||
cached_input_tokens: val.input_tokens_details.map(|d| d.cached_tokens),
|
||||
output_tokens: val.output_tokens,
|
||||
reasoning_output_tokens: val.output_tokens_details.map(|d| d.reasoning_tokens),
|
||||
total_tokens: val.total_tokens,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCompletedInputTokensDetails {
|
||||
cached_tokens: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCompletedOutputTokensDetails {
|
||||
reasoning_tokens: u64,
|
||||
}
|
||||
|
||||
async fn process_sse<S>(
|
||||
stream: S,
|
||||
tx_event: mpsc::Sender<Result<ResponseEvent>>,
|
||||
idle_timeout: Duration,
|
||||
) where
|
||||
S: Stream<Item = Result<Bytes>> + Unpin,
|
||||
{
|
||||
let mut stream = stream.eventsource();
|
||||
|
||||
// If the stream stays completely silent for an extended period treat it as disconnected.
|
||||
let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
// The response id returned from the "complete" message.
|
||||
let mut response_id = None;
|
||||
let mut response_completed: Option<ResponseCompleted> = None;
|
||||
|
||||
loop {
|
||||
let sse = match timeout(idle_timeout, stream.next()).await {
|
||||
@@ -233,9 +304,15 @@ where
|
||||
return;
|
||||
}
|
||||
Ok(None) => {
|
||||
match response_id {
|
||||
Some(response_id) => {
|
||||
let event = ResponseEvent::Completed { response_id };
|
||||
match response_completed {
|
||||
Some(ResponseCompleted {
|
||||
id: response_id,
|
||||
usage,
|
||||
}) => {
|
||||
let event = ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage: usage.map(Into::into),
|
||||
};
|
||||
let _ = tx_event.send(Ok(event)).await;
|
||||
}
|
||||
None => {
|
||||
@@ -274,7 +351,7 @@ where
|
||||
// duplicated `output` array embedded in the `response.completed`
|
||||
// payload. That produced two concrete issues:
|
||||
// 1. No real‑time streaming – the user only saw output after the
|
||||
// entire turn had finished, which broke the “typing” UX and
|
||||
// entire turn had finished, which broke the "typing" UX and
|
||||
// made long‑running turns look stalled.
|
||||
// 2. Duplicate `function_call_output` items – both the
|
||||
// individual *and* the completed array were forwarded, which
|
||||
@@ -296,12 +373,46 @@ where
|
||||
return;
|
||||
}
|
||||
}
|
||||
"response.output_text.delta" => {
|
||||
if let Some(delta) = event.delta {
|
||||
let event = ResponseEvent::OutputTextDelta(delta);
|
||||
if tx_event.send(Ok(event)).await.is_err() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
"response.reasoning_summary_text.delta" => {
|
||||
if let Some(delta) = event.delta {
|
||||
let event = ResponseEvent::ReasoningSummaryDelta(delta);
|
||||
if tx_event.send(Ok(event)).await.is_err() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
"response.created" => {
|
||||
if event.response.is_some() {
|
||||
let _ = tx_event.send(Ok(ResponseEvent::Created {})).await;
|
||||
}
|
||||
}
|
||||
"response.failed" => {
|
||||
if let Some(resp_val) = event.response {
|
||||
let error = resp_val
|
||||
.get("error")
|
||||
.and_then(|v| v.get("message"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("response.failed event received");
|
||||
|
||||
let _ = tx_event
|
||||
.send(Err(CodexErr::Stream(error.to_string())))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
// Final response completed – includes array of output items & id
|
||||
"response.completed" => {
|
||||
if let Some(resp_val) = event.response {
|
||||
match serde_json::from_value::<ResponseCompleted>(resp_val) {
|
||||
Ok(r) => {
|
||||
response_id = Some(r.id);
|
||||
response_completed = Some(r);
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("failed to parse ResponseCompleted: {e}");
|
||||
@@ -311,14 +422,11 @@ where
|
||||
};
|
||||
}
|
||||
"response.content_part.done"
|
||||
| "response.created"
|
||||
| "response.function_call_arguments.delta"
|
||||
| "response.in_progress"
|
||||
| "response.output_item.added"
|
||||
| "response.output_text.delta"
|
||||
| "response.output_text.done"
|
||||
| "response.reasoning_summary_part.added"
|
||||
| "response.reasoning_summary_text.delta"
|
||||
| "response.reasoning_summary_text.done" => {
|
||||
// Currently, we ignore these events, but we handle them
|
||||
// separately to skip the logging message in the `other` case.
|
||||
@@ -329,8 +437,11 @@ where
|
||||
}
|
||||
|
||||
/// used in tests to stream from a text SSE file
|
||||
async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(16);
|
||||
async fn stream_from_fixture(
|
||||
path: impl AsRef<Path>,
|
||||
provider: ModelProviderInfo,
|
||||
) -> Result<ResponseStream> {
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
|
||||
let f = std::fs::File::open(path.as_ref())?;
|
||||
let lines = std::io::BufReader::new(f).lines();
|
||||
|
||||
@@ -343,6 +454,299 @@ async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {
|
||||
|
||||
let rdr = std::io::Cursor::new(content);
|
||||
let stream = ReaderStream::new(rdr).map_err(CodexErr::Io);
|
||||
tokio::spawn(process_sse(stream, tx_event));
|
||||
tokio::spawn(process_sse(
|
||||
stream,
|
||||
tx_event,
|
||||
provider.stream_idle_timeout(),
|
||||
));
|
||||
Ok(ResponseStream { rx_event })
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
|
||||
use super::*;
|
||||
use serde_json::json;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_test::io::Builder as IoBuilder;
|
||||
use tokio_util::io::ReaderStream;
|
||||
|
||||
// ────────────────────────────
|
||||
// Helpers
|
||||
// ────────────────────────────
|
||||
|
||||
/// Runs the SSE parser on pre-chunked byte slices and returns every event
|
||||
/// (including any final `Err` from a stream-closure check).
|
||||
async fn collect_events(
|
||||
chunks: &[&[u8]],
|
||||
provider: ModelProviderInfo,
|
||||
) -> Vec<Result<ResponseEvent>> {
|
||||
let mut builder = IoBuilder::new();
|
||||
for chunk in chunks {
|
||||
builder.read(chunk);
|
||||
}
|
||||
|
||||
let reader = builder.build();
|
||||
let stream = ReaderStream::new(reader).map_err(CodexErr::Io);
|
||||
let (tx, mut rx) = mpsc::channel::<Result<ResponseEvent>>(16);
|
||||
tokio::spawn(process_sse(stream, tx, provider.stream_idle_timeout()));
|
||||
|
||||
let mut events = Vec::new();
|
||||
while let Some(ev) = rx.recv().await {
|
||||
events.push(ev);
|
||||
}
|
||||
events
|
||||
}
|
||||
|
||||
/// Builds an in-memory SSE stream from JSON fixtures and returns only the
|
||||
/// successfully parsed events (panics on internal channel errors).
|
||||
async fn run_sse(
|
||||
events: Vec<serde_json::Value>,
|
||||
provider: ModelProviderInfo,
|
||||
) -> Vec<ResponseEvent> {
|
||||
let mut body = String::new();
|
||||
for e in events {
|
||||
let kind = e
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.expect("fixture event missing type");
|
||||
if e.as_object().map(|o| o.len() == 1).unwrap_or(false) {
|
||||
body.push_str(&format!("event: {kind}\n\n"));
|
||||
} else {
|
||||
body.push_str(&format!("event: {kind}\ndata: {e}\n\n"));
|
||||
}
|
||||
}
|
||||
|
||||
let (tx, mut rx) = mpsc::channel::<Result<ResponseEvent>>(8);
|
||||
let stream = ReaderStream::new(std::io::Cursor::new(body)).map_err(CodexErr::Io);
|
||||
tokio::spawn(process_sse(stream, tx, provider.stream_idle_timeout()));
|
||||
|
||||
let mut out = Vec::new();
|
||||
while let Some(ev) = rx.recv().await {
|
||||
out.push(ev.expect("channel closed"));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
// ────────────────────────────
|
||||
// Tests from `implement-test-for-responses-api-sse-parser`
|
||||
// ────────────────────────────
|
||||
|
||||
#[tokio::test]
|
||||
async fn parses_items_and_completed() {
|
||||
let item1 = json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": "Hello"}]
|
||||
}
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let item2 = json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": "World"}]
|
||||
}
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let completed = json!({
|
||||
"type": "response.completed",
|
||||
"response": { "id": "resp1" }
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let sse1 = format!("event: response.output_item.done\ndata: {item1}\n\n");
|
||||
let sse2 = format!("event: response.output_item.done\ndata: {item2}\n\n");
|
||||
let sse3 = format!("event: response.completed\ndata: {completed}\n\n");
|
||||
|
||||
let provider = ModelProviderInfo {
|
||||
name: "test".to_string(),
|
||||
base_url: "https://test.com".to_string(),
|
||||
env_key: Some("TEST_API_KEY".to_string()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
};
|
||||
|
||||
let events = collect_events(
|
||||
&[sse1.as_bytes(), sse2.as_bytes(), sse3.as_bytes()],
|
||||
provider,
|
||||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(events.len(), 3);
|
||||
|
||||
matches!(
|
||||
&events[0],
|
||||
Ok(ResponseEvent::OutputItemDone(ResponseItem::Message { role, .. }))
|
||||
if role == "assistant"
|
||||
);
|
||||
|
||||
matches!(
|
||||
&events[1],
|
||||
Ok(ResponseEvent::OutputItemDone(ResponseItem::Message { role, .. }))
|
||||
if role == "assistant"
|
||||
);
|
||||
|
||||
match &events[2] {
|
||||
Ok(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
}) => {
|
||||
assert_eq!(response_id, "resp1");
|
||||
assert!(token_usage.is_none());
|
||||
}
|
||||
other => panic!("unexpected third event: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn error_when_missing_completed() {
|
||||
let item1 = json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": "Hello"}]
|
||||
}
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let sse1 = format!("event: response.output_item.done\ndata: {item1}\n\n");
|
||||
let provider = ModelProviderInfo {
|
||||
name: "test".to_string(),
|
||||
base_url: "https://test.com".to_string(),
|
||||
env_key: Some("TEST_API_KEY".to_string()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
};
|
||||
|
||||
let events = collect_events(&[sse1.as_bytes()], provider).await;
|
||||
|
||||
assert_eq!(events.len(), 2);
|
||||
|
||||
matches!(events[0], Ok(ResponseEvent::OutputItemDone(_)));
|
||||
|
||||
match &events[1] {
|
||||
Err(CodexErr::Stream(msg)) => {
|
||||
assert_eq!(msg, "stream closed before response.completed")
|
||||
}
|
||||
other => panic!("unexpected second event: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
// ────────────────────────────
|
||||
// Table-driven test from `main`
|
||||
// ────────────────────────────
|
||||
|
||||
/// Verifies that the adapter produces the right `ResponseEvent` for a
|
||||
/// variety of incoming `type` values.
|
||||
#[tokio::test]
|
||||
async fn table_driven_event_kinds() {
|
||||
struct TestCase {
|
||||
name: &'static str,
|
||||
event: serde_json::Value,
|
||||
expect_first: fn(&ResponseEvent) -> bool,
|
||||
expected_len: usize,
|
||||
}
|
||||
|
||||
fn is_created(ev: &ResponseEvent) -> bool {
|
||||
matches!(ev, ResponseEvent::Created)
|
||||
}
|
||||
fn is_output(ev: &ResponseEvent) -> bool {
|
||||
matches!(ev, ResponseEvent::OutputItemDone(_))
|
||||
}
|
||||
fn is_completed(ev: &ResponseEvent) -> bool {
|
||||
matches!(ev, ResponseEvent::Completed { .. })
|
||||
}
|
||||
|
||||
let completed = json!({
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "c",
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 0,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 0
|
||||
},
|
||||
"output": []
|
||||
}
|
||||
});
|
||||
|
||||
let cases = vec![
|
||||
TestCase {
|
||||
name: "created",
|
||||
event: json!({"type": "response.created", "response": {}}),
|
||||
expect_first: is_created,
|
||||
expected_len: 2,
|
||||
},
|
||||
TestCase {
|
||||
name: "output_item.done",
|
||||
event: json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "output_text", "text": "hi"}
|
||||
]
|
||||
}
|
||||
}),
|
||||
expect_first: is_output,
|
||||
expected_len: 2,
|
||||
},
|
||||
TestCase {
|
||||
name: "unknown",
|
||||
event: json!({"type": "response.new_tool_event"}),
|
||||
expect_first: is_completed,
|
||||
expected_len: 1,
|
||||
},
|
||||
];
|
||||
|
||||
for case in cases {
|
||||
let mut evs = vec![case.event];
|
||||
evs.push(completed.clone());
|
||||
|
||||
let provider = ModelProviderInfo {
|
||||
name: "test".to_string(),
|
||||
base_url: "https://test.com".to_string(),
|
||||
env_key: Some("TEST_API_KEY".to_string()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
};
|
||||
|
||||
let out = run_sse(evs, provider).await;
|
||||
assert_eq!(out.len(), case.expected_len, "case {}", case.name);
|
||||
assert!(
|
||||
(case.expect_first)(&out[0]),
|
||||
"first event mismatch in case {}",
|
||||
case.name
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use crate::error::Result;
|
||||
use crate::models::ResponseItem;
|
||||
use crate::protocol::TokenUsage;
|
||||
use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
|
||||
use futures::Stream;
|
||||
use serde::Serialize;
|
||||
@@ -21,8 +22,6 @@ const BASE_INSTRUCTIONS: &str = include_str!("../prompt.md");
|
||||
pub struct Prompt {
|
||||
/// Conversation context input items.
|
||||
pub input: Vec<ResponseItem>,
|
||||
/// Optional previous response ID (when storage is enabled).
|
||||
pub prev_id: Option<String>,
|
||||
/// Optional instructions from the user to amend to the built-in agent
|
||||
/// instructions.
|
||||
pub user_instructions: Option<String>,
|
||||
@@ -33,11 +32,18 @@ pub struct Prompt {
|
||||
/// the "fully qualified" tool name (i.e., prefixed with the server name),
|
||||
/// which should be reported to the model in place of Tool::name.
|
||||
pub extra_tools: HashMap<String, mcp_types::Tool>,
|
||||
|
||||
/// Optional override for the built-in BASE_INSTRUCTIONS.
|
||||
pub base_instructions_override: Option<String>,
|
||||
}
|
||||
|
||||
impl Prompt {
|
||||
pub(crate) fn get_full_instructions(&self, model: &str) -> Cow<str> {
|
||||
let mut sections: Vec<&str> = vec![BASE_INSTRUCTIONS];
|
||||
pub(crate) fn get_full_instructions(&self, model: &str) -> Cow<'_, str> {
|
||||
let base = self
|
||||
.base_instructions_override
|
||||
.as_deref()
|
||||
.unwrap_or(BASE_INSTRUCTIONS);
|
||||
let mut sections: Vec<&str> = vec![base];
|
||||
if let Some(ref user) = self.user_instructions {
|
||||
sections.push(user);
|
||||
}
|
||||
@@ -50,8 +56,14 @@ impl Prompt {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ResponseEvent {
|
||||
Created,
|
||||
OutputItemDone(ResponseItem),
|
||||
Completed { response_id: String },
|
||||
Completed {
|
||||
response_id: String,
|
||||
token_usage: Option<TokenUsage>,
|
||||
},
|
||||
OutputTextDelta(String),
|
||||
ReasoningSummaryDelta(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
@@ -119,22 +131,22 @@ pub(crate) struct ResponsesApiRequest<'a> {
|
||||
pub(crate) tool_choice: &'static str,
|
||||
pub(crate) parallel_tool_calls: bool,
|
||||
pub(crate) reasoning: Option<Reasoning>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) previous_response_id: Option<String>,
|
||||
/// true when using the Responses API.
|
||||
pub(crate) store: bool,
|
||||
pub(crate) stream: bool,
|
||||
pub(crate) include: Vec<String>,
|
||||
}
|
||||
|
||||
use crate::config::Config;
|
||||
|
||||
pub(crate) fn create_reasoning_param_for_request(
|
||||
model: &str,
|
||||
config: &Config,
|
||||
effort: ReasoningEffortConfig,
|
||||
summary: ReasoningSummaryConfig,
|
||||
) -> Option<Reasoning> {
|
||||
let effort: Option<OpenAiReasoningEffort> = effort.into();
|
||||
let effort = effort?;
|
||||
|
||||
if model_supports_reasoning_summaries(model) {
|
||||
if model_supports_reasoning_summaries(config) {
|
||||
let effort: Option<OpenAiReasoningEffort> = effort.into();
|
||||
let effort = effort?;
|
||||
Some(Reasoning {
|
||||
effort,
|
||||
summary: summary.into(),
|
||||
@@ -144,19 +156,24 @@ pub(crate) fn create_reasoning_param_for_request(
|
||||
}
|
||||
}
|
||||
|
||||
pub fn model_supports_reasoning_summaries(model: &str) -> bool {
|
||||
// Currently, we hardcode this rule to decide whether enable reasoning.
|
||||
pub fn model_supports_reasoning_summaries(config: &Config) -> bool {
|
||||
// Currently, we hardcode this rule to decide whether to enable reasoning.
|
||||
// We expect reasoning to apply only to OpenAI models, but we do not want
|
||||
// users to have to mess with their config to disable reasoning for models
|
||||
// that do not support it, such as `gpt-4.1`.
|
||||
//
|
||||
// Though if a user is using Codex with non-OpenAI models that, say, happen
|
||||
// to start with "o", then they can set `model_reasoning_effort = "none` in
|
||||
// to start with "o", then they can set `model_reasoning_effort = "none"` in
|
||||
// config.toml to disable reasoning.
|
||||
//
|
||||
// Ultimately, this should also be configurable in config.toml, but we
|
||||
// need to have defaults that "just work." Perhaps we could have a
|
||||
// "reasoning models pattern" as part of ModelProviderInfo?
|
||||
// Converseley, if a user has a non-OpenAI provider that supports reasoning,
|
||||
// they can set the top-level `model_supports_reasoning_summaries = true`
|
||||
// config option to enable reasoning.
|
||||
if config.model_supports_reasoning_summaries {
|
||||
return true;
|
||||
}
|
||||
|
||||
let model = &config.model;
|
||||
model.starts_with("o") || model.starts_with("codex")
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
// Poisoned mutex should fail the program
|
||||
#![allow(clippy::unwrap_used)]
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
@@ -33,7 +34,6 @@ use tracing::trace;
|
||||
use tracing::warn;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::WireApi;
|
||||
use crate::client::ModelClient;
|
||||
use crate::client_common::Prompt;
|
||||
use crate::client_common::ResponseEvent;
|
||||
@@ -48,9 +48,7 @@ use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::exec::process_exec_tool_call;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::flags::OPENAI_STREAM_MAX_RETRIES;
|
||||
use crate::mcp_connection_manager::McpConnectionManager;
|
||||
use crate::mcp_connection_manager::try_parse_fully_qualified_tool_name;
|
||||
use crate::mcp_tool_call::handle_mcp_tool_call;
|
||||
use crate::models::ContentItem;
|
||||
use crate::models::FunctionCallOutputPayload;
|
||||
@@ -60,7 +58,9 @@ use crate::models::ResponseInputItem;
|
||||
use crate::models::ResponseItem;
|
||||
use crate::models::ShellToolCallParams;
|
||||
use crate::project_doc::get_user_instructions;
|
||||
use crate::protocol::AgentMessageDeltaEvent;
|
||||
use crate::protocol::AgentMessageEvent;
|
||||
use crate::protocol::AgentReasoningDeltaEvent;
|
||||
use crate::protocol::AgentReasoningEvent;
|
||||
use crate::protocol::ApplyPatchApprovalRequestEvent;
|
||||
use crate::protocol::AskForApproval;
|
||||
@@ -100,26 +100,37 @@ impl Codex {
|
||||
/// Spawn a new [`Codex`] and initialize the session. Returns the instance
|
||||
/// of `Codex` and the ID of the `SessionInitialized` event that was
|
||||
/// submitted to start the session.
|
||||
pub async fn spawn(config: Config, ctrl_c: Arc<Notify>) -> CodexResult<(Codex, String)> {
|
||||
pub async fn spawn(config: Config, ctrl_c: Arc<Notify>) -> CodexResult<(Codex, String, Uuid)> {
|
||||
// experimental resume path (undocumented)
|
||||
let resume_path = config.experimental_resume.clone();
|
||||
info!("resume_path: {resume_path:?}");
|
||||
let (tx_sub, rx_sub) = async_channel::bounded(64);
|
||||
let (tx_event, rx_event) = async_channel::bounded(64);
|
||||
let (tx_event, rx_event) = async_channel::bounded(1600);
|
||||
|
||||
let user_instructions = get_user_instructions(&config).await;
|
||||
|
||||
let instructions = get_user_instructions(&config).await;
|
||||
let configure_session = Op::ConfigureSession {
|
||||
provider: config.model_provider.clone(),
|
||||
model: config.model.clone(),
|
||||
model_reasoning_effort: config.model_reasoning_effort,
|
||||
model_reasoning_summary: config.model_reasoning_summary,
|
||||
instructions,
|
||||
user_instructions,
|
||||
base_instructions: config.base_instructions.clone(),
|
||||
approval_policy: config.approval_policy,
|
||||
sandbox_policy: config.sandbox_policy.clone(),
|
||||
disable_response_storage: config.disable_response_storage,
|
||||
notify: config.notify.clone(),
|
||||
cwd: config.cwd.clone(),
|
||||
resume_path: resume_path.clone(),
|
||||
};
|
||||
|
||||
let config = Arc::new(config);
|
||||
tokio::spawn(submission_loop(config, rx_sub, tx_event, ctrl_c));
|
||||
|
||||
// Generate a unique ID for the lifetime of this Codex session.
|
||||
let session_id = Uuid::new_v4();
|
||||
tokio::spawn(submission_loop(
|
||||
session_id, config, rx_sub, tx_event, ctrl_c,
|
||||
));
|
||||
let codex = Codex {
|
||||
next_id: AtomicU64::new(0),
|
||||
tx_sub,
|
||||
@@ -127,7 +138,7 @@ impl Codex {
|
||||
};
|
||||
let init_id = codex.submit(configure_session).await?;
|
||||
|
||||
Ok((codex, init_id))
|
||||
Ok((codex, init_id, session_id))
|
||||
}
|
||||
|
||||
/// Submit the `op` wrapped in a `Submission` with a unique ID.
|
||||
@@ -173,11 +184,13 @@ pub(crate) struct Session {
|
||||
/// the model as well as sandbox policies are resolved against this path
|
||||
/// instead of `std::env::current_dir()`.
|
||||
cwd: PathBuf,
|
||||
instructions: Option<String>,
|
||||
base_instructions: Option<String>,
|
||||
user_instructions: Option<String>,
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: SandboxPolicy,
|
||||
shell_environment_policy: ShellEnvironmentPolicy,
|
||||
writable_roots: Mutex<Vec<PathBuf>>,
|
||||
disable_response_storage: bool,
|
||||
|
||||
/// Manager for external MCP servers/tools.
|
||||
mcp_connection_manager: McpConnectionManager,
|
||||
@@ -188,7 +201,7 @@ pub(crate) struct Session {
|
||||
|
||||
/// Optional rollout recorder for persisting the conversation transcript so
|
||||
/// sessions can be replayed or inspected later.
|
||||
rollout: Mutex<Option<crate::rollout::RolloutRecorder>>,
|
||||
rollout: Mutex<Option<RolloutRecorder>>,
|
||||
state: Mutex<State>,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
}
|
||||
@@ -206,10 +219,9 @@ impl Session {
|
||||
struct State {
|
||||
approved_commands: HashSet<Vec<String>>,
|
||||
current_task: Option<AgentTask>,
|
||||
previous_response_id: Option<String>,
|
||||
pending_approvals: HashMap<String, oneshot::Sender<ReviewDecision>>,
|
||||
pending_input: Vec<ResponseInputItem>,
|
||||
zdr_transcript: Option<ConversationHistory>,
|
||||
history: ConversationHistory,
|
||||
}
|
||||
|
||||
impl Session {
|
||||
@@ -241,6 +253,7 @@ impl Session {
|
||||
pub async fn request_command_approval(
|
||||
&self,
|
||||
sub_id: String,
|
||||
call_id: String,
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
reason: Option<String>,
|
||||
@@ -249,6 +262,7 @@ impl Session {
|
||||
let event = Event {
|
||||
id: sub_id.clone(),
|
||||
msg: EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
|
||||
call_id,
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
@@ -265,6 +279,7 @@ impl Session {
|
||||
pub async fn request_patch_approval(
|
||||
&self,
|
||||
sub_id: String,
|
||||
call_id: String,
|
||||
action: &ApplyPatchAction,
|
||||
reason: Option<String>,
|
||||
grant_root: Option<PathBuf>,
|
||||
@@ -273,6 +288,7 @@ impl Session {
|
||||
let event = Event {
|
||||
id: sub_id.clone(),
|
||||
msg: EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent {
|
||||
call_id,
|
||||
changes: convert_apply_patch_to_protocol(action),
|
||||
reason,
|
||||
grant_root,
|
||||
@@ -302,24 +318,23 @@ impl Session {
|
||||
/// transcript, if enabled.
|
||||
async fn record_conversation_items(&self, items: &[ResponseItem]) {
|
||||
debug!("Recording items for conversation: {items:?}");
|
||||
self.record_rollout_items(items).await;
|
||||
self.record_state_snapshot(items).await;
|
||||
|
||||
if let Some(transcript) = self.state.lock().unwrap().zdr_transcript.as_mut() {
|
||||
transcript.record_items(items);
|
||||
}
|
||||
self.state.lock().unwrap().history.record_items(items);
|
||||
}
|
||||
|
||||
/// Append the given items to the session's rollout transcript (if enabled)
|
||||
/// and persist them to disk.
|
||||
async fn record_rollout_items(&self, items: &[ResponseItem]) {
|
||||
// Clone the recorder outside of the mutex so we don’t hold the lock
|
||||
// across an await point (MutexGuard is not Send).
|
||||
async fn record_state_snapshot(&self, items: &[ResponseItem]) {
|
||||
let snapshot = { crate::rollout::SessionStateSnapshot {} };
|
||||
|
||||
let recorder = {
|
||||
let guard = self.rollout.lock().unwrap();
|
||||
guard.as_ref().cloned()
|
||||
};
|
||||
|
||||
if let Some(rec) = recorder {
|
||||
if let Err(e) = rec.record_state(snapshot).await {
|
||||
error!("failed to record rollout state: {e:#}");
|
||||
}
|
||||
if let Err(e) = rec.record_items(items).await {
|
||||
error!("failed to record rollout items: {e:#}");
|
||||
}
|
||||
@@ -431,7 +446,7 @@ impl Session {
|
||||
}
|
||||
|
||||
let Ok(json) = serde_json::to_string(¬ification) else {
|
||||
tracing::error!("failed to serialise notification payload");
|
||||
error!("failed to serialise notification payload");
|
||||
return;
|
||||
};
|
||||
|
||||
@@ -443,7 +458,7 @@ impl Session {
|
||||
|
||||
// Fire-and-forget – we do not wait for completion.
|
||||
if let Err(e) = command.spawn() {
|
||||
tracing::warn!("failed to spawn notifier '{}': {e}", notify_command[0]);
|
||||
warn!("failed to spawn notifier '{}': {e}", notify_command[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -455,15 +470,10 @@ impl Drop for Session {
|
||||
}
|
||||
|
||||
impl State {
|
||||
pub fn partial_clone(&self, retain_zdr_transcript: bool) -> Self {
|
||||
pub fn partial_clone(&self) -> Self {
|
||||
Self {
|
||||
approved_commands: self.approved_commands.clone(),
|
||||
previous_response_id: self.previous_response_id.clone(),
|
||||
zdr_transcript: if retain_zdr_transcript {
|
||||
self.zdr_transcript.clone()
|
||||
} else {
|
||||
None
|
||||
},
|
||||
history: self.history.clone(),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
@@ -505,14 +515,12 @@ impl AgentTask {
|
||||
}
|
||||
|
||||
async fn submission_loop(
|
||||
mut session_id: Uuid,
|
||||
config: Arc<Config>,
|
||||
rx_sub: Receiver<Submission>,
|
||||
tx_event: Sender<Event>,
|
||||
ctrl_c: Arc<Notify>,
|
||||
) {
|
||||
// Generate a unique ID for the lifetime of this Codex session.
|
||||
let session_id = Uuid::new_v4();
|
||||
|
||||
let mut sess: Option<Arc<Session>> = None;
|
||||
// shorthand - send an event when there is no active session
|
||||
let send_no_session_event = |sub_id: String| async {
|
||||
@@ -558,14 +566,18 @@ async fn submission_loop(
|
||||
model,
|
||||
model_reasoning_effort,
|
||||
model_reasoning_summary,
|
||||
instructions,
|
||||
user_instructions,
|
||||
base_instructions,
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
disable_response_storage,
|
||||
notify,
|
||||
cwd,
|
||||
resume_path,
|
||||
} => {
|
||||
info!("Configuring session: model={model}; provider={provider:?}");
|
||||
info!(
|
||||
"Configuring session: model={model}; provider={provider:?}; resume={resume_path:?}"
|
||||
);
|
||||
if !cwd.is_absolute() {
|
||||
let message = format!("cwd is not absolute: {cwd:?}");
|
||||
error!(message);
|
||||
@@ -578,31 +590,58 @@ async fn submission_loop(
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Optionally resume an existing rollout.
|
||||
let mut restored_items: Option<Vec<ResponseItem>> = None;
|
||||
let rollout_recorder: Option<RolloutRecorder> =
|
||||
if let Some(path) = resume_path.as_ref() {
|
||||
match RolloutRecorder::resume(path).await {
|
||||
Ok((rec, saved)) => {
|
||||
session_id = saved.session_id;
|
||||
if !saved.items.is_empty() {
|
||||
restored_items = Some(saved.items);
|
||||
}
|
||||
Some(rec)
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("failed to resume rollout from {path:?}: {e}");
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let rollout_recorder = match rollout_recorder {
|
||||
Some(rec) => Some(rec),
|
||||
None => {
|
||||
match RolloutRecorder::new(&config, session_id, user_instructions.clone())
|
||||
.await
|
||||
{
|
||||
Ok(r) => Some(r),
|
||||
Err(e) => {
|
||||
warn!("failed to initialise rollout recorder: {e}");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let client = ModelClient::new(
|
||||
model.clone(),
|
||||
config.clone(),
|
||||
provider.clone(),
|
||||
model_reasoning_effort,
|
||||
model_reasoning_summary,
|
||||
session_id,
|
||||
);
|
||||
|
||||
// abort any current running session and clone its state
|
||||
let retain_zdr_transcript =
|
||||
record_conversation_history(disable_response_storage, provider.wire_api);
|
||||
let state = match sess.take() {
|
||||
Some(sess) => {
|
||||
sess.abort();
|
||||
sess.state
|
||||
.lock()
|
||||
.unwrap()
|
||||
.partial_clone(retain_zdr_transcript)
|
||||
sess.state.lock().unwrap().partial_clone()
|
||||
}
|
||||
None => State {
|
||||
zdr_transcript: if retain_zdr_transcript {
|
||||
Some(ConversationHistory::new())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
history: ConversationHistory::new(),
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
@@ -637,26 +676,12 @@ async fn submission_loop(
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Attempt to create a RolloutRecorder *before* moving the
|
||||
// `instructions` value into the Session struct.
|
||||
// TODO: if ConfigureSession is sent twice, we will create an
|
||||
// overlapping rollout file. Consider passing RolloutRecorder
|
||||
// from above.
|
||||
let rollout_recorder =
|
||||
match RolloutRecorder::new(&config, session_id, instructions.clone()).await {
|
||||
Ok(r) => Some(r),
|
||||
Err(e) => {
|
||||
tracing::warn!("failed to initialise rollout recorder: {e}");
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
sess = Some(Arc::new(Session {
|
||||
client,
|
||||
tx_event: tx_event.clone(),
|
||||
ctrl_c: Arc::clone(&ctrl_c),
|
||||
instructions,
|
||||
user_instructions,
|
||||
base_instructions,
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
shell_environment_policy: config.shell_environment_policy.clone(),
|
||||
@@ -667,8 +692,17 @@ async fn submission_loop(
|
||||
state: Mutex::new(state),
|
||||
rollout: Mutex::new(rollout_recorder),
|
||||
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
||||
disable_response_storage,
|
||||
}));
|
||||
|
||||
// Patch restored state into the newly created session.
|
||||
if let Some(sess_arc) = &sess {
|
||||
if restored_items.is_some() {
|
||||
let mut st = sess_arc.state.lock().unwrap();
|
||||
st.history.record_items(restored_items.unwrap().iter());
|
||||
}
|
||||
}
|
||||
|
||||
// Gather history metadata for SessionConfiguredEvent.
|
||||
let (history_log_id, history_entry_count) =
|
||||
crate::message_history::history_metadata(&config).await;
|
||||
@@ -737,12 +771,14 @@ async fn submission_loop(
|
||||
}
|
||||
}
|
||||
Op::AddToHistory { text } => {
|
||||
// TODO: What should we do if we got AddToHistory before ConfigureSession?
|
||||
// currently, if ConfigureSession has resume path, this history will be ignored
|
||||
let id = session_id;
|
||||
let config = config.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = crate::message_history::append_entry(&text, &id, &config).await
|
||||
{
|
||||
tracing::warn!("failed to append to message history: {e}");
|
||||
warn!("failed to append to message history: {e}");
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -772,10 +808,41 @@ async fn submission_loop(
|
||||
};
|
||||
|
||||
if let Err(e) = tx_event.send(event).await {
|
||||
tracing::warn!("failed to send GetHistoryEntryResponse event: {e}");
|
||||
warn!("failed to send GetHistoryEntryResponse event: {e}");
|
||||
}
|
||||
});
|
||||
}
|
||||
Op::Shutdown => {
|
||||
info!("Shutting down Codex instance");
|
||||
|
||||
// Gracefully flush and shutdown rollout recorder on session end so tests
|
||||
// that inspect the rollout file do not race with the background writer.
|
||||
if let Some(sess_arc) = sess {
|
||||
let recorder_opt = sess_arc.rollout.lock().unwrap().take();
|
||||
if let Some(rec) = recorder_opt {
|
||||
if let Err(e) = rec.shutdown().await {
|
||||
warn!("failed to shutdown rollout recorder: {e}");
|
||||
let event = Event {
|
||||
id: sub.id.clone(),
|
||||
msg: EventMsg::Error(ErrorEvent {
|
||||
message: "Failed to shutdown rollout recorder".to_string(),
|
||||
}),
|
||||
};
|
||||
if let Err(e) = tx_event.send(event).await {
|
||||
warn!("failed to send error message: {e:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let event = Event {
|
||||
id: sub.id.clone(),
|
||||
msg: EventMsg::ShutdownComplete,
|
||||
};
|
||||
if let Err(e) = tx_event.send(event).await {
|
||||
warn!("failed to send Shutdown event: {e}");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
debug!("Agent loop exited");
|
||||
@@ -810,14 +877,8 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
||||
sess.record_conversation_items(&[initial_input_for_turn.clone().into()])
|
||||
.await;
|
||||
|
||||
let mut input_for_next_turn: Vec<ResponseInputItem> = vec![initial_input_for_turn];
|
||||
let last_agent_message: Option<String>;
|
||||
loop {
|
||||
let mut net_new_turn_input = input_for_next_turn
|
||||
.drain(..)
|
||||
.map(ResponseItem::from)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Note that pending_input would be something like a message the user
|
||||
// submitted through the UI while the model was running. Though the UI
|
||||
// may support this, the model might not.
|
||||
@@ -834,29 +895,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
||||
// only record the new items that originated in this turn so that it
|
||||
// represents an append-only log without duplicates.
|
||||
let turn_input: Vec<ResponseItem> =
|
||||
if let Some(transcript) = sess.state.lock().unwrap().zdr_transcript.as_mut() {
|
||||
// If we are using Chat/ZDR, we need to send the transcript with
|
||||
// every turn. By induction, `transcript` already contains:
|
||||
// - The `input` that kicked off this task.
|
||||
// - Each `ResponseItem` that was recorded in the previous turn.
|
||||
// - Each response to a `ResponseItem` (in practice, the only
|
||||
// response type we seem to have is `FunctionCallOutput`).
|
||||
//
|
||||
// The only thing the `transcript` does not contain is the
|
||||
// `pending_input` that was injected while the model was
|
||||
// running. We need to add that to the conversation history
|
||||
// so that the model can see it in the next turn.
|
||||
[transcript.contents(), pending_input].concat()
|
||||
} else {
|
||||
// In practice, net_new_turn_input should contain only:
|
||||
// - User messages
|
||||
// - Outputs for function calls requested by the model
|
||||
net_new_turn_input.extend(pending_input);
|
||||
|
||||
// Responses API path – we can just send the new items and
|
||||
// record the same.
|
||||
net_new_turn_input
|
||||
};
|
||||
[sess.state.lock().unwrap().history.contents(), pending_input].concat();
|
||||
|
||||
let turn_input_messages: Vec<String> = turn_input
|
||||
.iter()
|
||||
@@ -912,15 +951,17 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
let (content, success): (String, Option<bool>) = match result {
|
||||
Ok(CallToolResult { content, is_error }) => {
|
||||
match serde_json::to_string(content) {
|
||||
Ok(content) => (content, *is_error),
|
||||
Err(e) => {
|
||||
warn!("Failed to serialize MCP tool call output: {e}");
|
||||
(e.to_string(), Some(true))
|
||||
}
|
||||
Ok(CallToolResult {
|
||||
content,
|
||||
is_error,
|
||||
structured_content: _,
|
||||
}) => match serde_json::to_string(content) {
|
||||
Ok(content) => (content, *is_error),
|
||||
Err(e) => {
|
||||
warn!("Failed to serialize MCP tool call output: {e}");
|
||||
(e.to_string(), Some(true))
|
||||
}
|
||||
}
|
||||
},
|
||||
Err(e) => (e.clone(), Some(true)),
|
||||
};
|
||||
items_to_record_in_conversation_history.push(
|
||||
@@ -930,8 +971,19 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
||||
},
|
||||
);
|
||||
}
|
||||
(ResponseItem::Reasoning { .. }, None) => {
|
||||
// Omit from conversation history.
|
||||
(
|
||||
ResponseItem::Reasoning {
|
||||
id,
|
||||
summary,
|
||||
encrypted_content,
|
||||
},
|
||||
None,
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(ResponseItem::Reasoning {
|
||||
id: id.clone(),
|
||||
summary: summary.clone(),
|
||||
encrypted_content: encrypted_content.clone(),
|
||||
});
|
||||
}
|
||||
_ => {
|
||||
warn!("Unexpected response item: {item:?} with response: {response:?}");
|
||||
@@ -960,8 +1012,6 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
input_for_next_turn = responses;
|
||||
}
|
||||
Err(e) => {
|
||||
info!("Turn error: {e:#}");
|
||||
@@ -989,27 +1039,13 @@ async fn run_turn(
|
||||
sub_id: String,
|
||||
input: Vec<ResponseItem>,
|
||||
) -> CodexResult<Vec<ProcessedResponseItem>> {
|
||||
// Decide whether to use server-side storage (previous_response_id) or disable it
|
||||
let (prev_id, store) = {
|
||||
let state = sess.state.lock().unwrap();
|
||||
let store = state.zdr_transcript.is_none();
|
||||
let prev_id = if store {
|
||||
state.previous_response_id.clone()
|
||||
} else {
|
||||
// When using ZDR, the Responses API may send previous_response_id
|
||||
// back, but trying to use it results in a 400.
|
||||
None
|
||||
};
|
||||
(prev_id, store)
|
||||
};
|
||||
|
||||
let extra_tools = sess.mcp_connection_manager.list_all_tools();
|
||||
let prompt = Prompt {
|
||||
input,
|
||||
prev_id,
|
||||
user_instructions: sess.instructions.clone(),
|
||||
store,
|
||||
user_instructions: sess.user_instructions.clone(),
|
||||
store: !sess.disable_response_storage,
|
||||
extra_tools,
|
||||
base_instructions_override: sess.base_instructions.clone(),
|
||||
};
|
||||
|
||||
let mut retries = 0;
|
||||
@@ -1019,12 +1055,13 @@ async fn run_turn(
|
||||
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
|
||||
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
|
||||
Err(e) => {
|
||||
if retries < *OPENAI_STREAM_MAX_RETRIES {
|
||||
// Use the configured provider-specific stream retry budget.
|
||||
let max_retries = sess.client.get_provider().stream_max_retries();
|
||||
if retries < max_retries {
|
||||
retries += 1;
|
||||
let delay = backoff(retries);
|
||||
warn!(
|
||||
"stream disconnected - retrying turn ({retries}/{} in {delay:?})...",
|
||||
*OPENAI_STREAM_MAX_RETRIES
|
||||
"stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",
|
||||
);
|
||||
|
||||
// Surface retry information to any UI/front‑end so the
|
||||
@@ -1033,8 +1070,7 @@ async fn run_turn(
|
||||
sess.notify_background_event(
|
||||
&sub_id,
|
||||
format!(
|
||||
"stream error: {e}; retrying {retries}/{} in {:?}…",
|
||||
*OPENAI_STREAM_MAX_RETRIES, delay
|
||||
"stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…"
|
||||
),
|
||||
)
|
||||
.await;
|
||||
@@ -1052,6 +1088,7 @@ async fn run_turn(
|
||||
/// events map to a `ResponseItem`. A `ResponseItem` may need to be
|
||||
/// "handled" such that it produces a `ResponseInputItem` that needs to be
|
||||
/// sent back to the model on the next turn.
|
||||
#[derive(Debug)]
|
||||
struct ProcessedResponseItem {
|
||||
item: ResponseItem,
|
||||
response: Option<ResponseInputItem>,
|
||||
@@ -1062,30 +1099,126 @@ async fn try_run_turn(
|
||||
sub_id: &str,
|
||||
prompt: &Prompt,
|
||||
) -> CodexResult<Vec<ProcessedResponseItem>> {
|
||||
let mut stream = sess.client.clone().stream(prompt).await?;
|
||||
// call_ids that are part of this response.
|
||||
let completed_call_ids = prompt
|
||||
.input
|
||||
.iter()
|
||||
.filter_map(|ri| match ri {
|
||||
ResponseItem::FunctionCallOutput { call_id, .. } => Some(call_id),
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(call_id),
|
||||
..
|
||||
} => Some(call_id),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Buffer all the incoming messages from the stream first, then execute them.
|
||||
// If we execute a function call in the middle of handling the stream, it can time out.
|
||||
let mut input = Vec::new();
|
||||
while let Some(event) = stream.next().await {
|
||||
input.push(event?);
|
||||
}
|
||||
// call_ids that were pending but are not part of this response.
|
||||
// This usually happens because the user interrupted the model before we responded to one of its tool calls
|
||||
// and then the user sent a follow-up message.
|
||||
let missing_calls = {
|
||||
prompt
|
||||
.input
|
||||
.iter()
|
||||
.filter_map(|ri| match ri {
|
||||
ResponseItem::FunctionCall { call_id, .. } => Some(call_id),
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(call_id),
|
||||
..
|
||||
} => Some(call_id),
|
||||
_ => None,
|
||||
})
|
||||
.filter_map(|call_id| {
|
||||
if completed_call_ids.contains(&call_id) {
|
||||
None
|
||||
} else {
|
||||
Some(call_id.clone())
|
||||
}
|
||||
})
|
||||
.map(|call_id| ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: Some(false),
|
||||
},
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
let prompt: Cow<Prompt> = if missing_calls.is_empty() {
|
||||
Cow::Borrowed(prompt)
|
||||
} else {
|
||||
// Add the synthetic aborted missing calls to the beginning of the input to ensure all call ids have responses.
|
||||
let input = [missing_calls, prompt.input.clone()].concat();
|
||||
Cow::Owned(Prompt {
|
||||
input,
|
||||
..prompt.clone()
|
||||
})
|
||||
};
|
||||
|
||||
let mut stream = sess.client.clone().stream(&prompt).await?;
|
||||
|
||||
let mut output = Vec::new();
|
||||
for event in input {
|
||||
loop {
|
||||
// Poll the next item from the model stream. We must inspect *both* Ok and Err
|
||||
// cases so that transient stream failures (e.g., dropped SSE connection before
|
||||
// `response.completed`) bubble up and trigger the caller's retry logic.
|
||||
let event = stream.next().await;
|
||||
let Some(event) = event else {
|
||||
// Channel closed without yielding a final Completed event or explicit error.
|
||||
// Treat as a disconnected stream so the caller can retry.
|
||||
return Err(CodexErr::Stream(
|
||||
"stream closed before response.completed".into(),
|
||||
));
|
||||
};
|
||||
|
||||
let event = match event {
|
||||
Ok(ev) => ev,
|
||||
Err(e) => {
|
||||
// Propagate the underlying stream error to the caller (run_turn), which
|
||||
// will apply the configured `stream_max_retries` policy.
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
|
||||
match event {
|
||||
ResponseEvent::Created => {}
|
||||
ResponseEvent::OutputItemDone(item) => {
|
||||
let response = handle_response_item(sess, sub_id, item.clone()).await?;
|
||||
|
||||
output.push(ProcessedResponseItem { item, response });
|
||||
}
|
||||
ResponseEvent::Completed { response_id } => {
|
||||
let mut state = sess.state.lock().unwrap();
|
||||
state.previous_response_id = Some(response_id);
|
||||
break;
|
||||
ResponseEvent::Completed {
|
||||
response_id: _,
|
||||
token_usage,
|
||||
} => {
|
||||
if let Some(token_usage) = token_usage {
|
||||
sess.tx_event
|
||||
.send(Event {
|
||||
id: sub_id.to_string(),
|
||||
msg: EventMsg::TokenCount(token_usage),
|
||||
})
|
||||
.await
|
||||
.ok();
|
||||
}
|
||||
|
||||
return Ok(output);
|
||||
}
|
||||
ResponseEvent::OutputTextDelta(delta) => {
|
||||
let event = Event {
|
||||
id: sub_id.to_string(),
|
||||
msg: EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }),
|
||||
};
|
||||
sess.tx_event.send(event).await.ok();
|
||||
}
|
||||
ResponseEvent::ReasoningSummaryDelta(delta) => {
|
||||
let event = Event {
|
||||
id: sub_id.to_string(),
|
||||
msg: EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta }),
|
||||
};
|
||||
sess.tx_event.send(event).await.ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
async fn handle_response_item(
|
||||
@@ -1107,7 +1240,7 @@ async fn handle_response_item(
|
||||
}
|
||||
None
|
||||
}
|
||||
ResponseItem::Reasoning { id: _, summary } => {
|
||||
ResponseItem::Reasoning { summary, .. } => {
|
||||
for item in summary {
|
||||
let text = match item {
|
||||
ReasoningItemReasoningSummary::SummaryText { text } => text,
|
||||
@@ -1124,8 +1257,9 @@ async fn handle_response_item(
|
||||
name,
|
||||
arguments,
|
||||
call_id,
|
||||
..
|
||||
} => {
|
||||
tracing::info!("FunctionCall: {arguments}");
|
||||
info!("FunctionCall: {arguments}");
|
||||
Some(handle_function_call(sess, sub_id.to_string(), name, arguments, call_id).await)
|
||||
}
|
||||
ResponseItem::LocalShellCall {
|
||||
@@ -1188,13 +1322,13 @@ async fn handle_function_call(
|
||||
let params = match parse_container_exec_arguments(arguments, sess, &call_id) {
|
||||
Ok(params) => params,
|
||||
Err(output) => {
|
||||
return output;
|
||||
return *output;
|
||||
}
|
||||
};
|
||||
handle_container_exec_with_params(params, sess, sub_id, call_id).await
|
||||
}
|
||||
_ => {
|
||||
match try_parse_fully_qualified_tool_name(&name) {
|
||||
match sess.mcp_connection_manager.parse_tool_name(&name) {
|
||||
Some((server, tool_name)) => {
|
||||
// TODO(mbolin): Determine appropriate timeout for tool call.
|
||||
let timeout = None;
|
||||
@@ -1207,8 +1341,8 @@ async fn handle_function_call(
|
||||
// Unknown function: reply with structured failure so the model can adapt.
|
||||
ResponseInputItem::FunctionCallOutput {
|
||||
call_id,
|
||||
output: crate::models::FunctionCallOutputPayload {
|
||||
content: format!("unsupported call: {}", name),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: format!("unsupported call: {name}"),
|
||||
success: None,
|
||||
},
|
||||
}
|
||||
@@ -1231,7 +1365,7 @@ fn parse_container_exec_arguments(
|
||||
arguments: String,
|
||||
sess: &Session,
|
||||
call_id: &str,
|
||||
) -> Result<ExecParams, ResponseInputItem> {
|
||||
) -> Result<ExecParams, Box<ResponseInputItem>> {
|
||||
// parse command
|
||||
match serde_json::from_str::<ShellToolCallParams>(&arguments) {
|
||||
Ok(shell_tool_call_params) => Ok(to_exec_params(shell_tool_call_params, sess)),
|
||||
@@ -1239,12 +1373,12 @@ fn parse_container_exec_arguments(
|
||||
// allow model to re-sample
|
||||
let output = ResponseInputItem::FunctionCallOutput {
|
||||
call_id: call_id.to_string(),
|
||||
output: crate::models::FunctionCallOutputPayload {
|
||||
output: FunctionCallOutputPayload {
|
||||
content: format!("failed to parse function arguments: {e}"),
|
||||
success: None,
|
||||
},
|
||||
};
|
||||
Err(output)
|
||||
Err(Box::new(output))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1294,6 +1428,7 @@ async fn handle_container_exec_with_params(
|
||||
let rx_approve = sess
|
||||
.request_command_approval(
|
||||
sub_id.clone(),
|
||||
call_id.clone(),
|
||||
params.command.clone(),
|
||||
params.cwd.clone(),
|
||||
None,
|
||||
@@ -1307,7 +1442,7 @@ async fn handle_container_exec_with_params(
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return ResponseInputItem::FunctionCallOutput {
|
||||
call_id,
|
||||
output: crate::models::FunctionCallOutputPayload {
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "exec command rejected by user".to_string(),
|
||||
success: None,
|
||||
},
|
||||
@@ -1323,7 +1458,7 @@ async fn handle_container_exec_with_params(
|
||||
SafetyCheck::Reject { reason } => {
|
||||
return ResponseInputItem::FunctionCallOutput {
|
||||
call_id,
|
||||
output: crate::models::FunctionCallOutputPayload {
|
||||
output: FunctionCallOutputPayload {
|
||||
content: format!("exec command rejected: {reason}"),
|
||||
success: None,
|
||||
},
|
||||
@@ -1371,7 +1506,7 @@ async fn handle_container_exec_with_params(
|
||||
}
|
||||
}
|
||||
Err(CodexErr::Sandbox(error)) => {
|
||||
handle_sanbox_error(error, sandbox_type, params, sess, sub_id, call_id).await
|
||||
handle_sandbox_error(error, sandbox_type, params, sess, sub_id, call_id).await
|
||||
}
|
||||
Err(e) => {
|
||||
// Handle non-sandbox errors
|
||||
@@ -1386,7 +1521,7 @@ async fn handle_container_exec_with_params(
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_sanbox_error(
|
||||
async fn handle_sandbox_error(
|
||||
error: SandboxErr,
|
||||
sandbox_type: SandboxType,
|
||||
params: ExecParams,
|
||||
@@ -1400,21 +1535,28 @@ async fn handle_sanbox_error(
|
||||
call_id,
|
||||
output: FunctionCallOutputPayload {
|
||||
content: format!(
|
||||
"failed in sandbox {:?} with execution error: {error}",
|
||||
sandbox_type
|
||||
"failed in sandbox {sandbox_type:?} with execution error: {error}"
|
||||
),
|
||||
success: Some(false),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Ask the user to retry without sandbox
|
||||
// Note that when `error` is `SandboxErr::Denied`, it could be a false
|
||||
// positive. That is, it may have exited with a non-zero exit code, not
|
||||
// because the sandbox denied it, but because that is its expected behavior,
|
||||
// i.e., a grep command that did not match anything. Ideally we would
|
||||
// include additional metadata on the command to indicate whether non-zero
|
||||
// exit codes merit a retry.
|
||||
|
||||
// For now, we categorically ask the user to retry without sandbox.
|
||||
sess.notify_background_event(&sub_id, format!("Execution failed: {error}"))
|
||||
.await;
|
||||
|
||||
let rx_approve = sess
|
||||
.request_command_approval(
|
||||
sub_id.clone(),
|
||||
call_id.clone(),
|
||||
params.command.clone(),
|
||||
params.cwd.clone(),
|
||||
Some("command failed; retry without sandbox?".to_string()),
|
||||
@@ -1432,9 +1574,7 @@ async fn handle_sanbox_error(
|
||||
sess.notify_background_event(&sub_id, "retrying command without sandbox")
|
||||
.await;
|
||||
|
||||
// Emit a fresh Begin event so progress bars reset.
|
||||
let retry_call_id = format!("{call_id}-retry");
|
||||
sess.notify_exec_command_begin(&sub_id, &retry_call_id, ¶ms)
|
||||
sess.notify_exec_command_begin(&sub_id, &call_id, ¶ms)
|
||||
.await;
|
||||
|
||||
// This is an escalated retry; the policy will not be
|
||||
@@ -1457,14 +1597,8 @@ async fn handle_sanbox_error(
|
||||
duration,
|
||||
} = retry_output;
|
||||
|
||||
sess.notify_exec_command_end(
|
||||
&sub_id,
|
||||
&retry_call_id,
|
||||
&stdout,
|
||||
&stderr,
|
||||
exit_code,
|
||||
)
|
||||
.await;
|
||||
sess.notify_exec_command_end(&sub_id, &call_id, &stdout, &stderr, exit_code)
|
||||
.await;
|
||||
|
||||
let is_success = exit_code == 0;
|
||||
let content = format_exec_output(
|
||||
@@ -1528,7 +1662,7 @@ async fn apply_patch(
|
||||
// Compute a readable summary of path changes to include in the
|
||||
// approval request so the user can make an informed decision.
|
||||
let rx_approve = sess
|
||||
.request_patch_approval(sub_id.clone(), &action, None, None)
|
||||
.request_patch_approval(sub_id.clone(), call_id.clone(), &action, None, None)
|
||||
.await;
|
||||
match rx_approve.await.unwrap_or_default() {
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => false,
|
||||
@@ -1566,7 +1700,13 @@ async fn apply_patch(
|
||||
));
|
||||
|
||||
let rx = sess
|
||||
.request_patch_approval(sub_id.clone(), &action, reason.clone(), Some(root.clone()))
|
||||
.request_patch_approval(
|
||||
sub_id.clone(),
|
||||
call_id.clone(),
|
||||
&action,
|
||||
reason.clone(),
|
||||
Some(root.clone()),
|
||||
)
|
||||
.await;
|
||||
|
||||
if !matches!(
|
||||
@@ -1650,6 +1790,7 @@ async fn apply_patch(
|
||||
let rx = sess
|
||||
.request_patch_approval(
|
||||
sub_id.clone(),
|
||||
call_id.clone(),
|
||||
&action,
|
||||
reason.clone(),
|
||||
Some(root.clone()),
|
||||
@@ -1850,7 +1991,7 @@ fn apply_changes_from_apply_patch(action: &ApplyPatchAction) -> anyhow::Result<A
|
||||
})
|
||||
}
|
||||
|
||||
fn get_writable_roots(cwd: &Path) -> Vec<std::path::PathBuf> {
|
||||
fn get_writable_roots(cwd: &Path) -> Vec<PathBuf> {
|
||||
let mut writable_roots = Vec::new();
|
||||
if cfg!(target_os = "macos") {
|
||||
// On macOS, $TMPDIR is private to the user.
|
||||
@@ -1878,7 +2019,7 @@ fn get_writable_roots(cwd: &Path) -> Vec<std::path::PathBuf> {
|
||||
}
|
||||
|
||||
/// Exec output is a pre-serialized JSON payload
|
||||
fn format_exec_output(output: &str, exit_code: i32, duration: std::time::Duration) -> String {
|
||||
fn format_exec_output(output: &str, exit_code: i32, duration: Duration) -> String {
|
||||
#[derive(Serialize)]
|
||||
struct ExecMetadata {
|
||||
exit_code: i32,
|
||||
@@ -1908,7 +2049,7 @@ fn format_exec_output(output: &str, exit_code: i32, duration: std::time::Duratio
|
||||
|
||||
fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -> Option<String> {
|
||||
responses.iter().rev().find_map(|item| {
|
||||
if let ResponseItem::Message { role, content } = item {
|
||||
if let ResponseItem::Message { role, content, .. } = item {
|
||||
if role == "assistant" {
|
||||
content.iter().rev().find_map(|ci| {
|
||||
if let ContentItem::OutputText { text } = ci {
|
||||
@@ -1925,15 +2066,3 @@ fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -> Option<St
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// See [`ConversationHistory`] for details.
|
||||
fn record_conversation_history(disable_response_storage: bool, wire_api: WireApi) -> bool {
|
||||
if disable_response_storage {
|
||||
return true;
|
||||
}
|
||||
|
||||
match wire_api {
|
||||
WireApi::Responses => false,
|
||||
WireApi::Chat => true,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,15 +6,16 @@ use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::util::notify_on_sigint;
|
||||
use tokio::sync::Notify;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Spawn a new [`Codex`] and initialize the session.
|
||||
///
|
||||
/// Returns the wrapped [`Codex`] **and** the `SessionInitialized` event that
|
||||
/// is received as a response to the initial `ConfigureSession` submission so
|
||||
/// that callers can surface the information to the UI.
|
||||
pub async fn init_codex(config: Config) -> anyhow::Result<(Codex, Event, Arc<Notify>)> {
|
||||
pub async fn init_codex(config: Config) -> anyhow::Result<(Codex, Event, Arc<Notify>, Uuid)> {
|
||||
let ctrl_c = notify_on_sigint();
|
||||
let (codex, init_id) = Codex::spawn(config, ctrl_c.clone()).await?;
|
||||
let (codex, init_id, session_id) = Codex::spawn(config, ctrl_c.clone()).await?;
|
||||
|
||||
// The first event must be `SessionInitialized`. Validate and forward it to
|
||||
// the caller so that they can display it in the conversation history.
|
||||
@@ -33,5 +34,5 @@ pub async fn init_codex(config: Config) -> anyhow::Result<(Codex, Event, Arc<Not
|
||||
));
|
||||
}
|
||||
|
||||
Ok((codex, event, ctrl_c))
|
||||
Ok((codex, event, ctrl_c, session_id))
|
||||
}
|
||||
|
||||
@@ -3,6 +3,8 @@ use crate::config_types::History;
|
||||
use crate::config_types::McpServerConfig;
|
||||
use crate::config_types::ReasoningEffort;
|
||||
use crate::config_types::ReasoningSummary;
|
||||
use crate::config_types::SandboxMode;
|
||||
use crate::config_types::SandboxWorkplaceWrite;
|
||||
use crate::config_types::ShellEnvironmentPolicy;
|
||||
use crate::config_types::ShellEnvironmentPolicyToml;
|
||||
use crate::config_types::Tui;
|
||||
@@ -10,8 +12,8 @@ use crate::config_types::UriBasedFileOpener;
|
||||
use crate::flags::OPENAI_DEFAULT_MODEL;
|
||||
use crate::model_provider_info::ModelProviderInfo;
|
||||
use crate::model_provider_info::built_in_model_providers;
|
||||
use crate::openai_model_info::get_model_info;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::SandboxPermission;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use dirs::home_dir;
|
||||
use serde::Deserialize;
|
||||
@@ -31,6 +33,12 @@ pub struct Config {
|
||||
/// Optional override of model selection.
|
||||
pub model: String,
|
||||
|
||||
/// Size of the context window for the model, in tokens.
|
||||
pub model_context_window: Option<u64>,
|
||||
|
||||
/// Maximum number of output tokens.
|
||||
pub model_max_output_tokens: Option<u64>,
|
||||
|
||||
/// Key into the model_providers map that specifies which provider to use.
|
||||
pub model_provider_id: String,
|
||||
|
||||
@@ -55,7 +63,10 @@ pub struct Config {
|
||||
pub disable_response_storage: bool,
|
||||
|
||||
/// User-provided instructions from instructions.md.
|
||||
pub instructions: Option<String>,
|
||||
pub user_instructions: Option<String>,
|
||||
|
||||
/// Base instructions override.
|
||||
pub base_instructions: Option<String>,
|
||||
|
||||
/// Optional external notifier command. When set, Codex will spawn this
|
||||
/// program after each completed *turn* (i.e. when the agent finishes
|
||||
@@ -122,6 +133,16 @@ pub struct Config {
|
||||
/// If not "none", the value to use for `reasoning.summary` when making a
|
||||
/// request using the Responses API.
|
||||
pub model_reasoning_summary: ReasoningSummary,
|
||||
|
||||
/// When set to `true`, overrides the default heuristic and forces
|
||||
/// `model_supports_reasoning_summaries()` to return `true`.
|
||||
pub model_supports_reasoning_summaries: bool,
|
||||
|
||||
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
|
||||
pub chatgpt_base_url: String,
|
||||
|
||||
/// Experimental rollout resume path (absolute path to .jsonl; undocumented).
|
||||
pub experimental_resume: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
@@ -235,17 +256,23 @@ pub struct ConfigToml {
|
||||
/// Provider to use from the model_providers map.
|
||||
pub model_provider: Option<String>,
|
||||
|
||||
/// Size of the context window for the model, in tokens.
|
||||
pub model_context_window: Option<u64>,
|
||||
|
||||
/// Maximum number of output tokens.
|
||||
pub model_max_output_tokens: Option<u64>,
|
||||
|
||||
/// Default approval policy for executing commands.
|
||||
pub approval_policy: Option<AskForApproval>,
|
||||
|
||||
#[serde(default)]
|
||||
pub shell_environment_policy: ShellEnvironmentPolicyToml,
|
||||
|
||||
// The `default` attribute ensures that the field is treated as `None` when
|
||||
// the key is omitted from the TOML. Without it, Serde treats the field as
|
||||
// required because we supply a custom deserializer.
|
||||
#[serde(default, deserialize_with = "deserialize_sandbox_permissions")]
|
||||
pub sandbox_permissions: Option<Vec<SandboxPermission>>,
|
||||
/// Sandbox mode to use.
|
||||
pub sandbox_mode: Option<SandboxMode>,
|
||||
|
||||
/// Sandbox configuration to apply if `sandbox` is `WorkspaceWrite`.
|
||||
pub sandbox_workspace_write: Option<SandboxWorkplaceWrite>,
|
||||
|
||||
/// Disable server-side response storage (sends the full conversation
|
||||
/// context with every request). Currently necessary for OpenAI customers
|
||||
@@ -294,31 +321,37 @@ pub struct ConfigToml {
|
||||
|
||||
pub model_reasoning_effort: Option<ReasoningEffort>,
|
||||
pub model_reasoning_summary: Option<ReasoningSummary>,
|
||||
|
||||
/// Override to force-enable reasoning summaries for the configured model.
|
||||
pub model_supports_reasoning_summaries: Option<bool>,
|
||||
|
||||
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
|
||||
pub chatgpt_base_url: Option<String>,
|
||||
|
||||
/// Experimental rollout resume path (absolute path to .jsonl; undocumented).
|
||||
pub experimental_resume: Option<PathBuf>,
|
||||
|
||||
/// Experimental path to a file whose contents replace the built-in BASE_INSTRUCTIONS.
|
||||
pub experimental_instructions_file: Option<PathBuf>,
|
||||
}
|
||||
|
||||
fn deserialize_sandbox_permissions<'de, D>(
|
||||
deserializer: D,
|
||||
) -> Result<Option<Vec<SandboxPermission>>, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
let permissions: Option<Vec<String>> = Option::deserialize(deserializer)?;
|
||||
|
||||
match permissions {
|
||||
Some(raw_permissions) => {
|
||||
let base_path = find_codex_home().map_err(serde::de::Error::custom)?;
|
||||
|
||||
let converted = raw_permissions
|
||||
.into_iter()
|
||||
.map(|raw| {
|
||||
parse_sandbox_permission_with_base_path(&raw, base_path.clone())
|
||||
.map_err(serde::de::Error::custom)
|
||||
})
|
||||
.collect::<Result<Vec<_>, D::Error>>()?;
|
||||
|
||||
Ok(Some(converted))
|
||||
impl ConfigToml {
|
||||
/// Derive the effective sandbox policy from the configuration.
|
||||
fn derive_sandbox_policy(&self, sandbox_mode_override: Option<SandboxMode>) -> SandboxPolicy {
|
||||
let resolved_sandbox_mode = sandbox_mode_override
|
||||
.or(self.sandbox_mode)
|
||||
.unwrap_or_default();
|
||||
match resolved_sandbox_mode {
|
||||
SandboxMode::ReadOnly => SandboxPolicy::new_read_only_policy(),
|
||||
SandboxMode::WorkspaceWrite => match self.sandbox_workspace_write.as_ref() {
|
||||
Some(s) => SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: s.writable_roots.clone(),
|
||||
network_access: s.network_access,
|
||||
},
|
||||
None => SandboxPolicy::new_workspace_write_policy(),
|
||||
},
|
||||
SandboxMode::DangerFullAccess => SandboxPolicy::DangerFullAccess,
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -328,10 +361,11 @@ pub struct ConfigOverrides {
|
||||
pub model: Option<String>,
|
||||
pub cwd: Option<PathBuf>,
|
||||
pub approval_policy: Option<AskForApproval>,
|
||||
pub sandbox_policy: Option<SandboxPolicy>,
|
||||
pub sandbox_mode: Option<SandboxMode>,
|
||||
pub model_provider: Option<String>,
|
||||
pub config_profile: Option<String>,
|
||||
pub codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
pub base_instructions: Option<String>,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
@@ -342,23 +376,24 @@ impl Config {
|
||||
overrides: ConfigOverrides,
|
||||
codex_home: PathBuf,
|
||||
) -> std::io::Result<Self> {
|
||||
let instructions = Self::load_instructions(Some(&codex_home));
|
||||
let user_instructions = Self::load_instructions(Some(&codex_home));
|
||||
|
||||
// Destructure ConfigOverrides fully to ensure all overrides are applied.
|
||||
let ConfigOverrides {
|
||||
model,
|
||||
cwd,
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
sandbox_mode,
|
||||
model_provider,
|
||||
config_profile: config_profile_key,
|
||||
codex_linux_sandbox_exe,
|
||||
base_instructions,
|
||||
} = overrides;
|
||||
|
||||
let config_profile = match config_profile_key.or(cfg.profile) {
|
||||
let config_profile = match config_profile_key.as_ref().or(cfg.profile.as_ref()) {
|
||||
Some(key) => cfg
|
||||
.profiles
|
||||
.get(&key)
|
||||
.get(key)
|
||||
.ok_or_else(|| {
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::NotFound,
|
||||
@@ -369,20 +404,7 @@ impl Config {
|
||||
None => ConfigProfile::default(),
|
||||
};
|
||||
|
||||
let sandbox_policy = match sandbox_policy {
|
||||
Some(sandbox_policy) => sandbox_policy,
|
||||
None => {
|
||||
// Derive a SandboxPolicy from the permissions in the config.
|
||||
match cfg.sandbox_permissions {
|
||||
// Note this means the user can explicitly set permissions
|
||||
// to the empty list in the config file, granting it no
|
||||
// permissions whatsoever.
|
||||
Some(permissions) => SandboxPolicy::from(permissions),
|
||||
// Default to read only rather than completely locked down.
|
||||
None => SandboxPolicy::new_read_only_policy(),
|
||||
}
|
||||
}
|
||||
};
|
||||
let sandbox_policy = cfg.derive_sandbox_policy(sandbox_mode);
|
||||
|
||||
let mut model_providers = built_in_model_providers();
|
||||
// Merge user-defined providers into the built-in list.
|
||||
@@ -427,11 +449,30 @@ impl Config {
|
||||
|
||||
let history = cfg.history.unwrap_or_default();
|
||||
|
||||
let model = model
|
||||
.or(config_profile.model)
|
||||
.or(cfg.model)
|
||||
.unwrap_or_else(default_model);
|
||||
let openai_model_info = get_model_info(&model);
|
||||
let model_context_window = cfg
|
||||
.model_context_window
|
||||
.or_else(|| openai_model_info.as_ref().map(|info| info.context_window));
|
||||
let model_max_output_tokens = cfg.model_max_output_tokens.or_else(|| {
|
||||
openai_model_info
|
||||
.as_ref()
|
||||
.map(|info| info.max_output_tokens)
|
||||
});
|
||||
|
||||
let experimental_resume = cfg.experimental_resume;
|
||||
|
||||
let base_instructions = base_instructions.or(Self::get_base_instructions(
|
||||
cfg.experimental_instructions_file.as_ref(),
|
||||
));
|
||||
|
||||
let config = Self {
|
||||
model: model
|
||||
.or(config_profile.model)
|
||||
.or(cfg.model)
|
||||
.unwrap_or_else(default_model),
|
||||
model,
|
||||
model_context_window,
|
||||
model_max_output_tokens,
|
||||
model_provider_id,
|
||||
model_provider,
|
||||
cwd: resolved_cwd,
|
||||
@@ -446,7 +487,8 @@ impl Config {
|
||||
.or(cfg.disable_response_storage)
|
||||
.unwrap_or(false),
|
||||
notify: cfg.notify,
|
||||
instructions,
|
||||
user_instructions,
|
||||
base_instructions,
|
||||
mcp_servers: cfg.mcp_servers,
|
||||
model_providers,
|
||||
project_doc_max_bytes: cfg.project_doc_max_bytes.unwrap_or(PROJECT_DOC_MAX_BYTES),
|
||||
@@ -457,8 +499,25 @@ impl Config {
|
||||
codex_linux_sandbox_exe,
|
||||
|
||||
hide_agent_reasoning: cfg.hide_agent_reasoning.unwrap_or(false),
|
||||
model_reasoning_effort: cfg.model_reasoning_effort.unwrap_or_default(),
|
||||
model_reasoning_summary: cfg.model_reasoning_summary.unwrap_or_default(),
|
||||
model_reasoning_effort: config_profile
|
||||
.model_reasoning_effort
|
||||
.or(cfg.model_reasoning_effort)
|
||||
.unwrap_or_default(),
|
||||
model_reasoning_summary: config_profile
|
||||
.model_reasoning_summary
|
||||
.or(cfg.model_reasoning_summary)
|
||||
.unwrap_or_default(),
|
||||
|
||||
model_supports_reasoning_summaries: cfg
|
||||
.model_supports_reasoning_summaries
|
||||
.unwrap_or(false),
|
||||
|
||||
chatgpt_base_url: config_profile
|
||||
.chatgpt_base_url
|
||||
.or(cfg.chatgpt_base_url)
|
||||
.unwrap_or("https://chatgpt.com/backend-api/".to_string()),
|
||||
|
||||
experimental_resume,
|
||||
};
|
||||
Ok(config)
|
||||
}
|
||||
@@ -479,6 +538,15 @@ impl Config {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn get_base_instructions(path: Option<&PathBuf>) -> Option<String> {
|
||||
let path = path.as_ref()?;
|
||||
|
||||
std::fs::read_to_string(path)
|
||||
.ok()
|
||||
.map(|s| s.trim().to_string())
|
||||
.filter(|s| !s.is_empty())
|
||||
}
|
||||
}
|
||||
|
||||
fn default_model() -> String {
|
||||
@@ -493,7 +561,7 @@ fn default_model() -> String {
|
||||
/// function will Err if the path does not exist.
|
||||
/// - If `CODEX_HOME` is not set, this function does not verify that the
|
||||
/// directory exists.
|
||||
fn find_codex_home() -> std::io::Result<PathBuf> {
|
||||
pub fn find_codex_home() -> std::io::Result<PathBuf> {
|
||||
// Honor the `CODEX_HOME` environment variable when it is set to allow users
|
||||
// (and tests) to override the default location.
|
||||
if let Ok(val) = std::env::var("CODEX_HOME") {
|
||||
@@ -520,50 +588,6 @@ pub fn log_dir(cfg: &Config) -> std::io::Result<PathBuf> {
|
||||
Ok(p)
|
||||
}
|
||||
|
||||
pub fn parse_sandbox_permission_with_base_path(
|
||||
raw: &str,
|
||||
base_path: PathBuf,
|
||||
) -> std::io::Result<SandboxPermission> {
|
||||
use SandboxPermission::*;
|
||||
|
||||
if let Some(path) = raw.strip_prefix("disk-write-folder=") {
|
||||
return if path.is_empty() {
|
||||
Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"--sandbox-permission disk-write-folder=<PATH> requires a non-empty PATH",
|
||||
))
|
||||
} else {
|
||||
use path_absolutize::*;
|
||||
|
||||
let file = PathBuf::from(path);
|
||||
let absolute_path = if file.is_relative() {
|
||||
file.absolutize_from(base_path)
|
||||
} else {
|
||||
file.absolutize()
|
||||
}
|
||||
.map(|path| path.into_owned())?;
|
||||
Ok(DiskWriteFolder {
|
||||
folder: absolute_path,
|
||||
})
|
||||
};
|
||||
}
|
||||
|
||||
match raw {
|
||||
"disk-full-read-access" => Ok(DiskFullReadAccess),
|
||||
"disk-write-platform-user-temp-folder" => Ok(DiskWritePlatformUserTempFolder),
|
||||
"disk-write-platform-global-temp-folder" => Ok(DiskWritePlatformGlobalTempFolder),
|
||||
"disk-write-cwd" => Ok(DiskWriteCwd),
|
||||
"disk-full-write-access" => Ok(DiskFullWriteAccess),
|
||||
"network-full-access" => Ok(NetworkFullAccess),
|
||||
_ => Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
format!(
|
||||
"`{raw}` is not a recognised permission.\nRun with `--help` to see the accepted values."
|
||||
),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
@@ -573,51 +597,14 @@ mod tests {
|
||||
use pretty_assertions::assert_eq;
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Verify that the `sandbox_permissions` field on `ConfigToml` correctly
|
||||
/// differentiates between a value that is completely absent in the
|
||||
/// provided TOML (i.e. `None`) and one that is explicitly specified as an
|
||||
/// empty array (i.e. `Some(vec![])`). This ensures that downstream logic
|
||||
/// that treats these two cases differently (default read-only policy vs a
|
||||
/// fully locked-down sandbox) continues to function.
|
||||
#[test]
|
||||
fn test_sandbox_permissions_none_vs_empty_vec() {
|
||||
// Case 1: `sandbox_permissions` key is *absent* from the TOML source.
|
||||
let toml_source_without_key = "";
|
||||
let cfg_without_key: ConfigToml = toml::from_str(toml_source_without_key)
|
||||
.expect("TOML deserialization without key should succeed");
|
||||
assert!(cfg_without_key.sandbox_permissions.is_none());
|
||||
|
||||
// Case 2: `sandbox_permissions` is present but set to an *empty array*.
|
||||
let toml_source_with_empty = "sandbox_permissions = []";
|
||||
let cfg_with_empty: ConfigToml = toml::from_str(toml_source_with_empty)
|
||||
.expect("TOML deserialization with empty array should succeed");
|
||||
assert_eq!(Some(vec![]), cfg_with_empty.sandbox_permissions);
|
||||
|
||||
// Case 3: `sandbox_permissions` contains a non-empty list of valid values.
|
||||
let toml_source_with_values = r#"
|
||||
sandbox_permissions = ["disk-full-read-access", "network-full-access"]
|
||||
"#;
|
||||
let cfg_with_values: ConfigToml = toml::from_str(toml_source_with_values)
|
||||
.expect("TOML deserialization with valid permissions should succeed");
|
||||
|
||||
assert_eq!(
|
||||
Some(vec![
|
||||
SandboxPermission::DiskFullReadAccess,
|
||||
SandboxPermission::NetworkFullAccess
|
||||
]),
|
||||
cfg_with_values.sandbox_permissions
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_toml_parsing() {
|
||||
let history_with_persistence = r#"
|
||||
[history]
|
||||
persistence = "save-all"
|
||||
"#;
|
||||
let history_with_persistence_cfg: ConfigToml =
|
||||
toml::from_str::<ConfigToml>(history_with_persistence)
|
||||
.expect("TOML deserialization should succeed");
|
||||
let history_with_persistence_cfg = toml::from_str::<ConfigToml>(history_with_persistence)
|
||||
.expect("TOML deserialization should succeed");
|
||||
assert_eq!(
|
||||
Some(History {
|
||||
persistence: HistoryPersistence::SaveAll,
|
||||
@@ -631,9 +618,8 @@ persistence = "save-all"
|
||||
persistence = "none"
|
||||
"#;
|
||||
|
||||
let history_no_persistence_cfg: ConfigToml =
|
||||
toml::from_str::<ConfigToml>(history_no_persistence)
|
||||
.expect("TOML deserialization should succeed");
|
||||
let history_no_persistence_cfg = toml::from_str::<ConfigToml>(history_no_persistence)
|
||||
.expect("TOML deserialization should succeed");
|
||||
assert_eq!(
|
||||
Some(History {
|
||||
persistence: HistoryPersistence::None,
|
||||
@@ -643,20 +629,56 @@ persistence = "none"
|
||||
);
|
||||
}
|
||||
|
||||
/// Deserializing a TOML string containing an *invalid* permission should
|
||||
/// fail with a helpful error rather than silently defaulting or
|
||||
/// succeeding.
|
||||
#[test]
|
||||
fn test_sandbox_permissions_illegal_value() {
|
||||
let toml_bad = r#"sandbox_permissions = ["not-a-real-permission"]"#;
|
||||
fn test_sandbox_config_parsing() {
|
||||
let sandbox_full_access = r#"
|
||||
sandbox_mode = "danger-full-access"
|
||||
|
||||
let err = toml::from_str::<ConfigToml>(toml_bad)
|
||||
.expect_err("Deserialization should fail for invalid permission");
|
||||
[sandbox_workspace_write]
|
||||
network_access = false # This should be ignored.
|
||||
"#;
|
||||
let sandbox_full_access_cfg = toml::from_str::<ConfigToml>(sandbox_full_access)
|
||||
.expect("TOML deserialization should succeed");
|
||||
let sandbox_mode_override = None;
|
||||
assert_eq!(
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
sandbox_full_access_cfg.derive_sandbox_policy(sandbox_mode_override)
|
||||
);
|
||||
|
||||
// Make sure the error message contains the invalid value so users have
|
||||
// useful feedback.
|
||||
let msg = err.to_string();
|
||||
assert!(msg.contains("not-a-real-permission"));
|
||||
let sandbox_read_only = r#"
|
||||
sandbox_mode = "read-only"
|
||||
|
||||
[sandbox_workspace_write]
|
||||
network_access = true # This should be ignored.
|
||||
"#;
|
||||
|
||||
let sandbox_read_only_cfg = toml::from_str::<ConfigToml>(sandbox_read_only)
|
||||
.expect("TOML deserialization should succeed");
|
||||
let sandbox_mode_override = None;
|
||||
assert_eq!(
|
||||
SandboxPolicy::ReadOnly,
|
||||
sandbox_read_only_cfg.derive_sandbox_policy(sandbox_mode_override)
|
||||
);
|
||||
|
||||
let sandbox_workspace_write = r#"
|
||||
sandbox_mode = "workspace-write"
|
||||
|
||||
[sandbox_workspace_write]
|
||||
writable_roots = [
|
||||
"/tmp",
|
||||
]
|
||||
"#;
|
||||
|
||||
let sandbox_workspace_write_cfg = toml::from_str::<ConfigToml>(sandbox_workspace_write)
|
||||
.expect("TOML deserialization should succeed");
|
||||
let sandbox_mode_override = None;
|
||||
assert_eq!(
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![PathBuf::from("/tmp")],
|
||||
network_access: false,
|
||||
},
|
||||
sandbox_workspace_write_cfg.derive_sandbox_policy(sandbox_mode_override)
|
||||
);
|
||||
}
|
||||
|
||||
struct PrecedenceTestFixture {
|
||||
@@ -681,8 +703,7 @@ persistence = "none"
|
||||
fn create_test_fixture() -> std::io::Result<PrecedenceTestFixture> {
|
||||
let toml = r#"
|
||||
model = "o3"
|
||||
approval_policy = "unless-allow-listed"
|
||||
sandbox_permissions = ["disk-full-read-access"]
|
||||
approval_policy = "untrusted"
|
||||
disable_response_storage = false
|
||||
|
||||
# Can be used to determine which profile to use if not specified by
|
||||
@@ -694,11 +715,16 @@ name = "OpenAI using Chat Completions"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
env_key = "OPENAI_API_KEY"
|
||||
wire_api = "chat"
|
||||
request_max_retries = 4 # retry failed HTTP requests
|
||||
stream_max_retries = 10 # retry dropped SSE streams
|
||||
stream_idle_timeout_ms = 300000 # 5m idle timeout
|
||||
|
||||
[profiles.o3]
|
||||
model = "o3"
|
||||
model_provider = "openai"
|
||||
approval_policy = "never"
|
||||
model_reasoning_effort = "high"
|
||||
model_reasoning_summary = "detailed"
|
||||
|
||||
[profiles.gpt3]
|
||||
model = "gpt-3.5-turbo"
|
||||
@@ -729,6 +755,12 @@ disable_response_storage = true
|
||||
env_key: Some("OPENAI_API_KEY".to_string()),
|
||||
wire_api: crate::WireApi::Chat,
|
||||
env_key_instructions: None,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(4),
|
||||
stream_max_retries: Some(10),
|
||||
stream_idle_timeout_ms: Some(300_000),
|
||||
};
|
||||
let model_provider_map = {
|
||||
let mut model_provider_map = built_in_model_providers();
|
||||
@@ -783,13 +815,15 @@ disable_response_storage = true
|
||||
assert_eq!(
|
||||
Config {
|
||||
model: "o3".to_string(),
|
||||
model_context_window: Some(200_000),
|
||||
model_max_output_tokens: Some(100_000),
|
||||
model_provider_id: "openai".to_string(),
|
||||
model_provider: fixture.openai_provider.clone(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
||||
shell_environment_policy: ShellEnvironmentPolicy::default(),
|
||||
disable_response_storage: false,
|
||||
instructions: None,
|
||||
user_instructions: None,
|
||||
notify: None,
|
||||
cwd: fixture.cwd(),
|
||||
mcp_servers: HashMap::new(),
|
||||
@@ -801,8 +835,12 @@ disable_response_storage = true
|
||||
tui: Tui::default(),
|
||||
codex_linux_sandbox_exe: None,
|
||||
hide_agent_reasoning: false,
|
||||
model_reasoning_effort: ReasoningEffort::default(),
|
||||
model_reasoning_summary: ReasoningSummary::default(),
|
||||
model_reasoning_effort: ReasoningEffort::High,
|
||||
model_reasoning_summary: ReasoningSummary::Detailed,
|
||||
model_supports_reasoning_summaries: false,
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
experimental_resume: None,
|
||||
base_instructions: None,
|
||||
},
|
||||
o3_profile_config
|
||||
);
|
||||
@@ -825,13 +863,15 @@ disable_response_storage = true
|
||||
)?;
|
||||
let expected_gpt3_profile_config = Config {
|
||||
model: "gpt-3.5-turbo".to_string(),
|
||||
model_context_window: Some(16_385),
|
||||
model_max_output_tokens: Some(4_096),
|
||||
model_provider_id: "openai-chat-completions".to_string(),
|
||||
model_provider: fixture.openai_chat_completions_provider.clone(),
|
||||
approval_policy: AskForApproval::UnlessAllowListed,
|
||||
approval_policy: AskForApproval::UnlessTrusted,
|
||||
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
||||
shell_environment_policy: ShellEnvironmentPolicy::default(),
|
||||
disable_response_storage: false,
|
||||
instructions: None,
|
||||
user_instructions: None,
|
||||
notify: None,
|
||||
cwd: fixture.cwd(),
|
||||
mcp_servers: HashMap::new(),
|
||||
@@ -845,6 +885,10 @@ disable_response_storage = true
|
||||
hide_agent_reasoning: false,
|
||||
model_reasoning_effort: ReasoningEffort::default(),
|
||||
model_reasoning_summary: ReasoningSummary::default(),
|
||||
model_supports_reasoning_summaries: false,
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
experimental_resume: None,
|
||||
base_instructions: None,
|
||||
};
|
||||
|
||||
assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
|
||||
@@ -882,13 +926,15 @@ disable_response_storage = true
|
||||
)?;
|
||||
let expected_zdr_profile_config = Config {
|
||||
model: "o3".to_string(),
|
||||
model_context_window: Some(200_000),
|
||||
model_max_output_tokens: Some(100_000),
|
||||
model_provider_id: "openai".to_string(),
|
||||
model_provider: fixture.openai_provider.clone(),
|
||||
approval_policy: AskForApproval::OnFailure,
|
||||
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
||||
shell_environment_policy: ShellEnvironmentPolicy::default(),
|
||||
disable_response_storage: true,
|
||||
instructions: None,
|
||||
user_instructions: None,
|
||||
notify: None,
|
||||
cwd: fixture.cwd(),
|
||||
mcp_servers: HashMap::new(),
|
||||
@@ -902,6 +948,10 @@ disable_response_storage = true
|
||||
hide_agent_reasoning: false,
|
||||
model_reasoning_effort: ReasoningEffort::default(),
|
||||
model_reasoning_summary: ReasoningSummary::default(),
|
||||
model_supports_reasoning_summaries: false,
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
experimental_resume: None,
|
||||
base_instructions: None,
|
||||
};
|
||||
|
||||
assert_eq!(expected_zdr_profile_config, zdr_profile_config);
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::config_types::ReasoningEffort;
|
||||
use crate::config_types::ReasoningSummary;
|
||||
use crate::protocol::AskForApproval;
|
||||
|
||||
/// Collection of common configuration options that a user can define as a unit
|
||||
@@ -12,4 +14,7 @@ pub struct ConfigProfile {
|
||||
pub model_provider: Option<String>,
|
||||
pub approval_policy: Option<AskForApproval>,
|
||||
pub disable_response_storage: Option<bool>,
|
||||
pub model_reasoning_effort: Option<ReasoningEffort>,
|
||||
pub model_reasoning_summary: Option<ReasoningSummary>,
|
||||
pub chatgpt_base_url: Option<String>,
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
// definitions that do not contain business logic.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use strum_macros::Display;
|
||||
use wildmatch::WildMatchPattern;
|
||||
|
||||
@@ -90,6 +91,28 @@ pub struct Tui {
|
||||
pub disable_mouse_capture: bool,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone, Copy, PartialEq, Default)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum SandboxMode {
|
||||
#[serde(rename = "read-only")]
|
||||
#[default]
|
||||
ReadOnly,
|
||||
|
||||
#[serde(rename = "workspace-write")]
|
||||
WorkspaceWrite,
|
||||
|
||||
#[serde(rename = "danger-full-access")]
|
||||
DangerFullAccess,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, Default)]
|
||||
pub struct SandboxWorkplaceWrite {
|
||||
#[serde(default)]
|
||||
pub writable_roots: Vec<PathBuf>,
|
||||
#[serde(default)]
|
||||
pub network_access: bool,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, Default)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum ShellEnvironmentPolicyInherit {
|
||||
|
||||
@@ -1,12 +1,7 @@
|
||||
use crate::models::ResponseItem;
|
||||
|
||||
/// Transcript of conversation history that is needed:
|
||||
/// - for ZDR clients for which previous_response_id is not available, so we
|
||||
/// must include the transcript with every API call. This must include each
|
||||
/// `function_call` and its corresponding `function_call_output`.
|
||||
/// - for clients using the "chat completions" API as opposed to the
|
||||
/// "responses" API.
|
||||
#[derive(Debug, Clone)]
|
||||
/// Transcript of conversation history
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(crate) struct ConversationHistory {
|
||||
/// The oldest items are at the beginning of the vector.
|
||||
items: Vec<ResponseItem>,
|
||||
@@ -44,7 +39,8 @@ fn is_api_message(message: &ResponseItem) -> bool {
|
||||
ResponseItem::Message { role, .. } => role.as_str() != "system",
|
||||
ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::LocalShellCall { .. } => true,
|
||||
ResponseItem::Reasoning { .. } | ResponseItem::Other => false,
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::Reasoning { .. } => true,
|
||||
ResponseItem::Other => false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -225,41 +225,20 @@ fn create_linux_sandbox_command_args(
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: &Path,
|
||||
) -> Vec<String> {
|
||||
let mut linux_cmd: Vec<String> = vec![];
|
||||
#[expect(clippy::expect_used)]
|
||||
let sandbox_policy_cwd = cwd.to_str().expect("cwd must be valid UTF-8").to_string();
|
||||
|
||||
// Translate individual permissions.
|
||||
// Use high-level helper methods to infer flags when we cannot see the
|
||||
// exact permission list.
|
||||
if sandbox_policy.has_full_disk_read_access() {
|
||||
linux_cmd.extend(["-s", "disk-full-read-access"].map(String::from));
|
||||
}
|
||||
#[expect(clippy::expect_used)]
|
||||
let sandbox_policy_json =
|
||||
serde_json::to_string(sandbox_policy).expect("Failed to serialize SandboxPolicy to JSON");
|
||||
|
||||
if sandbox_policy.has_full_disk_write_access() {
|
||||
linux_cmd.extend(["-s", "disk-full-write-access"].map(String::from));
|
||||
} else {
|
||||
// Derive granular writable paths (includes cwd if `DiskWriteCwd` is
|
||||
// present).
|
||||
for root in sandbox_policy.get_writable_roots_with_cwd(cwd) {
|
||||
// Check if this path corresponds exactly to cwd to map to
|
||||
// `disk-write-cwd`, otherwise use the generic folder rule.
|
||||
if root == cwd {
|
||||
linux_cmd.extend(["-s", "disk-write-cwd"].map(String::from));
|
||||
} else {
|
||||
linux_cmd.extend([
|
||||
"-s".to_string(),
|
||||
format!("disk-write-folder={}", root.to_string_lossy()),
|
||||
]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if sandbox_policy.has_full_network_access() {
|
||||
linux_cmd.extend(["-s", "network-full-access"].map(String::from));
|
||||
}
|
||||
|
||||
// Separator so that command arguments starting with `-` are not parsed as
|
||||
// options of the helper itself.
|
||||
linux_cmd.push("--".to_string());
|
||||
let mut linux_cmd: Vec<String> = vec![
|
||||
sandbox_policy_cwd,
|
||||
sandbox_policy_json,
|
||||
// Separator so that command arguments starting with `-` are not parsed as
|
||||
// options of the helper itself.
|
||||
"--".to_string(),
|
||||
];
|
||||
|
||||
// Append the original tool command.
|
||||
linux_cmd.extend(command);
|
||||
@@ -405,6 +384,31 @@ async fn spawn_child_async(
|
||||
cmd.env(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR, "1");
|
||||
}
|
||||
|
||||
// If this Codex process dies (including being killed via SIGKILL), we want
|
||||
// any child processes that were spawned as part of a `"shell"` tool call
|
||||
// to also be terminated.
|
||||
|
||||
// This relies on prctl(2), so it only works on Linux.
|
||||
#[cfg(target_os = "linux")]
|
||||
unsafe {
|
||||
cmd.pre_exec(|| {
|
||||
// This prctl call effectively requests, "deliver SIGTERM when my
|
||||
// current parent dies."
|
||||
if libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) == -1 {
|
||||
return Err(io::Error::last_os_error());
|
||||
}
|
||||
|
||||
// Though if there was a race condition and this pre_exec() block is
|
||||
// run _after_ the parent (i.e., the Codex process) has already
|
||||
// exited, then the parent is the _init_ process (which will never
|
||||
// die), so we should just terminate the child process now.
|
||||
if libc::getppid() == 1 {
|
||||
libc::raise(libc::SIGTERM);
|
||||
}
|
||||
Ok(())
|
||||
});
|
||||
}
|
||||
|
||||
match stdio_policy {
|
||||
StdioPolicy::RedirectForShellTool => {
|
||||
// Do not create a file descriptor for stdin because otherwise some
|
||||
|
||||
@@ -11,14 +11,6 @@ env_flags! {
|
||||
pub OPENAI_TIMEOUT_MS: Duration = Duration::from_millis(300_000), |value| {
|
||||
value.parse().map(Duration::from_millis)
|
||||
};
|
||||
pub OPENAI_REQUEST_MAX_RETRIES: u64 = 4;
|
||||
pub OPENAI_STREAM_MAX_RETRIES: u64 = 10;
|
||||
|
||||
// We generally don't want to disconnect; this updates the timeout to be five minutes
|
||||
// which matches the upstream typescript codex impl.
|
||||
pub OPENAI_STREAM_IDLE_TIMEOUT_MS: Duration = Duration::from_millis(300_000), |value| {
|
||||
value.parse().map(Duration::from_millis)
|
||||
};
|
||||
|
||||
/// Fixture path for offline tests (see client.rs).
|
||||
pub CODEX_RS_SSE_FIXTURE: Option<&str> = None;
|
||||
|
||||
@@ -23,9 +23,9 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
|
||||
let cmd0 = command.first().map(String::as_str);
|
||||
|
||||
match cmd0 {
|
||||
Some(
|
||||
"cat" | "cd" | "echo" | "grep" | "head" | "ls" | "pwd" | "rg" | "tail" | "wc" | "which",
|
||||
) => true,
|
||||
Some("cat" | "cd" | "echo" | "grep" | "head" | "ls" | "pwd" | "tail" | "wc" | "which") => {
|
||||
true
|
||||
}
|
||||
|
||||
Some("find") => {
|
||||
// Certain options to `find` can delete files, write to files, or
|
||||
@@ -46,6 +46,29 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
|
||||
.any(|arg| UNSAFE_FIND_OPTIONS.contains(&arg.as_str()))
|
||||
}
|
||||
|
||||
// Ripgrep
|
||||
Some("rg") => {
|
||||
const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &[
|
||||
// Takes an arbitrary command that is executed for each match.
|
||||
"--pre",
|
||||
// Takes a command that can be used to obtain the local hostname.
|
||||
"--hostname-bin",
|
||||
];
|
||||
const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &[
|
||||
// Calls out to other decompression tools, so do not auto-approve
|
||||
// out of an abundance of caution.
|
||||
"--search-zip",
|
||||
"-z",
|
||||
];
|
||||
|
||||
!command.iter().any(|arg| {
|
||||
UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg.as_str())
|
||||
|| UNSAFE_RIPGREP_OPTIONS_WITH_ARGS
|
||||
.iter()
|
||||
.any(|&opt| arg == opt || arg.starts_with(&format!("{opt}=")))
|
||||
})
|
||||
}
|
||||
|
||||
// Git
|
||||
Some("git") => matches!(
|
||||
command.get(1).map(String::as_str),
|
||||
@@ -240,8 +263,41 @@ mod tests {
|
||||
] {
|
||||
assert!(
|
||||
!is_safe_to_call_with_exec(&args),
|
||||
"expected {:?} to be unsafe",
|
||||
args
|
||||
"expected {args:?} to be unsafe"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ripgrep_rules() {
|
||||
// Safe ripgrep invocations – none of the unsafe flags are present.
|
||||
assert!(is_safe_to_call_with_exec(&vec_str(&[
|
||||
"rg",
|
||||
"Cargo.toml",
|
||||
"-n"
|
||||
])));
|
||||
|
||||
// Unsafe flags that do not take an argument (present verbatim).
|
||||
for args in [
|
||||
vec_str(&["rg", "--search-zip", "files"]),
|
||||
vec_str(&["rg", "-z", "files"]),
|
||||
] {
|
||||
assert!(
|
||||
!is_safe_to_call_with_exec(&args),
|
||||
"expected {args:?} to be considered unsafe due to zip-search flag",
|
||||
);
|
||||
}
|
||||
|
||||
// Unsafe flags that expect a value, provided in both split and = forms.
|
||||
for args in [
|
||||
vec_str(&["rg", "--pre", "pwned", "files"]),
|
||||
vec_str(&["rg", "--pre=pwned", "files"]),
|
||||
vec_str(&["rg", "--hostname-bin", "pwned", "files"]),
|
||||
vec_str(&["rg", "--hostname-bin=pwned", "files"]),
|
||||
] {
|
||||
assert!(
|
||||
!is_safe_to_call_with_exec(&args),
|
||||
"expected {args:?} to be considered unsafe due to external-command flag",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,6 +28,7 @@ pub use model_provider_info::ModelProviderInfo;
|
||||
pub use model_provider_info::WireApi;
|
||||
mod models;
|
||||
pub mod openai_api_key;
|
||||
mod openai_model_info;
|
||||
mod openai_tools;
|
||||
mod project_doc;
|
||||
pub mod protocol;
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
//! `"<server><MCP_TOOL_NAME_DELIMITER><tool>"` as the key.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
@@ -16,8 +17,13 @@ use codex_mcp_client::McpClient;
|
||||
use mcp_types::ClientCapabilities;
|
||||
use mcp_types::Implementation;
|
||||
use mcp_types::Tool;
|
||||
|
||||
use serde_json::json;
|
||||
use sha1::Digest;
|
||||
use sha1::Sha1;
|
||||
use tokio::task::JoinSet;
|
||||
use tracing::info;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::config_types::McpServerConfig;
|
||||
|
||||
@@ -26,7 +32,8 @@ use crate::config_types::McpServerConfig;
|
||||
///
|
||||
/// OpenAI requires tool names to conform to `^[a-zA-Z0-9_-]+$`, so we must
|
||||
/// choose a delimiter from this character set.
|
||||
const MCP_TOOL_NAME_DELIMITER: &str = "__OAI_CODEX_MCP__";
|
||||
const MCP_TOOL_NAME_DELIMITER: &str = "__";
|
||||
const MAX_TOOL_NAME_LENGTH: usize = 64;
|
||||
|
||||
/// Timeout for the `tools/list` request.
|
||||
const LIST_TOOLS_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
@@ -35,16 +42,42 @@ const LIST_TOOLS_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
/// spawned successfully.
|
||||
pub type ClientStartErrors = HashMap<String, anyhow::Error>;
|
||||
|
||||
fn fully_qualified_tool_name(server: &str, tool: &str) -> String {
|
||||
format!("{server}{MCP_TOOL_NAME_DELIMITER}{tool}")
|
||||
fn qualify_tools(tools: Vec<ToolInfo>) -> HashMap<String, ToolInfo> {
|
||||
let mut used_names = HashSet::new();
|
||||
let mut qualified_tools = HashMap::new();
|
||||
for tool in tools {
|
||||
let mut qualified_name = format!(
|
||||
"{}{}{}",
|
||||
tool.server_name, MCP_TOOL_NAME_DELIMITER, tool.tool_name
|
||||
);
|
||||
if qualified_name.len() > MAX_TOOL_NAME_LENGTH {
|
||||
let mut hasher = Sha1::new();
|
||||
hasher.update(qualified_name.as_bytes());
|
||||
let sha1 = hasher.finalize();
|
||||
let sha1_str = format!("{sha1:x}");
|
||||
|
||||
// Truncate to make room for the hash suffix
|
||||
let prefix_len = MAX_TOOL_NAME_LENGTH - sha1_str.len();
|
||||
|
||||
qualified_name = format!("{}{}", &qualified_name[..prefix_len], sha1_str);
|
||||
}
|
||||
|
||||
if used_names.contains(&qualified_name) {
|
||||
warn!("skipping duplicated tool {}", qualified_name);
|
||||
continue;
|
||||
}
|
||||
|
||||
used_names.insert(qualified_name.clone());
|
||||
qualified_tools.insert(qualified_name, tool);
|
||||
}
|
||||
|
||||
qualified_tools
|
||||
}
|
||||
|
||||
pub(crate) fn try_parse_fully_qualified_tool_name(fq_name: &str) -> Option<(String, String)> {
|
||||
let (server, tool) = fq_name.split_once(MCP_TOOL_NAME_DELIMITER)?;
|
||||
if server.is_empty() || tool.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some((server.to_string(), tool.to_string()))
|
||||
struct ToolInfo {
|
||||
server_name: String,
|
||||
tool_name: String,
|
||||
tool: Tool,
|
||||
}
|
||||
|
||||
/// A thin wrapper around a set of running [`McpClient`] instances.
|
||||
@@ -57,7 +90,7 @@ pub(crate) struct McpConnectionManager {
|
||||
clients: HashMap<String, std::sync::Arc<McpClient>>,
|
||||
|
||||
/// Fully qualified tool name -> tool instance.
|
||||
tools: HashMap<String, Tool>,
|
||||
tools: HashMap<String, ToolInfo>,
|
||||
}
|
||||
|
||||
impl McpConnectionManager {
|
||||
@@ -79,9 +112,19 @@ impl McpConnectionManager {
|
||||
|
||||
// Launch all configured servers concurrently.
|
||||
let mut join_set = JoinSet::new();
|
||||
let mut errors = ClientStartErrors::new();
|
||||
|
||||
for (server_name, cfg) in mcp_servers {
|
||||
// TODO: Verify server name: require `^[a-zA-Z0-9_-]+$`?
|
||||
// Validate server name before spawning
|
||||
if !is_valid_mcp_server_name(&server_name) {
|
||||
let error = anyhow::anyhow!(
|
||||
"invalid server name '{}': must match pattern ^[a-zA-Z0-9_-]+$",
|
||||
server_name
|
||||
);
|
||||
errors.insert(server_name, error);
|
||||
continue;
|
||||
}
|
||||
|
||||
join_set.spawn(async move {
|
||||
let McpServerConfig { command, args, env } = cfg;
|
||||
let client_res = McpClient::new_stdio_client(command, args, env).await;
|
||||
@@ -93,10 +136,14 @@ impl McpConnectionManager {
|
||||
experimental: None,
|
||||
roots: None,
|
||||
sampling: None,
|
||||
// https://modelcontextprotocol.io/specification/2025-06-18/client/elicitation#capabilities
|
||||
// indicates this should be an empty object.
|
||||
elicitation: Some(json!({})),
|
||||
},
|
||||
client_info: Implementation {
|
||||
name: "codex-mcp-client".to_owned(),
|
||||
version: env!("CARGO_PKG_VERSION").to_owned(),
|
||||
title: Some("Codex".into()),
|
||||
},
|
||||
protocol_version: mcp_types::MCP_SCHEMA_VERSION.to_owned(),
|
||||
};
|
||||
@@ -117,7 +164,6 @@ impl McpConnectionManager {
|
||||
|
||||
let mut clients: HashMap<String, std::sync::Arc<McpClient>> =
|
||||
HashMap::with_capacity(join_set.len());
|
||||
let mut errors = ClientStartErrors::new();
|
||||
|
||||
while let Some(res) = join_set.join_next().await {
|
||||
let (server_name, client_res) = res?; // JoinError propagation
|
||||
@@ -132,7 +178,9 @@ impl McpConnectionManager {
|
||||
}
|
||||
}
|
||||
|
||||
let tools = list_all_tools(&clients).await?;
|
||||
let all_tools = list_all_tools(&clients).await?;
|
||||
|
||||
let tools = qualify_tools(all_tools);
|
||||
|
||||
Ok((Self { clients, tools }, errors))
|
||||
}
|
||||
@@ -140,7 +188,10 @@ impl McpConnectionManager {
|
||||
/// Returns a single map that contains **all** tools. Each key is the
|
||||
/// fully-qualified name for the tool.
|
||||
pub fn list_all_tools(&self) -> HashMap<String, Tool> {
|
||||
self.tools.clone()
|
||||
self.tools
|
||||
.iter()
|
||||
.map(|(name, tool)| (name.clone(), tool.tool.clone()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Invoke the tool indicated by the (server, tool) pair.
|
||||
@@ -162,13 +213,19 @@ impl McpConnectionManager {
|
||||
.await
|
||||
.with_context(|| format!("tool call failed for `{server}/{tool}`"))
|
||||
}
|
||||
|
||||
pub fn parse_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
|
||||
self.tools
|
||||
.get(tool_name)
|
||||
.map(|tool| (tool.server_name.clone(), tool.tool_name.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Query every server for its available tools and return a single map that
|
||||
/// contains **all** tools. Each key is the fully-qualified name for the tool.
|
||||
pub async fn list_all_tools(
|
||||
async fn list_all_tools(
|
||||
clients: &HashMap<String, std::sync::Arc<McpClient>>,
|
||||
) -> Result<HashMap<String, Tool>> {
|
||||
) -> Result<Vec<ToolInfo>> {
|
||||
let mut join_set = JoinSet::new();
|
||||
|
||||
// Spawn one task per server so we can query them concurrently. This
|
||||
@@ -185,18 +242,19 @@ pub async fn list_all_tools(
|
||||
});
|
||||
}
|
||||
|
||||
let mut aggregated: HashMap<String, Tool> = HashMap::with_capacity(join_set.len());
|
||||
let mut aggregated: Vec<ToolInfo> = Vec::with_capacity(join_set.len());
|
||||
|
||||
while let Some(join_res) = join_set.join_next().await {
|
||||
let (server_name, list_result) = join_res?;
|
||||
let list_result = list_result?;
|
||||
|
||||
for tool in list_result.tools {
|
||||
// TODO(mbolin): escape tool names that contain invalid characters.
|
||||
let fq_name = fully_qualified_tool_name(&server_name, &tool.name);
|
||||
if aggregated.insert(fq_name.clone(), tool).is_some() {
|
||||
panic!("tool name collision for '{fq_name}': suspicious");
|
||||
}
|
||||
let tool_info = ToolInfo {
|
||||
server_name: server_name.clone(),
|
||||
tool_name: tool.name.clone(),
|
||||
tool,
|
||||
};
|
||||
aggregated.push(tool_info);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -208,3 +266,99 @@ pub async fn list_all_tools(
|
||||
|
||||
Ok(aggregated)
|
||||
}
|
||||
|
||||
fn is_valid_mcp_server_name(server_name: &str) -> bool {
|
||||
!server_name.is_empty()
|
||||
&& server_name
|
||||
.chars()
|
||||
.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use mcp_types::ToolInputSchema;
|
||||
|
||||
fn create_test_tool(server_name: &str, tool_name: &str) -> ToolInfo {
|
||||
ToolInfo {
|
||||
server_name: server_name.to_string(),
|
||||
tool_name: tool_name.to_string(),
|
||||
tool: Tool {
|
||||
annotations: None,
|
||||
description: Some(format!("Test tool: {tool_name}")),
|
||||
input_schema: ToolInputSchema {
|
||||
properties: None,
|
||||
required: None,
|
||||
r#type: "object".to_string(),
|
||||
},
|
||||
name: tool_name.to_string(),
|
||||
output_schema: None,
|
||||
title: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_qualify_tools_short_non_duplicated_names() {
|
||||
let tools = vec![
|
||||
create_test_tool("server1", "tool1"),
|
||||
create_test_tool("server1", "tool2"),
|
||||
];
|
||||
|
||||
let qualified_tools = qualify_tools(tools);
|
||||
|
||||
assert_eq!(qualified_tools.len(), 2);
|
||||
assert!(qualified_tools.contains_key("server1__tool1"));
|
||||
assert!(qualified_tools.contains_key("server1__tool2"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_qualify_tools_duplicated_names_skipped() {
|
||||
let tools = vec![
|
||||
create_test_tool("server1", "duplicate_tool"),
|
||||
create_test_tool("server1", "duplicate_tool"),
|
||||
];
|
||||
|
||||
let qualified_tools = qualify_tools(tools);
|
||||
|
||||
// Only the first tool should remain, the second is skipped
|
||||
assert_eq!(qualified_tools.len(), 1);
|
||||
assert!(qualified_tools.contains_key("server1__duplicate_tool"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_qualify_tools_long_names_same_server() {
|
||||
let server_name = "my_server";
|
||||
|
||||
let tools = vec![
|
||||
create_test_tool(
|
||||
server_name,
|
||||
"extremely_lengthy_function_name_that_absolutely_surpasses_all_reasonable_limits",
|
||||
),
|
||||
create_test_tool(
|
||||
server_name,
|
||||
"yet_another_extremely_lengthy_function_name_that_absolutely_surpasses_all_reasonable_limits",
|
||||
),
|
||||
];
|
||||
|
||||
let qualified_tools = qualify_tools(tools);
|
||||
|
||||
assert_eq!(qualified_tools.len(), 2);
|
||||
|
||||
let mut keys: Vec<_> = qualified_tools.keys().cloned().collect();
|
||||
keys.sort();
|
||||
|
||||
assert_eq!(keys[0].len(), 64);
|
||||
assert_eq!(
|
||||
keys[0],
|
||||
"my_server__extremely_lena02e507efc5a9de88637e436690364fd4219e4ef"
|
||||
);
|
||||
|
||||
assert_eq!(keys[1].len(), 64);
|
||||
assert_eq!(
|
||||
keys[1],
|
||||
"my_server__yet_another_e1c3987bd9c50b826cbe1687966f79f0c602d19ca"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,10 +9,18 @@ use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
use std::env::VarError;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::error::EnvVarError;
|
||||
use crate::openai_api_key::get_openai_api_key;
|
||||
|
||||
/// Value for the `OpenAI-Originator` header that is sent with requests to
|
||||
/// OpenAI.
|
||||
const OPENAI_ORIGINATOR_HEADER: &str = "codex_cli_rs";
|
||||
const DEFAULT_STREAM_IDLE_TIMEOUT_MS: u64 = 300_000;
|
||||
const DEFAULT_STREAM_MAX_RETRIES: u64 = 10;
|
||||
const DEFAULT_REQUEST_MAX_RETRIES: u64 = 4;
|
||||
|
||||
/// Wire protocol that the provider speaks. Most third-party services only
|
||||
/// implement the classic OpenAI Chat Completions JSON schema, whereas OpenAI
|
||||
/// itself (and a handful of others) additionally expose the more modern
|
||||
@@ -22,10 +30,11 @@ use crate::openai_api_key::get_openai_api_key;
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum WireApi {
|
||||
/// The experimental “Responses” API exposed by OpenAI at `/v1/responses`.
|
||||
#[default]
|
||||
/// The experimental "Responses" API exposed by OpenAI at `/v1/responses`.
|
||||
Responses,
|
||||
|
||||
/// Regular Chat Completions compatible with `/v1/chat/completions`.
|
||||
#[default]
|
||||
Chat,
|
||||
}
|
||||
|
||||
@@ -44,14 +53,103 @@ pub struct ModelProviderInfo {
|
||||
pub env_key_instructions: Option<String>,
|
||||
|
||||
/// Which wire protocol this provider expects.
|
||||
#[serde(default)]
|
||||
pub wire_api: WireApi,
|
||||
|
||||
/// Optional query parameters to append to the base URL.
|
||||
pub query_params: Option<HashMap<String, String>>,
|
||||
|
||||
/// Additional HTTP headers to include in requests to this provider where
|
||||
/// the (key, value) pairs are the header name and value.
|
||||
pub http_headers: Option<HashMap<String, String>>,
|
||||
|
||||
/// Optional HTTP headers to include in requests to this provider where the
|
||||
/// (key, value) pairs are the header name and _environment variable_ whose
|
||||
/// value should be used. If the environment variable is not set, or the
|
||||
/// value is empty, the header will not be included in the request.
|
||||
pub env_http_headers: Option<HashMap<String, String>>,
|
||||
|
||||
/// Maximum number of times to retry a failed HTTP request to this provider.
|
||||
pub request_max_retries: Option<u64>,
|
||||
|
||||
/// Number of times to retry reconnecting a dropped streaming response before failing.
|
||||
pub stream_max_retries: Option<u64>,
|
||||
|
||||
/// Idle timeout (in milliseconds) to wait for activity on a streaming response before treating
|
||||
/// the connection as lost.
|
||||
pub stream_idle_timeout_ms: Option<u64>,
|
||||
}
|
||||
|
||||
impl ModelProviderInfo {
|
||||
/// Construct a `POST` RequestBuilder for the given URL using the provided
|
||||
/// reqwest Client applying:
|
||||
/// • provider-specific headers (static + env based)
|
||||
/// • Bearer auth header when an API key is available.
|
||||
///
|
||||
/// When `require_api_key` is true and the provider declares an `env_key`
|
||||
/// but the variable is missing/empty, returns an [`Err`] identical to the
|
||||
/// one produced by [`ModelProviderInfo::api_key`].
|
||||
pub fn create_request_builder<'a>(
|
||||
&'a self,
|
||||
client: &'a reqwest::Client,
|
||||
) -> crate::error::Result<reqwest::RequestBuilder> {
|
||||
let api_key = self.api_key()?;
|
||||
|
||||
let url = self.get_full_url();
|
||||
|
||||
let mut builder = client.post(url);
|
||||
if let Some(key) = api_key {
|
||||
builder = builder.bearer_auth(key);
|
||||
}
|
||||
|
||||
Ok(self.apply_http_headers(builder))
|
||||
}
|
||||
|
||||
pub(crate) fn get_full_url(&self) -> String {
|
||||
let query_string = self
|
||||
.query_params
|
||||
.as_ref()
|
||||
.map_or_else(String::new, |params| {
|
||||
let full_params = params
|
||||
.iter()
|
||||
.map(|(k, v)| format!("{k}={v}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("&");
|
||||
format!("?{full_params}")
|
||||
});
|
||||
let base_url = &self.base_url;
|
||||
match self.wire_api {
|
||||
WireApi::Responses => format!("{base_url}/responses{query_string}"),
|
||||
WireApi::Chat => format!("{base_url}/chat/completions{query_string}"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply provider-specific HTTP headers (both static and environment-based)
|
||||
/// onto an existing `reqwest::RequestBuilder` and return the updated
|
||||
/// builder.
|
||||
fn apply_http_headers(&self, mut builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
|
||||
if let Some(extra) = &self.http_headers {
|
||||
for (k, v) in extra {
|
||||
builder = builder.header(k, v);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(env_headers) = &self.env_http_headers {
|
||||
for (header, env_var) in env_headers {
|
||||
if let Ok(val) = std::env::var(env_var) {
|
||||
if !val.trim().is_empty() {
|
||||
builder = builder.header(header, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
builder
|
||||
}
|
||||
|
||||
/// If `env_key` is Some, returns the API key for this provider if present
|
||||
/// (and non-empty) in the environment. If `env_key` is required but
|
||||
/// cannot be found, returns an error.
|
||||
pub fn api_key(&self) -> crate::error::Result<Option<String>> {
|
||||
fn api_key(&self) -> crate::error::Result<Option<String>> {
|
||||
match &self.env_key {
|
||||
Some(env_key) => {
|
||||
let env_value = if env_key == crate::openai_api_key::OPENAI_API_KEY_ENV_VAR {
|
||||
@@ -77,91 +175,73 @@ impl ModelProviderInfo {
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Effective maximum number of request retries for this provider.
|
||||
pub fn request_max_retries(&self) -> u64 {
|
||||
self.request_max_retries
|
||||
.unwrap_or(DEFAULT_REQUEST_MAX_RETRIES)
|
||||
}
|
||||
|
||||
/// Effective maximum number of stream reconnection attempts for this provider.
|
||||
pub fn stream_max_retries(&self) -> u64 {
|
||||
self.stream_max_retries
|
||||
.unwrap_or(DEFAULT_STREAM_MAX_RETRIES)
|
||||
}
|
||||
|
||||
/// Effective idle timeout for streaming responses.
|
||||
pub fn stream_idle_timeout(&self) -> Duration {
|
||||
self.stream_idle_timeout_ms
|
||||
.map(Duration::from_millis)
|
||||
.unwrap_or(Duration::from_millis(DEFAULT_STREAM_IDLE_TIMEOUT_MS))
|
||||
}
|
||||
}
|
||||
|
||||
/// Built-in default provider list.
|
||||
pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
|
||||
use ModelProviderInfo as P;
|
||||
|
||||
// We do not want to be in the business of adjucating which third-party
|
||||
// providers are bundled with Codex CLI, so we only include the OpenAI
|
||||
// provider by default. Users are encouraged to add to `model_providers`
|
||||
// in config.toml to add their own providers.
|
||||
[
|
||||
(
|
||||
"openai",
|
||||
P {
|
||||
name: "OpenAI".into(),
|
||||
base_url: "https://api.openai.com/v1".into(),
|
||||
// Allow users to override the default OpenAI endpoint by
|
||||
// exporting `OPENAI_BASE_URL`. This is useful when pointing
|
||||
// Codex at a proxy, mock server, or Azure-style deployment
|
||||
// without requiring a full TOML override for the built-in
|
||||
// OpenAI provider.
|
||||
base_url: std::env::var("OPENAI_BASE_URL")
|
||||
.ok()
|
||||
.filter(|v| !v.trim().is_empty())
|
||||
.unwrap_or_else(|| "https://api.openai.com/v1".to_string()),
|
||||
env_key: Some("OPENAI_API_KEY".into()),
|
||||
env_key_instructions: Some("Create an API key (https://platform.openai.com) and export it as an environment variable.".into()),
|
||||
wire_api: WireApi::Responses,
|
||||
},
|
||||
),
|
||||
(
|
||||
"openrouter",
|
||||
P {
|
||||
name: "OpenRouter".into(),
|
||||
base_url: "https://openrouter.ai/api/v1".into(),
|
||||
env_key: Some("OPENROUTER_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"gemini",
|
||||
P {
|
||||
name: "Gemini".into(),
|
||||
base_url: "https://generativelanguage.googleapis.com/v1beta/openai".into(),
|
||||
env_key: Some("GEMINI_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"ollama",
|
||||
P {
|
||||
name: "Ollama".into(),
|
||||
base_url: "http://localhost:11434/v1".into(),
|
||||
env_key: None,
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"mistral",
|
||||
P {
|
||||
name: "Mistral".into(),
|
||||
base_url: "https://api.mistral.ai/v1".into(),
|
||||
env_key: Some("MISTRAL_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"deepseek",
|
||||
P {
|
||||
name: "DeepSeek".into(),
|
||||
base_url: "https://api.deepseek.com".into(),
|
||||
env_key: Some("DEEPSEEK_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"xai",
|
||||
P {
|
||||
name: "xAI".into(),
|
||||
base_url: "https://api.x.ai/v1".into(),
|
||||
env_key: Some("XAI_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"groq",
|
||||
P {
|
||||
name: "Groq".into(),
|
||||
base_url: "https://api.groq.com/openai/v1".into(),
|
||||
env_key: Some("GROQ_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
query_params: None,
|
||||
http_headers: Some(
|
||||
[
|
||||
("originator".to_string(), OPENAI_ORIGINATOR_HEADER.to_string()),
|
||||
("version".to_string(), env!("CARGO_PKG_VERSION").to_string()),
|
||||
]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
env_http_headers: Some(
|
||||
[
|
||||
("OpenAI-Organization".to_string(), "OPENAI_ORGANIZATION".to_string()),
|
||||
("OpenAI-Project".to_string(), "OPENAI_PROJECT".to_string()),
|
||||
]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
// Use global defaults for retry/timeout unless overridden in config.toml.
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
},
|
||||
),
|
||||
]
|
||||
@@ -169,3 +249,93 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
|
||||
.map(|(k, v)| (k.to_string(), v))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::unwrap_used)]
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn test_deserialize_ollama_model_provider_toml() {
|
||||
let azure_provider_toml = r#"
|
||||
name = "Ollama"
|
||||
base_url = "http://localhost:11434/v1"
|
||||
"#;
|
||||
let expected_provider = ModelProviderInfo {
|
||||
name: "Ollama".into(),
|
||||
base_url: "http://localhost:11434/v1".into(),
|
||||
env_key: None,
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
};
|
||||
|
||||
let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
|
||||
assert_eq!(expected_provider, provider);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_deserialize_azure_model_provider_toml() {
|
||||
let azure_provider_toml = r#"
|
||||
name = "Azure"
|
||||
base_url = "https://xxxxx.openai.azure.com/openai"
|
||||
env_key = "AZURE_OPENAI_API_KEY"
|
||||
query_params = { api-version = "2025-04-01-preview" }
|
||||
"#;
|
||||
let expected_provider = ModelProviderInfo {
|
||||
name: "Azure".into(),
|
||||
base_url: "https://xxxxx.openai.azure.com/openai".into(),
|
||||
env_key: Some("AZURE_OPENAI_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
query_params: Some(maplit::hashmap! {
|
||||
"api-version".to_string() => "2025-04-01-preview".to_string(),
|
||||
}),
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
};
|
||||
|
||||
let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
|
||||
assert_eq!(expected_provider, provider);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_deserialize_example_model_provider_toml() {
|
||||
let azure_provider_toml = r#"
|
||||
name = "Example"
|
||||
base_url = "https://example.com"
|
||||
env_key = "API_KEY"
|
||||
http_headers = { "X-Example-Header" = "example-value" }
|
||||
env_http_headers = { "X-Example-Env-Header" = "EXAMPLE_ENV_VAR" }
|
||||
"#;
|
||||
let expected_provider = ModelProviderInfo {
|
||||
name: "Example".into(),
|
||||
base_url: "https://example.com".into(),
|
||||
env_key: Some("API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
query_params: None,
|
||||
http_headers: Some(maplit::hashmap! {
|
||||
"X-Example-Header".to_string() => "example-value".to_string(),
|
||||
}),
|
||||
env_http_headers: Some(maplit::hashmap! {
|
||||
"X-Example-Env-Header".to_string() => "EXAMPLE_ENV_VAR".to_string(),
|
||||
}),
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
};
|
||||
|
||||
let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
|
||||
assert_eq!(expected_provider, provider);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::collections::HashMap;
|
||||
use base64::Engine;
|
||||
use mcp_types::CallToolResult;
|
||||
use serde::Deserialize;
|
||||
use serde::Deserializer;
|
||||
use serde::Serialize;
|
||||
use serde::ser::Serializer;
|
||||
|
||||
@@ -37,12 +38,14 @@ pub enum ContentItem {
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ResponseItem {
|
||||
Message {
|
||||
id: Option<String>,
|
||||
role: String,
|
||||
content: Vec<ContentItem>,
|
||||
},
|
||||
Reasoning {
|
||||
id: String,
|
||||
summary: Vec<ReasoningItemReasoningSummary>,
|
||||
encrypted_content: Option<String>,
|
||||
},
|
||||
LocalShellCall {
|
||||
/// Set when using the chat completions API.
|
||||
@@ -53,6 +56,7 @@ pub enum ResponseItem {
|
||||
action: LocalShellAction,
|
||||
},
|
||||
FunctionCall {
|
||||
id: Option<String>,
|
||||
name: String,
|
||||
// The Responses API returns the function call arguments as a *string* that contains
|
||||
// JSON, not as an already‑parsed object. We keep it as a raw string here and let
|
||||
@@ -78,7 +82,11 @@ pub enum ResponseItem {
|
||||
impl From<ResponseInputItem> for ResponseItem {
|
||||
fn from(item: ResponseInputItem) -> Self {
|
||||
match item {
|
||||
ResponseInputItem::Message { role, content } => Self::Message { role, content },
|
||||
ResponseInputItem::Message { role, content } => Self::Message {
|
||||
role,
|
||||
content,
|
||||
id: None,
|
||||
},
|
||||
ResponseInputItem::FunctionCallOutput { call_id, output } => {
|
||||
Self::FunctionCallOutput { call_id, output }
|
||||
}
|
||||
@@ -145,7 +153,7 @@ impl From<Vec<InputItem>> for ResponseInputItem {
|
||||
.unwrap_or_else(|| "application/octet-stream".to_string());
|
||||
let encoded = base64::engine::general_purpose::STANDARD.encode(bytes);
|
||||
Some(ContentItem::InputImage {
|
||||
image_url: format!("data:{};base64,{}", mime, encoded),
|
||||
image_url: format!("data:{mime};base64,{encoded}"),
|
||||
})
|
||||
}
|
||||
Err(err) => {
|
||||
@@ -177,7 +185,7 @@ pub struct ShellToolCallParams {
|
||||
pub timeout_ms: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FunctionCallOutputPayload {
|
||||
pub content: String,
|
||||
#[expect(dead_code)]
|
||||
@@ -205,6 +213,19 @@ impl Serialize for FunctionCallOutputPayload {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for FunctionCallOutputPayload {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let s = String::deserialize(deserializer)?;
|
||||
Ok(FunctionCallOutputPayload {
|
||||
content: s,
|
||||
success: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Implement Display so callers can treat the payload like a plain string when logging or doing
|
||||
// trivial substring checks in tests (existing tests call `.contains()` on the output). Display
|
||||
// returns the raw `content` field.
|
||||
|
||||
71
codex-rs/core/src/openai_model_info.rs
Normal file
71
codex-rs/core/src/openai_model_info.rs
Normal file
@@ -0,0 +1,71 @@
|
||||
/// Metadata about a model, particularly OpenAI models.
|
||||
/// We may want to consider including details like the pricing for
|
||||
/// input tokens, output tokens, etc., though users will need to be able to
|
||||
/// override this in config.toml, as this information can get out of date.
|
||||
/// Though this would help present more accurate pricing information in the UI.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ModelInfo {
|
||||
/// Size of the context window in tokens.
|
||||
pub(crate) context_window: u64,
|
||||
|
||||
/// Maximum number of output tokens that can be generated for the model.
|
||||
pub(crate) max_output_tokens: u64,
|
||||
}
|
||||
|
||||
/// Note details such as what a model like gpt-4o is aliased to may be out of
|
||||
/// date.
|
||||
pub(crate) fn get_model_info(name: &str) -> Option<ModelInfo> {
|
||||
match name {
|
||||
// https://platform.openai.com/docs/models/o3
|
||||
"o3" => Some(ModelInfo {
|
||||
context_window: 200_000,
|
||||
max_output_tokens: 100_000,
|
||||
}),
|
||||
|
||||
// https://platform.openai.com/docs/models/o4-mini
|
||||
"o4-mini" => Some(ModelInfo {
|
||||
context_window: 200_000,
|
||||
max_output_tokens: 100_000,
|
||||
}),
|
||||
|
||||
// https://platform.openai.com/docs/models/codex-mini-latest
|
||||
"codex-mini-latest" => Some(ModelInfo {
|
||||
context_window: 200_000,
|
||||
max_output_tokens: 100_000,
|
||||
}),
|
||||
|
||||
// As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
|
||||
// https://platform.openai.com/docs/models/gpt-4.1
|
||||
"gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo {
|
||||
context_window: 1_047_576,
|
||||
max_output_tokens: 32_768,
|
||||
}),
|
||||
|
||||
// As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
|
||||
// https://platform.openai.com/docs/models/gpt-4o
|
||||
"gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo {
|
||||
context_window: 128_000,
|
||||
max_output_tokens: 16_384,
|
||||
}),
|
||||
|
||||
// https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
|
||||
"gpt-4o-2024-05-13" => Some(ModelInfo {
|
||||
context_window: 128_000,
|
||||
max_output_tokens: 4_096,
|
||||
}),
|
||||
|
||||
// https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
|
||||
"gpt-4o-2024-11-20" => Some(ModelInfo {
|
||||
context_window: 128_000,
|
||||
max_output_tokens: 16_384,
|
||||
}),
|
||||
|
||||
// https://platform.openai.com/docs/models/gpt-3.5-turbo
|
||||
"gpt-3.5-turbo" => Some(ModelInfo {
|
||||
context_window: 16_385,
|
||||
max_output_tokens: 4_096,
|
||||
}),
|
||||
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -27,16 +27,16 @@ const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
|
||||
/// string of instructions.
|
||||
pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
|
||||
match find_project_doc(config).await {
|
||||
Ok(Some(project_doc)) => match &config.instructions {
|
||||
Ok(Some(project_doc)) => match &config.user_instructions {
|
||||
Some(original_instructions) => Some(format!(
|
||||
"{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
|
||||
)),
|
||||
None => Some(project_doc),
|
||||
},
|
||||
Ok(None) => config.instructions.clone(),
|
||||
Ok(None) => config.user_instructions.clone(),
|
||||
Err(e) => {
|
||||
error!("error trying to find project doc: {e:#}");
|
||||
config.instructions.clone()
|
||||
config.user_instructions.clone()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -159,7 +159,7 @@ mod tests {
|
||||
config.cwd = root.path().to_path_buf();
|
||||
config.project_doc_max_bytes = limit;
|
||||
|
||||
config.instructions = instructions.map(ToOwned::to_owned);
|
||||
config.user_instructions = instructions.map(ToOwned::to_owned);
|
||||
config
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
|
||||
use mcp_types::CallToolResult;
|
||||
use serde::Deserialize;
|
||||
@@ -43,8 +44,12 @@ pub enum Op {
|
||||
model_reasoning_effort: ReasoningEffortConfig,
|
||||
model_reasoning_summary: ReasoningSummaryConfig,
|
||||
|
||||
/// Model instructions
|
||||
instructions: Option<String>,
|
||||
/// Model instructions that are appended to the base instructions.
|
||||
user_instructions: Option<String>,
|
||||
|
||||
/// Base instructions override.
|
||||
base_instructions: Option<String>,
|
||||
|
||||
/// When to escalate for approval for execution
|
||||
approval_policy: AskForApproval,
|
||||
/// How to sandbox commands executed in the system
|
||||
@@ -68,6 +73,10 @@ pub enum Op {
|
||||
/// `ConfigureSession` operation so that the business-logic layer can
|
||||
/// operate deterministically.
|
||||
cwd: std::path::PathBuf,
|
||||
|
||||
/// Path to a rollout file to resume from.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
resume_path: Option<std::path::PathBuf>,
|
||||
},
|
||||
|
||||
/// Abort current task.
|
||||
@@ -107,23 +116,22 @@ pub enum Op {
|
||||
|
||||
/// Request a single history entry identified by `log_id` + `offset`.
|
||||
GetHistoryEntryRequest { offset: usize, log_id: u64 },
|
||||
|
||||
/// Request to shut down codex instance.
|
||||
Shutdown,
|
||||
}
|
||||
|
||||
/// Determines how liberally commands are auto‑approved by the system.
|
||||
/// Determines the conditions under which the user is consulted to approve
|
||||
/// running the command proposed by Codex.
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum AskForApproval {
|
||||
/// Under this policy, only “known safe” commands—as determined by
|
||||
/// Under this policy, only "known safe" commands—as determined by
|
||||
/// `is_safe_command()`—that **only read files** are auto‑approved.
|
||||
/// Everything else will ask the user to approve.
|
||||
#[default]
|
||||
UnlessAllowListed,
|
||||
|
||||
/// In addition to everything allowed by **`Suggest`**, commands that
|
||||
/// *write* to files **within the user’s approved list of writable paths**
|
||||
/// are also auto‑approved.
|
||||
/// TODO(ragona): fix
|
||||
AutoEdit,
|
||||
#[serde(rename = "untrusted")]
|
||||
UnlessTrusted,
|
||||
|
||||
/// *All* commands are auto‑approved, but they are expected to run inside a
|
||||
/// sandbox where network access is disabled and writes are confined to a
|
||||
@@ -136,155 +144,104 @@ pub enum AskForApproval {
|
||||
Never,
|
||||
}
|
||||
|
||||
/// Determines execution restrictions for model shell commands
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub struct SandboxPolicy {
|
||||
permissions: Vec<SandboxPermission>,
|
||||
/// Determines execution restrictions for model shell commands.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(tag = "mode", rename_all = "kebab-case")]
|
||||
pub enum SandboxPolicy {
|
||||
/// No restrictions whatsoever. Use with caution.
|
||||
#[serde(rename = "danger-full-access")]
|
||||
DangerFullAccess,
|
||||
|
||||
/// Read-only access to the entire file-system.
|
||||
#[serde(rename = "read-only")]
|
||||
ReadOnly,
|
||||
|
||||
/// Same as `ReadOnly` but additionally grants write access to the current
|
||||
/// working directory ("workspace").
|
||||
#[serde(rename = "workspace-write")]
|
||||
WorkspaceWrite {
|
||||
/// Additional folders (beyond cwd and possibly TMPDIR) that should be
|
||||
/// writable from within the sandbox.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
writable_roots: Vec<PathBuf>,
|
||||
|
||||
/// When set to `true`, outbound network access is allowed. `false` by
|
||||
/// default.
|
||||
#[serde(default)]
|
||||
network_access: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl From<Vec<SandboxPermission>> for SandboxPolicy {
|
||||
fn from(permissions: Vec<SandboxPermission>) -> Self {
|
||||
Self { permissions }
|
||||
impl FromStr for SandboxPolicy {
|
||||
type Err = serde_json::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
serde_json::from_str(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl SandboxPolicy {
|
||||
/// Returns a policy with read-only disk access and no network.
|
||||
pub fn new_read_only_policy() -> Self {
|
||||
Self {
|
||||
permissions: vec![SandboxPermission::DiskFullReadAccess],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_read_only_policy_with_writable_roots(writable_roots: &[PathBuf]) -> Self {
|
||||
let mut permissions = Self::new_read_only_policy().permissions;
|
||||
permissions.extend(writable_roots.iter().map(|folder| {
|
||||
SandboxPermission::DiskWriteFolder {
|
||||
folder: folder.clone(),
|
||||
}
|
||||
}));
|
||||
Self { permissions }
|
||||
}
|
||||
|
||||
pub fn new_full_auto_policy() -> Self {
|
||||
Self {
|
||||
permissions: vec![
|
||||
SandboxPermission::DiskFullReadAccess,
|
||||
SandboxPermission::DiskWritePlatformUserTempFolder,
|
||||
SandboxPermission::DiskWriteCwd,
|
||||
],
|
||||
SandboxPolicy::ReadOnly
|
||||
}
|
||||
|
||||
/// Returns a policy that can read the entire disk, but can only write to
|
||||
/// the current working directory and the per-user tmp dir on macOS. It does
|
||||
/// not allow network access.
|
||||
pub fn new_workspace_write_policy() -> Self {
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![],
|
||||
network_access: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Always returns `true` for now, as we do not yet support restricting read
|
||||
/// access.
|
||||
pub fn has_full_disk_read_access(&self) -> bool {
|
||||
self.permissions
|
||||
.iter()
|
||||
.any(|perm| matches!(perm, SandboxPermission::DiskFullReadAccess))
|
||||
true
|
||||
}
|
||||
|
||||
pub fn has_full_disk_write_access(&self) -> bool {
|
||||
self.permissions
|
||||
.iter()
|
||||
.any(|perm| matches!(perm, SandboxPermission::DiskFullWriteAccess))
|
||||
match self {
|
||||
SandboxPolicy::DangerFullAccess => true,
|
||||
SandboxPolicy::ReadOnly => false,
|
||||
SandboxPolicy::WorkspaceWrite { .. } => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has_full_network_access(&self) -> bool {
|
||||
self.permissions
|
||||
.iter()
|
||||
.any(|perm| matches!(perm, SandboxPermission::NetworkFullAccess))
|
||||
match self {
|
||||
SandboxPolicy::DangerFullAccess => true,
|
||||
SandboxPolicy::ReadOnly => false,
|
||||
SandboxPolicy::WorkspaceWrite { network_access, .. } => *network_access,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the list of writable roots that should be passed down to the
|
||||
/// Landlock rules installer, tailored to the current working directory.
|
||||
pub fn get_writable_roots_with_cwd(&self, cwd: &Path) -> Vec<PathBuf> {
|
||||
let mut writable_roots = Vec::<PathBuf>::new();
|
||||
for perm in &self.permissions {
|
||||
use SandboxPermission::*;
|
||||
match perm {
|
||||
DiskWritePlatformUserTempFolder => {
|
||||
if cfg!(target_os = "macos") {
|
||||
if let Some(tempdir) = std::env::var_os("TMPDIR") {
|
||||
// Likely something that starts with /var/folders/...
|
||||
let tmpdir_path = PathBuf::from(&tempdir);
|
||||
if tmpdir_path.is_absolute() {
|
||||
writable_roots.push(tmpdir_path.clone());
|
||||
match tmpdir_path.canonicalize() {
|
||||
Ok(canonicalized) => {
|
||||
// Likely something that starts with /private/var/folders/...
|
||||
if canonicalized != tmpdir_path {
|
||||
writable_roots.push(canonicalized);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to canonicalize TMPDIR: {e}");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::error!("TMPDIR is not an absolute path: {tempdir:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
match self {
|
||||
SandboxPolicy::DangerFullAccess => Vec::new(),
|
||||
SandboxPolicy::ReadOnly => Vec::new(),
|
||||
SandboxPolicy::WorkspaceWrite { writable_roots, .. } => {
|
||||
let mut roots = writable_roots.clone();
|
||||
roots.push(cwd.to_path_buf());
|
||||
|
||||
// For Linux, should this be XDG_RUNTIME_DIR, /run/user/<uid>, or something else?
|
||||
}
|
||||
DiskWritePlatformGlobalTempFolder => {
|
||||
if cfg!(unix) {
|
||||
writable_roots.push(PathBuf::from("/tmp"));
|
||||
// Also include the per-user tmp dir on macOS.
|
||||
// Note this is added dynamically rather than storing it in
|
||||
// writable_roots because writable_roots contains only static
|
||||
// values deserialized from the config file.
|
||||
if cfg!(target_os = "macos") {
|
||||
if let Some(tmpdir) = std::env::var_os("TMPDIR") {
|
||||
roots.push(PathBuf::from(tmpdir));
|
||||
}
|
||||
}
|
||||
DiskWriteCwd => {
|
||||
writable_roots.push(cwd.to_path_buf());
|
||||
}
|
||||
DiskWriteFolder { folder } => {
|
||||
writable_roots.push(folder.clone());
|
||||
}
|
||||
DiskFullReadAccess | NetworkFullAccess => {}
|
||||
DiskFullWriteAccess => {
|
||||
// Currently, we expect callers to only invoke this method
|
||||
// after verifying has_full_disk_write_access() is false.
|
||||
}
|
||||
|
||||
roots
|
||||
}
|
||||
}
|
||||
writable_roots
|
||||
}
|
||||
|
||||
pub fn is_unrestricted(&self) -> bool {
|
||||
self.has_full_disk_read_access()
|
||||
&& self.has_full_disk_write_access()
|
||||
&& self.has_full_network_access()
|
||||
}
|
||||
}
|
||||
|
||||
/// Permissions that should be granted to the sandbox in which the agent
|
||||
/// operates.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum SandboxPermission {
|
||||
/// Is allowed to read all files on disk.
|
||||
DiskFullReadAccess,
|
||||
|
||||
/// Is allowed to write to the operating system's temp dir that
|
||||
/// is restricted to the user the agent is running as. For
|
||||
/// example, on macOS, this is generally something under
|
||||
/// `/var/folders` as opposed to `/tmp`.
|
||||
DiskWritePlatformUserTempFolder,
|
||||
|
||||
/// Is allowed to write to the operating system's shared temp
|
||||
/// dir. On UNIX, this is generally `/tmp`.
|
||||
DiskWritePlatformGlobalTempFolder,
|
||||
|
||||
/// Is allowed to write to the current working directory (in practice, this
|
||||
/// is the `cwd` where `codex` was spawned).
|
||||
DiskWriteCwd,
|
||||
|
||||
/// Is allowed to the specified folder. `PathBuf` must be an
|
||||
/// absolute path, though it is up to the caller to canonicalize
|
||||
/// it if the path contains symlinks.
|
||||
DiskWriteFolder { folder: PathBuf },
|
||||
|
||||
/// Is allowed to write to any file on disk.
|
||||
DiskFullWriteAccess,
|
||||
|
||||
/// Can make arbitrary network requests.
|
||||
NetworkFullAccess,
|
||||
}
|
||||
|
||||
/// User input
|
||||
@@ -329,12 +286,22 @@ pub enum EventMsg {
|
||||
/// Agent has completed all actions
|
||||
TaskComplete(TaskCompleteEvent),
|
||||
|
||||
/// Token count event, sent periodically to report the number of tokens
|
||||
/// used in the current session.
|
||||
TokenCount(TokenUsage),
|
||||
|
||||
/// Agent text output message
|
||||
AgentMessage(AgentMessageEvent),
|
||||
|
||||
/// Agent text output delta message
|
||||
AgentMessageDelta(AgentMessageDeltaEvent),
|
||||
|
||||
/// Reasoning event from agent.
|
||||
AgentReasoning(AgentReasoningEvent),
|
||||
|
||||
/// Agent reasoning delta event from agent.
|
||||
AgentReasoningDelta(AgentReasoningDeltaEvent),
|
||||
|
||||
/// Ack the client's configure message.
|
||||
SessionConfigured(SessionConfiguredEvent),
|
||||
|
||||
@@ -362,6 +329,9 @@ pub enum EventMsg {
|
||||
|
||||
/// Response to GetHistoryEntryRequest.
|
||||
GetHistoryEntryResponse(GetHistoryEntryResponseEvent),
|
||||
|
||||
/// Notification that the agent is shutting down.
|
||||
ShutdownComplete,
|
||||
}
|
||||
|
||||
// Individual event payload types matching each `EventMsg` variant.
|
||||
@@ -376,16 +346,35 @@ pub struct TaskCompleteEvent {
|
||||
pub last_agent_message: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
|
||||
pub struct TokenUsage {
|
||||
pub input_tokens: u64,
|
||||
pub cached_input_tokens: Option<u64>,
|
||||
pub output_tokens: u64,
|
||||
pub reasoning_output_tokens: Option<u64>,
|
||||
pub total_tokens: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct AgentMessageEvent {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct AgentMessageDeltaEvent {
|
||||
pub delta: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct AgentReasoningEvent {
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct AgentReasoningDeltaEvent {
|
||||
pub delta: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct McpToolCallBeginEvent {
|
||||
/// Identifier so this can be paired with the McpToolCallEnd event.
|
||||
@@ -439,6 +428,8 @@ pub struct ExecCommandEndEvent {
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct ExecApprovalRequestEvent {
|
||||
/// Identifier for the associated exec call, if available.
|
||||
pub call_id: String,
|
||||
/// The command to be executed.
|
||||
pub command: Vec<String>,
|
||||
/// The command's working directory.
|
||||
@@ -450,6 +441,8 @@ pub struct ExecApprovalRequestEvent {
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct ApplyPatchApprovalRequestEvent {
|
||||
/// Responses API call id for the associated patch apply call, if available.
|
||||
pub call_id: String,
|
||||
pub changes: HashMap<PathBuf, FileChange>,
|
||||
/// Optional explanatory reason (e.g. request for extra write access).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
|
||||
@@ -1,33 +1,47 @@
|
||||
//! Functionality to persist a Codex conversation *rollout* – a linear list of
|
||||
//! [`ResponseItem`] objects exchanged during a session – to disk so that
|
||||
//! sessions can be replayed or inspected later (mirrors the behaviour of the
|
||||
//! upstream TypeScript implementation).
|
||||
//! Persist Codex session rollouts (.jsonl) so sessions can be replayed or inspected later.
|
||||
|
||||
use std::fs::File;
|
||||
use std::fs::{self};
|
||||
use std::io::Error as IoError;
|
||||
use std::path::Path;
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use time::OffsetDateTime;
|
||||
use time::format_description::FormatItem;
|
||||
use time::macros::format_description;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::sync::mpsc::Sender;
|
||||
use tokio::sync::mpsc::{self};
|
||||
use tokio::sync::oneshot;
|
||||
use tracing::info;
|
||||
use tracing::warn;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::models::ResponseItem;
|
||||
|
||||
/// Folder inside `~/.codex` that holds saved rollouts.
|
||||
const SESSIONS_SUBDIR: &str = "sessions";
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct SessionMeta {
|
||||
id: String,
|
||||
timestamp: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
instructions: Option<String>,
|
||||
#[derive(Serialize, Deserialize, Clone, Default)]
|
||||
pub struct SessionMeta {
|
||||
pub id: Uuid,
|
||||
pub timestamp: String,
|
||||
pub instructions: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Default, Clone)]
|
||||
pub struct SessionStateSnapshot {}
|
||||
|
||||
#[derive(Serialize, Deserialize, Default, Clone)]
|
||||
pub struct SavedSession {
|
||||
pub session: SessionMeta,
|
||||
#[serde(default)]
|
||||
pub items: Vec<ResponseItem>,
|
||||
#[serde(default)]
|
||||
pub state: SessionStateSnapshot,
|
||||
pub session_id: Uuid,
|
||||
}
|
||||
|
||||
/// Records all [`ResponseItem`]s for a session and flushes them to disk after
|
||||
@@ -41,7 +55,13 @@ struct SessionMeta {
|
||||
/// ```
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct RolloutRecorder {
|
||||
tx: Sender<String>,
|
||||
tx: Sender<RolloutCmd>,
|
||||
}
|
||||
|
||||
enum RolloutCmd {
|
||||
AddItems(Vec<ResponseItem>),
|
||||
UpdateState(SessionStateSnapshot),
|
||||
Shutdown { ack: oneshot::Sender<()> },
|
||||
}
|
||||
|
||||
impl RolloutRecorder {
|
||||
@@ -59,7 +79,6 @@ impl RolloutRecorder {
|
||||
timestamp,
|
||||
} = create_log_file(config, uuid)?;
|
||||
|
||||
// Build the static session metadata JSON first.
|
||||
let timestamp_format: &[FormatItem] = format_description!(
|
||||
"[year]-[month]-[day]T[hour]:[minute]:[second].[subsecond digits:3]Z"
|
||||
);
|
||||
@@ -69,46 +88,29 @@ impl RolloutRecorder {
|
||||
|
||||
let meta = SessionMeta {
|
||||
timestamp,
|
||||
id: session_id.to_string(),
|
||||
id: session_id,
|
||||
instructions,
|
||||
};
|
||||
|
||||
// A reasonably-sized bounded channel. If the buffer fills up the send
|
||||
// future will yield, which is fine – we only need to ensure we do not
|
||||
// perform *blocking* I/O on the caller’s thread.
|
||||
let (tx, mut rx) = mpsc::channel::<String>(256);
|
||||
let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
|
||||
|
||||
// Spawn a Tokio task that owns the file handle and performs async
|
||||
// writes. Using `tokio::fs::File` keeps everything on the async I/O
|
||||
// driver instead of blocking the runtime.
|
||||
tokio::task::spawn(async move {
|
||||
let mut file = tokio::fs::File::from_std(file);
|
||||
tokio::task::spawn(rollout_writer(
|
||||
tokio::fs::File::from_std(file),
|
||||
rx,
|
||||
Some(meta),
|
||||
));
|
||||
|
||||
while let Some(line) = rx.recv().await {
|
||||
// Write line + newline, then flush to disk.
|
||||
if let Err(e) = file.write_all(line.as_bytes()).await {
|
||||
tracing::warn!("rollout writer: failed to write line: {e}");
|
||||
break;
|
||||
}
|
||||
if let Err(e) = file.write_all(b"\n").await {
|
||||
tracing::warn!("rollout writer: failed to write newline: {e}");
|
||||
break;
|
||||
}
|
||||
if let Err(e) = file.flush().await {
|
||||
tracing::warn!("rollout writer: failed to flush: {e}");
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let recorder = Self { tx };
|
||||
// Ensure SessionMeta is the first item in the file.
|
||||
recorder.record_item(&meta).await?;
|
||||
Ok(recorder)
|
||||
Ok(Self { tx })
|
||||
}
|
||||
|
||||
/// Append `items` to the rollout file.
|
||||
pub(crate) async fn record_items(&self, items: &[ResponseItem]) -> std::io::Result<()> {
|
||||
let mut filtered = Vec::new();
|
||||
for item in items {
|
||||
match item {
|
||||
// Note that function calls may look a bit strange if they are
|
||||
@@ -117,27 +119,106 @@ impl RolloutRecorder {
|
||||
ResponseItem::Message { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::FunctionCallOutput { .. } => {}
|
||||
ResponseItem::Reasoning { .. } | ResponseItem::Other => {
|
||||
| ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::Reasoning { .. } => filtered.push(item.clone()),
|
||||
ResponseItem::Other => {
|
||||
// These should never be serialized.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
self.record_item(item).await?;
|
||||
}
|
||||
Ok(())
|
||||
if filtered.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
self.tx
|
||||
.send(RolloutCmd::AddItems(filtered))
|
||||
.await
|
||||
.map_err(|e| IoError::other(format!("failed to queue rollout items: {e}")))
|
||||
}
|
||||
|
||||
async fn record_item(&self, item: &impl Serialize) -> std::io::Result<()> {
|
||||
// Serialize the item to JSON first so that the writer thread only has
|
||||
// to perform the actual write.
|
||||
let json = serde_json::to_string(item)
|
||||
.map_err(|e| IoError::other(format!("failed to serialize response items: {e}")))?;
|
||||
|
||||
pub(crate) async fn record_state(&self, state: SessionStateSnapshot) -> std::io::Result<()> {
|
||||
self.tx
|
||||
.send(json)
|
||||
.send(RolloutCmd::UpdateState(state))
|
||||
.await
|
||||
.map_err(|e| IoError::other(format!("failed to queue rollout item: {e}")))
|
||||
.map_err(|e| IoError::other(format!("failed to queue rollout state: {e}")))
|
||||
}
|
||||
|
||||
pub async fn resume(path: &Path) -> std::io::Result<(Self, SavedSession)> {
|
||||
info!("Resuming rollout from {path:?}");
|
||||
let text = tokio::fs::read_to_string(path).await?;
|
||||
let mut lines = text.lines();
|
||||
let meta_line = lines
|
||||
.next()
|
||||
.ok_or_else(|| IoError::other("empty session file"))?;
|
||||
let session: SessionMeta = serde_json::from_str(meta_line)
|
||||
.map_err(|e| IoError::other(format!("failed to parse session meta: {e}")))?;
|
||||
let mut items = Vec::new();
|
||||
let mut state = SessionStateSnapshot::default();
|
||||
|
||||
for line in lines {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let v: Value = match serde_json::from_str(line) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if v.get("record_type")
|
||||
.and_then(|rt| rt.as_str())
|
||||
.map(|s| s == "state")
|
||||
.unwrap_or(false)
|
||||
{
|
||||
if let Ok(s) = serde_json::from_value::<SessionStateSnapshot>(v.clone()) {
|
||||
state = s
|
||||
}
|
||||
continue;
|
||||
}
|
||||
match serde_json::from_value::<ResponseItem>(v.clone()) {
|
||||
Ok(item) => match item {
|
||||
ResponseItem::Message { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::Reasoning { .. } => items.push(item),
|
||||
ResponseItem::Other => {}
|
||||
},
|
||||
Err(e) => {
|
||||
warn!("failed to parse item: {v:?}, error: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let saved = SavedSession {
|
||||
session: session.clone(),
|
||||
items: items.clone(),
|
||||
state: state.clone(),
|
||||
session_id: session.id,
|
||||
};
|
||||
|
||||
let file = std::fs::OpenOptions::new()
|
||||
.append(true)
|
||||
.read(true)
|
||||
.open(path)?;
|
||||
|
||||
let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
|
||||
tokio::task::spawn(rollout_writer(tokio::fs::File::from_std(file), rx, None));
|
||||
info!("Resumed rollout successfully from {path:?}");
|
||||
Ok((Self { tx }, saved))
|
||||
}
|
||||
|
||||
pub async fn shutdown(&self) -> std::io::Result<()> {
|
||||
let (tx_done, rx_done) = oneshot::channel();
|
||||
match self.tx.send(RolloutCmd::Shutdown { ack: tx_done }).await {
|
||||
Ok(_) => rx_done
|
||||
.await
|
||||
.map_err(|e| IoError::other(format!("failed waiting for rollout shutdown: {e}"))),
|
||||
Err(e) => {
|
||||
warn!("failed to send rollout shutdown command: {e}");
|
||||
Err(IoError::other(format!(
|
||||
"failed to send rollout shutdown command: {e}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -153,13 +234,15 @@ struct LogFileInfo {
|
||||
}
|
||||
|
||||
fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFileInfo> {
|
||||
// Resolve ~/.codex/sessions and create it if missing.
|
||||
let mut dir = config.codex_home.clone();
|
||||
dir.push(SESSIONS_SUBDIR);
|
||||
fs::create_dir_all(&dir)?;
|
||||
|
||||
// Resolve ~/.codex/sessions/YYYY/MM/DD and create it if missing.
|
||||
let timestamp = OffsetDateTime::now_local()
|
||||
.map_err(|e| IoError::other(format!("failed to get local time: {e}")))?;
|
||||
let mut dir = config.codex_home.clone();
|
||||
dir.push(SESSIONS_SUBDIR);
|
||||
dir.push(timestamp.year().to_string());
|
||||
dir.push(format!("{:02}", u8::from(timestamp.month())));
|
||||
dir.push(format!("{:02}", timestamp.day()));
|
||||
fs::create_dir_all(&dir)?;
|
||||
|
||||
// Custom format for YYYY-MM-DDThh-mm-ss. Use `-` instead of `:` for
|
||||
// compatibility with filesystems that do not allow colons in filenames.
|
||||
@@ -183,3 +266,58 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
|
||||
timestamp,
|
||||
})
|
||||
}
|
||||
|
||||
async fn rollout_writer(
|
||||
mut file: tokio::fs::File,
|
||||
mut rx: mpsc::Receiver<RolloutCmd>,
|
||||
meta: Option<SessionMeta>,
|
||||
) {
|
||||
if let Some(meta) = meta {
|
||||
if let Ok(json) = serde_json::to_string(&meta) {
|
||||
let _ = file.write_all(json.as_bytes()).await;
|
||||
let _ = file.write_all(b"\n").await;
|
||||
let _ = file.flush().await;
|
||||
}
|
||||
}
|
||||
while let Some(cmd) = rx.recv().await {
|
||||
match cmd {
|
||||
RolloutCmd::AddItems(items) => {
|
||||
for item in items {
|
||||
match item {
|
||||
ResponseItem::Message { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::Reasoning { .. } => {
|
||||
if let Ok(json) = serde_json::to_string(&item) {
|
||||
let _ = file.write_all(json.as_bytes()).await;
|
||||
let _ = file.write_all(b"\n").await;
|
||||
}
|
||||
}
|
||||
ResponseItem::Other => {}
|
||||
}
|
||||
}
|
||||
let _ = file.flush().await;
|
||||
}
|
||||
RolloutCmd::UpdateState(state) => {
|
||||
#[derive(Serialize)]
|
||||
struct StateLine<'a> {
|
||||
record_type: &'static str,
|
||||
#[serde(flatten)]
|
||||
state: &'a SessionStateSnapshot,
|
||||
}
|
||||
if let Ok(json) = serde_json::to_string(&StateLine {
|
||||
record_type: "state",
|
||||
state: &state,
|
||||
}) {
|
||||
let _ = file.write_all(json.as_bytes()).await;
|
||||
let _ = file.write_all(b"\n").await;
|
||||
let _ = file.flush().await;
|
||||
}
|
||||
}
|
||||
RolloutCmd::Shutdown { ack } => {
|
||||
let _ = ack.send(());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,12 +31,12 @@ pub fn assess_patch_safety(
|
||||
}
|
||||
|
||||
match policy {
|
||||
AskForApproval::OnFailure | AskForApproval::AutoEdit | AskForApproval::Never => {
|
||||
AskForApproval::OnFailure | AskForApproval::Never => {
|
||||
// Continue to see if this can be auto-approved.
|
||||
}
|
||||
// TODO(ragona): I'm not sure this is actually correct? I believe in this case
|
||||
// we want to continue to the writable paths check before asking the user.
|
||||
AskForApproval::UnlessAllowListed => {
|
||||
AskForApproval::UnlessTrusted => {
|
||||
return SafetyCheck::AskUser;
|
||||
}
|
||||
}
|
||||
@@ -63,40 +63,71 @@ pub fn assess_patch_safety(
|
||||
}
|
||||
}
|
||||
|
||||
/// For a command to be run _without_ a sandbox, one of the following must be
|
||||
/// true:
|
||||
///
|
||||
/// - the user has explicitly approved the command
|
||||
/// - the command is on the "known safe" list
|
||||
/// - `DangerFullAccess` was specified and `UnlessTrusted` was not
|
||||
pub fn assess_command_safety(
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
approved: &HashSet<Vec<String>>,
|
||||
) -> SafetyCheck {
|
||||
let approve_without_sandbox = || SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
};
|
||||
use AskForApproval::*;
|
||||
use SandboxPolicy::*;
|
||||
|
||||
// Previously approved or allow-listed commands
|
||||
// All approval modes allow these commands to continue without sandboxing
|
||||
// A command is "trusted" because either:
|
||||
// - it belongs to a set of commands we consider "safe" by default, or
|
||||
// - the user has explicitly approved the command for this session
|
||||
//
|
||||
// Currently, whether a command is "trusted" is a simple boolean, but we
|
||||
// should include more metadata on this command test to indicate whether it
|
||||
// should be run inside a sandbox or not. (This could be something the user
|
||||
// defines as part of `execpolicy`.)
|
||||
//
|
||||
// For example, when `is_known_safe_command(command)` returns `true`, it
|
||||
// would probably be fine to run the command in a sandbox, but when
|
||||
// `approved.contains(command)` is `true`, the user may have approved it for
|
||||
// the session _because_ they know it needs to run outside a sandbox.
|
||||
if is_known_safe_command(command) || approved.contains(command) {
|
||||
// TODO(ragona): I think we should consider running even these inside the sandbox, but it's
|
||||
// a change in behavior so I'm keeping it at parity with upstream for now.
|
||||
return approve_without_sandbox();
|
||||
return SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
};
|
||||
}
|
||||
|
||||
// Command was not known-safe or allow-listed
|
||||
if sandbox_policy.is_unrestricted() {
|
||||
approve_without_sandbox()
|
||||
} else {
|
||||
match get_platform_sandbox() {
|
||||
// We have a sandbox, so we can approve the command in all modes
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove { sandbox_type },
|
||||
None => {
|
||||
// We do not have a sandbox, so we need to consider the approval policy
|
||||
match approval_policy {
|
||||
// Never is our "non-interactive" mode; it must automatically reject
|
||||
AskForApproval::Never => SafetyCheck::Reject {
|
||||
reason: "auto-rejected by user approval settings".to_string(),
|
||||
},
|
||||
// Otherwise, we ask the user for approval
|
||||
_ => SafetyCheck::AskUser,
|
||||
match (approval_policy, sandbox_policy) {
|
||||
(UnlessTrusted, _) => {
|
||||
// Even though the user may have opted into DangerFullAccess,
|
||||
// they also requested that we ask for approval for untrusted
|
||||
// commands.
|
||||
SafetyCheck::AskUser
|
||||
}
|
||||
(OnFailure, DangerFullAccess) | (Never, DangerFullAccess) => SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
},
|
||||
(Never, ReadOnly)
|
||||
| (Never, WorkspaceWrite { .. })
|
||||
| (OnFailure, ReadOnly)
|
||||
| (OnFailure, WorkspaceWrite { .. }) => {
|
||||
match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove { sandbox_type },
|
||||
None => {
|
||||
if matches!(approval_policy, OnFailure) {
|
||||
// Since the command is not trusted, even though the
|
||||
// user has requested to only ask for approval on
|
||||
// failure, we will ask the user because no sandbox is
|
||||
// available.
|
||||
SafetyCheck::AskUser
|
||||
} else {
|
||||
// We are in non-interactive mode and lack approval, so
|
||||
// all we can do is reject the command.
|
||||
SafetyCheck::Reject {
|
||||
reason: "auto-rejected because command is not on trusted list"
|
||||
.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
8
codex-rs/core/tests/cli_responses_fixture.sse
Normal file
8
codex-rs/core/tests/cli_responses_fixture.sse
Normal file
@@ -0,0 +1,8 @@
|
||||
event: response.created
|
||||
data: {"type":"response.created","response":{"id":"resp1"}}
|
||||
|
||||
event: response.output_item.done
|
||||
data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"fixture hello"}]}}
|
||||
|
||||
event: response.completed
|
||||
data: {"type":"response.completed","response":{"id":"resp1","output":[]}}
|
||||
361
codex-rs/core/tests/cli_stream.rs
Normal file
361
codex-rs/core/tests/cli_stream.rs
Normal file
@@ -0,0 +1,361 @@
|
||||
#![expect(clippy::unwrap_used)]
|
||||
|
||||
use assert_cmd::Command as AssertCommand;
|
||||
use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
use tempfile::TempDir;
|
||||
use uuid::Uuid;
|
||||
use walkdir::WalkDir;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
/// Tests streaming chat completions through the CLI using a mock server.
|
||||
/// This test:
|
||||
/// 1. Sets up a mock server that simulates OpenAI's chat completions API
|
||||
/// 2. Configures codex to use this mock server via a custom provider
|
||||
/// 3. Sends a simple "hello?" prompt and verifies the streamed response
|
||||
/// 4. Ensures the response is received exactly once and contains "hi"
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn chat_mode_stream_cli() {
|
||||
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let server = MockServer::start().await;
|
||||
let sse = concat!(
|
||||
"data: {\"choices\":[{\"delta\":{\"content\":\"hi\"}}]}\n\n",
|
||||
"data: {\"choices\":[{\"delta\":{}}]}\n\n",
|
||||
"data: [DONE]\n\n"
|
||||
);
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/chat/completions"))
|
||||
.respond_with(
|
||||
ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse, "text/event-stream"),
|
||||
)
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let home = TempDir::new().unwrap();
|
||||
let provider_override = format!(
|
||||
"model_providers.mock={{ name = \"mock\", base_url = \"{}/v1\", env_key = \"PATH\", wire_api = \"chat\" }}",
|
||||
server.uri()
|
||||
);
|
||||
let mut cmd = AssertCommand::new("cargo");
|
||||
cmd.arg("run")
|
||||
.arg("-p")
|
||||
.arg("codex-cli")
|
||||
.arg("--quiet")
|
||||
.arg("--")
|
||||
.arg("exec")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-c")
|
||||
.arg(&provider_override)
|
||||
.arg("-c")
|
||||
.arg("model_provider=\"mock\"")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg("hello?");
|
||||
cmd.env("CODEX_HOME", home.path())
|
||||
.env("OPENAI_API_KEY", "dummy")
|
||||
.env("OPENAI_BASE_URL", format!("{}/v1", server.uri()));
|
||||
|
||||
let output = cmd.output().unwrap();
|
||||
println!("Status: {}", output.status);
|
||||
println!("Stdout:\n{}", String::from_utf8_lossy(&output.stdout));
|
||||
println!("Stderr:\n{}", String::from_utf8_lossy(&output.stderr));
|
||||
assert!(output.status.success());
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let hi_lines = stdout.lines().filter(|line| line.trim() == "hi").count();
|
||||
assert_eq!(hi_lines, 1, "Expected exactly one line with 'hi'");
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// Tests streaming responses through the CLI using a local SSE fixture file.
|
||||
/// This test:
|
||||
/// 1. Uses a pre-recorded SSE response fixture instead of a live server
|
||||
/// 2. Configures codex to read from this fixture via CODEX_RS_SSE_FIXTURE env var
|
||||
/// 3. Sends a "hello?" prompt and verifies the response
|
||||
/// 4. Ensures the fixture content is correctly streamed through the CLI
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn responses_api_stream_cli() {
|
||||
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let fixture =
|
||||
std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");
|
||||
|
||||
let home = TempDir::new().unwrap();
|
||||
let mut cmd = AssertCommand::new("cargo");
|
||||
cmd.arg("run")
|
||||
.arg("-p")
|
||||
.arg("codex-cli")
|
||||
.arg("--quiet")
|
||||
.arg("--")
|
||||
.arg("exec")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg("hello?");
|
||||
cmd.env("CODEX_HOME", home.path())
|
||||
.env("OPENAI_API_KEY", "dummy")
|
||||
.env("CODEX_RS_SSE_FIXTURE", fixture)
|
||||
.env("OPENAI_BASE_URL", "http://unused.local");
|
||||
|
||||
let output = cmd.output().unwrap();
|
||||
assert!(output.status.success());
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
assert!(stdout.contains("fixture hello"));
|
||||
}
|
||||
|
||||
/// End-to-end: create a session (writes rollout), verify the file, then resume and confirm append.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn integration_creates_and_checks_session_file() {
|
||||
// Honor sandbox network restrictions for CI parity with the other tests.
|
||||
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// 1. Temp home so we read/write isolated session files.
|
||||
let home = TempDir::new().unwrap();
|
||||
|
||||
// 2. Unique marker we'll look for in the session log.
|
||||
let marker = format!("integration-test-{}", Uuid::new_v4());
|
||||
let prompt = format!("echo {marker}");
|
||||
|
||||
// 3. Use the same offline SSE fixture as responses_api_stream_cli so the test is hermetic.
|
||||
let fixture =
|
||||
std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");
|
||||
|
||||
// 4. Run the codex CLI through cargo (ensures the right bin is built) and invoke `exec`,
|
||||
// which is what records a session.
|
||||
let mut cmd = AssertCommand::new("cargo");
|
||||
cmd.arg("run")
|
||||
.arg("-p")
|
||||
.arg("codex-cli")
|
||||
.arg("--quiet")
|
||||
.arg("--")
|
||||
.arg("exec")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt);
|
||||
cmd.env("CODEX_HOME", home.path())
|
||||
.env("OPENAI_API_KEY", "dummy")
|
||||
.env("CODEX_RS_SSE_FIXTURE", &fixture)
|
||||
// Required for CLI arg parsing even though fixture short-circuits network usage.
|
||||
.env("OPENAI_BASE_URL", "http://unused.local");
|
||||
|
||||
let output = cmd.output().unwrap();
|
||||
assert!(
|
||||
output.status.success(),
|
||||
"codex-cli exec failed: {}",
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
|
||||
// Wait for sessions dir to appear.
|
||||
let sessions_dir = home.path().join("sessions");
|
||||
let dir_deadline = Instant::now() + Duration::from_secs(5);
|
||||
while !sessions_dir.exists() && Instant::now() < dir_deadline {
|
||||
std::thread::sleep(Duration::from_millis(50));
|
||||
}
|
||||
assert!(sessions_dir.exists(), "sessions directory never appeared");
|
||||
|
||||
// Find the session file that contains `marker`.
|
||||
let deadline = Instant::now() + Duration::from_secs(10);
|
||||
let mut matching_path: Option<std::path::PathBuf> = None;
|
||||
while Instant::now() < deadline && matching_path.is_none() {
|
||||
for entry in WalkDir::new(&sessions_dir) {
|
||||
let entry = match entry {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if !entry.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
if !entry.file_name().to_string_lossy().ends_with(".jsonl") {
|
||||
continue;
|
||||
}
|
||||
let path = entry.path();
|
||||
let Ok(content) = std::fs::read_to_string(path) else {
|
||||
continue;
|
||||
};
|
||||
let mut lines = content.lines();
|
||||
if lines.next().is_none() {
|
||||
continue;
|
||||
}
|
||||
for line in lines {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let item: serde_json::Value = match serde_json::from_str(line) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if item.get("type").and_then(|t| t.as_str()) == Some("message") {
|
||||
if let Some(c) = item.get("content") {
|
||||
if c.to_string().contains(&marker) {
|
||||
matching_path = Some(path.to_path_buf());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if matching_path.is_none() {
|
||||
std::thread::sleep(Duration::from_millis(50));
|
||||
}
|
||||
}
|
||||
|
||||
let path = match matching_path {
|
||||
Some(p) => p,
|
||||
None => panic!("No session file containing the marker was found"),
|
||||
};
|
||||
|
||||
// Basic sanity checks on location and metadata.
|
||||
let rel = match path.strip_prefix(&sessions_dir) {
|
||||
Ok(r) => r,
|
||||
Err(_) => panic!("session file should live under sessions/"),
|
||||
};
|
||||
let comps: Vec<String> = rel
|
||||
.components()
|
||||
.map(|c| c.as_os_str().to_string_lossy().into_owned())
|
||||
.collect();
|
||||
assert_eq!(
|
||||
comps.len(),
|
||||
4,
|
||||
"Expected sessions/YYYY/MM/DD/<file>, got {rel:?}"
|
||||
);
|
||||
let year = &comps[0];
|
||||
let month = &comps[1];
|
||||
let day = &comps[2];
|
||||
assert!(
|
||||
year.len() == 4 && year.chars().all(|c| c.is_ascii_digit()),
|
||||
"Year dir not 4-digit numeric: {year}"
|
||||
);
|
||||
assert!(
|
||||
month.len() == 2 && month.chars().all(|c| c.is_ascii_digit()),
|
||||
"Month dir not zero-padded 2-digit numeric: {month}"
|
||||
);
|
||||
assert!(
|
||||
day.len() == 2 && day.chars().all(|c| c.is_ascii_digit()),
|
||||
"Day dir not zero-padded 2-digit numeric: {day}"
|
||||
);
|
||||
if let Ok(m) = month.parse::<u8>() {
|
||||
assert!((1..=12).contains(&m), "Month out of range: {m}");
|
||||
}
|
||||
if let Ok(d) = day.parse::<u8>() {
|
||||
assert!((1..=31).contains(&d), "Day out of range: {d}");
|
||||
}
|
||||
|
||||
let content =
|
||||
std::fs::read_to_string(&path).unwrap_or_else(|_| panic!("Failed to read session file"));
|
||||
let mut lines = content.lines();
|
||||
let meta_line = lines
|
||||
.next()
|
||||
.ok_or("missing session meta line")
|
||||
.unwrap_or_else(|_| panic!("missing session meta line"));
|
||||
let meta: serde_json::Value = serde_json::from_str(meta_line)
|
||||
.unwrap_or_else(|_| panic!("Failed to parse session meta line as JSON"));
|
||||
assert!(meta.get("id").is_some(), "SessionMeta missing id");
|
||||
assert!(
|
||||
meta.get("timestamp").is_some(),
|
||||
"SessionMeta missing timestamp"
|
||||
);
|
||||
|
||||
let mut found_message = false;
|
||||
for line in lines {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let Ok(item) = serde_json::from_str::<serde_json::Value>(line) else {
|
||||
continue;
|
||||
};
|
||||
if item.get("type").and_then(|t| t.as_str()) == Some("message") {
|
||||
if let Some(c) = item.get("content") {
|
||||
if c.to_string().contains(&marker) {
|
||||
found_message = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
assert!(
|
||||
found_message,
|
||||
"No message found in session file containing the marker"
|
||||
);
|
||||
|
||||
// Second run: resume and append.
|
||||
let orig_len = content.lines().count();
|
||||
let marker2 = format!("integration-resume-{}", Uuid::new_v4());
|
||||
let prompt2 = format!("echo {marker2}");
|
||||
// Cross‑platform safe resume override. On Windows, backslashes in a TOML string must be escaped
|
||||
// or the parse will fail and the raw literal (including quotes) may be preserved all the way down
|
||||
// to Config, which in turn breaks resume because the path is invalid. Normalize to forward slashes
|
||||
// to sidestep the issue.
|
||||
let resume_path_str = path.to_string_lossy().replace('\\', "/");
|
||||
let resume_override = format!("experimental_resume=\"{resume_path_str}\"");
|
||||
let mut cmd2 = AssertCommand::new("cargo");
|
||||
cmd2.arg("run")
|
||||
.arg("-p")
|
||||
.arg("codex-cli")
|
||||
.arg("--quiet")
|
||||
.arg("--")
|
||||
.arg("exec")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-c")
|
||||
.arg(&resume_override)
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt2);
|
||||
cmd2.env("CODEX_HOME", home.path())
|
||||
.env("OPENAI_API_KEY", "dummy")
|
||||
.env("CODEX_RS_SSE_FIXTURE", &fixture)
|
||||
.env("OPENAI_BASE_URL", "http://unused.local");
|
||||
let output2 = cmd2.output().unwrap();
|
||||
assert!(output2.status.success(), "resume codex-cli run failed");
|
||||
|
||||
// The rollout writer runs on a background async task; give it a moment to flush.
|
||||
let mut new_len = orig_len;
|
||||
let deadline = Instant::now() + Duration::from_secs(5);
|
||||
let mut content2 = String::new();
|
||||
while Instant::now() < deadline {
|
||||
if let Ok(c) = std::fs::read_to_string(&path) {
|
||||
let count = c.lines().count();
|
||||
if count > orig_len {
|
||||
content2 = c;
|
||||
new_len = count;
|
||||
break;
|
||||
}
|
||||
}
|
||||
std::thread::sleep(Duration::from_millis(50));
|
||||
}
|
||||
if content2.is_empty() {
|
||||
// last attempt
|
||||
content2 = std::fs::read_to_string(&path).unwrap();
|
||||
new_len = content2.lines().count();
|
||||
}
|
||||
assert!(new_len > orig_len, "rollout file did not grow after resume");
|
||||
assert!(content2.contains(&marker), "rollout lost original marker");
|
||||
assert!(
|
||||
content2.contains(&marker2),
|
||||
"rollout missing resumed marker"
|
||||
);
|
||||
}
|
||||
174
codex-rs/core/tests/client.rs
Normal file
174
codex-rs/core/tests/client.rs
Normal file
@@ -0,0 +1,174 @@
|
||||
use codex_core::Codex;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::InputItem;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SessionConfiguredEvent;
|
||||
mod test_support;
|
||||
use tempfile::TempDir;
|
||||
use test_support::load_default_config_for_test;
|
||||
use test_support::load_sse_fixture_with_id;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
/// Build minimal SSE stream with completed marker using the JSON fixture.
|
||||
fn sse_completed(id: &str) -> String {
|
||||
load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn includes_session_id_and_model_headers_in_request() {
|
||||
#![allow(clippy::unwrap_used)]
|
||||
|
||||
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Mock server
|
||||
let server = MockServer::start().await;
|
||||
|
||||
// First request – must NOT include `previous_response_id`.
|
||||
let first = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse_completed("resp1"), "text/event-stream");
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(first)
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
name: "openai".into(),
|
||||
base_url: format!("{}/v1", server.uri()),
|
||||
// Environment variable that should exist in the test environment.
|
||||
// ModelClient will return an error if the environment variable for the
|
||||
// provider is not set.
|
||||
env_key: Some("PATH".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: codex_core::WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: Some(
|
||||
[("originator".to_string(), "codex_cli_rs".to_string())]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: None,
|
||||
};
|
||||
|
||||
// Init session
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.model_provider = model_provider;
|
||||
let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
|
||||
let (codex, _init_id, _session_id) = Codex::spawn(config, ctrl_c.clone()).await.unwrap();
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
text: "hello".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let EventMsg::SessionConfigured(SessionConfiguredEvent { session_id, .. }) =
|
||||
test_support::wait_for_event(&codex, |ev| matches!(ev, EventMsg::SessionConfigured(_)))
|
||||
.await
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let current_session_id = Some(session_id.to_string());
|
||||
test_support::wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// get request from the server
|
||||
let request = &server.received_requests().await.unwrap()[0];
|
||||
let request_body = request.headers.get("session_id").unwrap();
|
||||
let originator = request.headers.get("originator").unwrap();
|
||||
|
||||
assert!(current_session_id.is_some());
|
||||
assert_eq!(
|
||||
request_body.to_str().unwrap(),
|
||||
current_session_id.as_ref().unwrap()
|
||||
);
|
||||
assert_eq!(originator.to_str().unwrap(), "codex_cli_rs");
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn includes_base_instructions_override_in_request() {
|
||||
#![allow(clippy::unwrap_used)]
|
||||
|
||||
// Mock server
|
||||
let server = MockServer::start().await;
|
||||
|
||||
// First request – must NOT include `previous_response_id`.
|
||||
let first = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse_completed("resp1"), "text/event-stream");
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(first)
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
name: "openai".into(),
|
||||
base_url: format!("{}/v1", server.uri()),
|
||||
// Environment variable that should exist in the test environment.
|
||||
// ModelClient will return an error if the environment variable for the
|
||||
// provider is not set.
|
||||
env_key: Some("PATH".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: codex_core::WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: None,
|
||||
};
|
||||
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
|
||||
config.base_instructions = Some("test instructions".to_string());
|
||||
config.model_provider = model_provider;
|
||||
|
||||
let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
|
||||
let (codex, ..) = Codex::spawn(config, ctrl_c.clone()).await.unwrap();
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
text: "hello".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
test_support::wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let request = &server.received_requests().await.unwrap()[0];
|
||||
let request_body = request.body_json::<serde_json::Value>().unwrap();
|
||||
|
||||
assert!(
|
||||
request_body["instructions"]
|
||||
.as_str()
|
||||
.unwrap()
|
||||
.contains("test instructions")
|
||||
);
|
||||
}
|
||||
16
codex-rs/core/tests/fixtures/completed_template.json
vendored
Normal file
16
codex-rs/core/tests/fixtures/completed_template.json
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
[
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "__ID__",
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 0,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 0
|
||||
},
|
||||
"output": []
|
||||
}
|
||||
}
|
||||
]
|
||||
3
codex-rs/core/tests/fixtures/incomplete_sse.json
vendored
Normal file
3
codex-rs/core/tests/fixtures/incomplete_sse.json
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
[
|
||||
{"type": "response.output_item.done"}
|
||||
]
|
||||
@@ -45,23 +45,12 @@ async fn spawn_codex() -> Result<Codex, CodexErr> {
|
||||
"OPENAI_API_KEY must be set for live tests"
|
||||
);
|
||||
|
||||
// Environment tweaks to keep the tests snappy and inexpensive while still
|
||||
// exercising retry/robustness logic.
|
||||
//
|
||||
// NOTE: Starting with the 2024 edition `std::env::set_var` is `unsafe`
|
||||
// because changing the process environment races with any other threads
|
||||
// that might be performing environment look-ups at the same time.
|
||||
// Restrict the unsafety to this tiny block that happens at the very
|
||||
// beginning of the test, before we spawn any background tasks that could
|
||||
// observe the environment.
|
||||
unsafe {
|
||||
std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "2");
|
||||
std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "2");
|
||||
}
|
||||
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let config = load_default_config_for_test(&codex_home);
|
||||
let (agent, _init_id) = Codex::spawn(config, std::sync::Arc::new(Notify::new())).await?;
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.model_provider.request_max_retries = Some(2);
|
||||
config.model_provider.stream_max_retries = Some(2);
|
||||
let (agent, _init_id, _session_id) =
|
||||
Codex::spawn(config, std::sync::Arc::new(Notify::new())).await?;
|
||||
|
||||
Ok(agent)
|
||||
}
|
||||
@@ -79,7 +68,7 @@ async fn live_streaming_and_prev_id_reset() {
|
||||
|
||||
let codex = spawn_codex().await.unwrap();
|
||||
|
||||
// ---------- Task 1 ----------
|
||||
// ---------- Task 1 ----------
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
@@ -113,7 +102,7 @@ async fn live_streaming_and_prev_id_reset() {
|
||||
"Agent did not stream any AgentMessage before TaskComplete"
|
||||
);
|
||||
|
||||
// ---------- Task 2 (same session) ----------
|
||||
// ---------- Task 2 (same session) ----------
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
|
||||
@@ -1,166 +0,0 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_core::Codex;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_core::protocol::ErrorEvent;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::InputItem;
|
||||
use codex_core::protocol::Op;
|
||||
mod test_support;
|
||||
use serde_json::Value;
|
||||
use tempfile::TempDir;
|
||||
use test_support::load_default_config_for_test;
|
||||
use tokio::time::timeout;
|
||||
use wiremock::Match;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::Request;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
/// Matcher asserting that JSON body has NO `previous_response_id` field.
|
||||
struct NoPrevId;
|
||||
|
||||
impl Match for NoPrevId {
|
||||
fn matches(&self, req: &Request) -> bool {
|
||||
serde_json::from_slice::<Value>(&req.body)
|
||||
.map(|v| v.get("previous_response_id").is_none())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
/// Matcher asserting that JSON body HAS a `previous_response_id` field.
|
||||
struct HasPrevId;
|
||||
|
||||
impl Match for HasPrevId {
|
||||
fn matches(&self, req: &Request) -> bool {
|
||||
serde_json::from_slice::<Value>(&req.body)
|
||||
.map(|v| v.get("previous_response_id").is_some())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
/// Build minimal SSE stream with completed marker.
|
||||
fn sse_completed(id: &str) -> String {
|
||||
format!(
|
||||
"event: response.completed\n\
|
||||
data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"{}\",\"output\":[]}}}}\n\n\n",
|
||||
id
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn keeps_previous_response_id_between_tasks() {
|
||||
#![allow(clippy::unwrap_used)]
|
||||
|
||||
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Mock server
|
||||
let server = MockServer::start().await;
|
||||
|
||||
// First request – must NOT include `previous_response_id`.
|
||||
let first = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse_completed("resp1"), "text/event-stream");
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.and(NoPrevId)
|
||||
.respond_with(first)
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
// Second request – MUST include `previous_response_id`.
|
||||
let second = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse_completed("resp2"), "text/event-stream");
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.and(HasPrevId)
|
||||
.respond_with(second)
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
// Environment
|
||||
// Update environment – `set_var` is `unsafe` starting with the 2024
|
||||
// edition so we group the calls into a single `unsafe { … }` block.
|
||||
unsafe {
|
||||
std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
|
||||
std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
|
||||
}
|
||||
let model_provider = ModelProviderInfo {
|
||||
name: "openai".into(),
|
||||
base_url: format!("{}/v1", server.uri()),
|
||||
// Environment variable that should exist in the test environment.
|
||||
// ModelClient will return an error if the environment variable for the
|
||||
// provider is not set.
|
||||
env_key: Some("PATH".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: codex_core::WireApi::Responses,
|
||||
};
|
||||
|
||||
// Init session
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.model_provider = model_provider;
|
||||
let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
|
||||
let (codex, _init_id) = Codex::spawn(config, ctrl_c.clone()).await.unwrap();
|
||||
|
||||
// Task 1 – triggers first request (no previous_response_id)
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
text: "hello".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Wait for TaskComplete
|
||||
loop {
|
||||
let ev = timeout(Duration::from_secs(1), codex.next_event())
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
if matches!(ev.msg, EventMsg::TaskComplete(_)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Task 2 – should include `previous_response_id` (triggers second request)
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
text: "again".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Wait for TaskComplete or error
|
||||
loop {
|
||||
let ev = timeout(Duration::from_secs(1), codex.next_event())
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
match ev.msg {
|
||||
EventMsg::TaskComplete(_) => break,
|
||||
EventMsg::Error(ErrorEvent { message }) => {
|
||||
panic!("unexpected error: {message}")
|
||||
}
|
||||
_ => {
|
||||
// Ignore other events.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -12,6 +12,8 @@ use codex_core::protocol::Op;
|
||||
mod test_support;
|
||||
use tempfile::TempDir;
|
||||
use test_support::load_default_config_for_test;
|
||||
use test_support::load_sse_fixture;
|
||||
use test_support::load_sse_fixture_with_id;
|
||||
use tokio::time::timeout;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
@@ -22,16 +24,11 @@ use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
fn sse_incomplete() -> String {
|
||||
// Only a single line; missing the completed event.
|
||||
"event: response.output_item.done\n\n".to_string()
|
||||
load_sse_fixture("tests/fixtures/incomplete_sse.json")
|
||||
}
|
||||
|
||||
fn sse_completed(id: &str) -> String {
|
||||
format!(
|
||||
"event: response.completed\n\
|
||||
data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"{}\",\"output\":[]}}}}\n\n\n",
|
||||
id
|
||||
)
|
||||
load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
@@ -73,19 +70,8 @@ async fn retries_on_early_close() {
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
// Environment
|
||||
//
|
||||
// As of Rust 2024 `std::env::set_var` has been made `unsafe` because
|
||||
// mutating the process environment is inherently racy when other threads
|
||||
// are running. We therefore have to wrap every call in an explicit
|
||||
// `unsafe` block. These are limited to the test-setup section so the
|
||||
// scope is very small and clearly delineated.
|
||||
|
||||
unsafe {
|
||||
std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
|
||||
std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "1");
|
||||
std::env::set_var("OPENAI_STREAM_IDLE_TIMEOUT_MS", "2000");
|
||||
}
|
||||
// Configure retry behavior explicitly to avoid mutating process-wide
|
||||
// environment variables.
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
name: "openai".into(),
|
||||
@@ -96,13 +82,20 @@ async fn retries_on_early_close() {
|
||||
env_key: Some("PATH".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: codex_core::WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
// exercise retry path: first attempt yields incomplete stream, so allow 1 retry
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(1),
|
||||
stream_idle_timeout_ms: Some(2000),
|
||||
};
|
||||
|
||||
let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.model_provider = model_provider;
|
||||
let (codex, _init_id) = Codex::spawn(config, ctrl_c).await.unwrap();
|
||||
let (codex, _init_id, _session_id) = Codex::spawn(config, ctrl_c).await.unwrap();
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
|
||||
@@ -21,3 +21,79 @@ pub fn load_default_config_for_test(codex_home: &TempDir) -> Config {
|
||||
)
|
||||
.expect("defaults for test should always succeed")
|
||||
}
|
||||
|
||||
/// Builds an SSE stream body from a JSON fixture.
|
||||
///
|
||||
/// The fixture must contain an array of objects where each object represents a
|
||||
/// single SSE event with at least a `type` field matching the `event:` value.
|
||||
/// Additional fields become the JSON payload for the `data:` line. An object
|
||||
/// with only a `type` field results in an event with no `data:` section. This
|
||||
/// makes it trivial to extend the fixtures as OpenAI adds new event kinds or
|
||||
/// fields.
|
||||
#[allow(dead_code)]
|
||||
pub fn load_sse_fixture(path: impl AsRef<std::path::Path>) -> String {
|
||||
let events: Vec<serde_json::Value> =
|
||||
serde_json::from_reader(std::fs::File::open(path).expect("read fixture"))
|
||||
.expect("parse JSON fixture");
|
||||
events
|
||||
.into_iter()
|
||||
.map(|e| {
|
||||
let kind = e
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.expect("fixture event missing type");
|
||||
if e.as_object().map(|o| o.len() == 1).unwrap_or(false) {
|
||||
format!("event: {kind}\n\n")
|
||||
} else {
|
||||
format!("event: {kind}\ndata: {e}\n\n")
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Same as [`load_sse_fixture`], but replaces the placeholder `__ID__` in the
|
||||
/// fixture template with the supplied identifier before parsing. This lets a
|
||||
/// single JSON template be reused by multiple tests that each need a unique
|
||||
/// `response_id`.
|
||||
#[allow(dead_code)]
|
||||
pub fn load_sse_fixture_with_id(path: impl AsRef<std::path::Path>, id: &str) -> String {
|
||||
let raw = std::fs::read_to_string(path).expect("read fixture template");
|
||||
let replaced = raw.replace("__ID__", id);
|
||||
let events: Vec<serde_json::Value> =
|
||||
serde_json::from_str(&replaced).expect("parse JSON fixture");
|
||||
events
|
||||
.into_iter()
|
||||
.map(|e| {
|
||||
let kind = e
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.expect("fixture event missing type");
|
||||
if e.as_object().map(|o| o.len() == 1).unwrap_or(false) {
|
||||
format!("event: {kind}\n\n")
|
||||
} else {
|
||||
format!("event: {kind}\ndata: {e}\n\n")
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub async fn wait_for_event<F>(
|
||||
codex: &codex_core::Codex,
|
||||
mut predicate: F,
|
||||
) -> codex_core::protocol::EventMsg
|
||||
where
|
||||
F: FnMut(&codex_core::protocol::EventMsg) -> bool,
|
||||
{
|
||||
use tokio::time::Duration;
|
||||
use tokio::time::timeout;
|
||||
loop {
|
||||
let ev = timeout(Duration::from_secs(1), codex.next_event())
|
||||
.await
|
||||
.expect("timeout waiting for event")
|
||||
.expect("stream ended unexpectedly");
|
||||
if predicate(&ev.msg) {
|
||||
return ev.msg;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,9 +19,12 @@ anyhow = "1"
|
||||
chrono = "0.4.40"
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
codex-core = { path = "../core" }
|
||||
codex-common = { path = "../common", features = ["cli", "elapsed"] }
|
||||
codex-common = { path = "../common", features = [
|
||||
"cli",
|
||||
"elapsed",
|
||||
"sandbox_summary",
|
||||
] }
|
||||
codex-linux-sandbox = { path = "../linux-sandbox" }
|
||||
mcp-types = { path = "../mcp-types" }
|
||||
owo-colors = "4.2.0"
|
||||
serde_json = "1"
|
||||
shlex = "1.3.0"
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use clap::Parser;
|
||||
use clap::ValueEnum;
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_common::SandboxPermissionOption;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
@@ -15,16 +14,27 @@ pub struct Cli {
|
||||
#[arg(long, short = 'm')]
|
||||
pub model: Option<String>,
|
||||
|
||||
/// Select the sandbox policy to use when executing model-generated shell
|
||||
/// commands.
|
||||
#[arg(long = "sandbox", short = 's')]
|
||||
pub sandbox_mode: Option<codex_common::SandboxModeCliArg>,
|
||||
|
||||
/// Configuration profile from config.toml to specify default options.
|
||||
#[arg(long = "profile", short = 'p')]
|
||||
pub config_profile: Option<String>,
|
||||
|
||||
/// Convenience alias for low-friction sandboxed automatic execution (network-disabled sandbox that can write to cwd and TMPDIR)
|
||||
/// Convenience alias for low-friction sandboxed automatic execution (-a on-failure, --sandbox workspace-write).
|
||||
#[arg(long = "full-auto", default_value_t = false)]
|
||||
pub full_auto: bool,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub sandbox: SandboxPermissionOption,
|
||||
/// Skip all confirmation prompts and execute commands without sandboxing.
|
||||
/// EXTREMELY DANGEROUS. Intended solely for running in environments that are externally sandboxed.
|
||||
#[arg(
|
||||
long = "dangerously-bypass-approvals-and-sandbox",
|
||||
default_value_t = false,
|
||||
conflicts_with = "full_auto"
|
||||
)]
|
||||
pub dangerously_bypass_approvals_and_sandbox: bool,
|
||||
|
||||
/// Tell the agent to use the specified directory as its working root.
|
||||
#[clap(long = "cd", short = 'C', value_name = "DIR")]
|
||||
@@ -41,6 +51,10 @@ pub struct Cli {
|
||||
#[arg(long = "color", value_enum, default_value_t = Color::Auto)]
|
||||
pub color: Color,
|
||||
|
||||
/// Print events to stdout as JSONL.
|
||||
#[arg(long = "json", default_value_t = false)]
|
||||
pub json: bool,
|
||||
|
||||
/// Specifies file where the last message from the agent should be written.
|
||||
#[arg(long = "output-last-message")]
|
||||
pub last_message_file: Option<PathBuf>,
|
||||
|
||||
@@ -1,487 +1,70 @@
|
||||
use codex_common::elapsed::format_elapsed;
|
||||
use std::path::Path;
|
||||
|
||||
use codex_common::summarize_sandbox_policy;
|
||||
use codex_core::WireApi;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::model_supports_reasoning_summaries;
|
||||
use codex_core::protocol::AgentMessageEvent;
|
||||
use codex_core::protocol::BackgroundEventEvent;
|
||||
use codex_core::protocol::ErrorEvent;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ExecCommandBeginEvent;
|
||||
use codex_core::protocol::ExecCommandEndEvent;
|
||||
use codex_core::protocol::FileChange;
|
||||
use codex_core::protocol::McpToolCallBeginEvent;
|
||||
use codex_core::protocol::McpToolCallEndEvent;
|
||||
use codex_core::protocol::PatchApplyBeginEvent;
|
||||
use codex_core::protocol::PatchApplyEndEvent;
|
||||
use codex_core::protocol::SessionConfiguredEvent;
|
||||
use owo_colors::OwoColorize;
|
||||
use owo_colors::Style;
|
||||
use shlex::try_join;
|
||||
use std::collections::HashMap;
|
||||
use std::time::Instant;
|
||||
|
||||
/// This should be configurable. When used in CI, users may not want to impose
|
||||
/// a limit so they can see the full transcript.
|
||||
const MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL: usize = 20;
|
||||
|
||||
pub(crate) struct EventProcessor {
|
||||
call_id_to_command: HashMap<String, ExecCommandBegin>,
|
||||
call_id_to_patch: HashMap<String, PatchApplyBegin>,
|
||||
|
||||
/// Tracks in-flight MCP tool calls so we can calculate duration and print
|
||||
/// a concise summary when the corresponding `McpToolCallEnd` event is
|
||||
/// received.
|
||||
call_id_to_tool_call: HashMap<String, McpToolCallBegin>,
|
||||
|
||||
// To ensure that --color=never is respected, ANSI escapes _must_ be added
|
||||
// using .style() with one of these fields. If you need a new style, add a
|
||||
// new field here.
|
||||
bold: Style,
|
||||
italic: Style,
|
||||
dimmed: Style,
|
||||
|
||||
magenta: Style,
|
||||
red: Style,
|
||||
green: Style,
|
||||
cyan: Style,
|
||||
|
||||
/// Whether to include `AgentReasoning` events in the output.
|
||||
show_agent_reasoning: bool,
|
||||
pub(crate) enum CodexStatus {
|
||||
Running,
|
||||
InitiateShutdown,
|
||||
Shutdown,
|
||||
}
|
||||
|
||||
impl EventProcessor {
|
||||
pub(crate) fn create_with_ansi(with_ansi: bool, show_agent_reasoning: bool) -> Self {
|
||||
let call_id_to_command = HashMap::new();
|
||||
let call_id_to_patch = HashMap::new();
|
||||
let call_id_to_tool_call = HashMap::new();
|
||||
pub(crate) trait EventProcessor {
|
||||
/// Print summary of effective configuration and user prompt.
|
||||
fn print_config_summary(&mut self, config: &Config, prompt: &str);
|
||||
|
||||
if with_ansi {
|
||||
Self {
|
||||
call_id_to_command,
|
||||
call_id_to_patch,
|
||||
bold: Style::new().bold(),
|
||||
italic: Style::new().italic(),
|
||||
dimmed: Style::new().dimmed(),
|
||||
magenta: Style::new().magenta(),
|
||||
red: Style::new().red(),
|
||||
green: Style::new().green(),
|
||||
cyan: Style::new().cyan(),
|
||||
call_id_to_tool_call,
|
||||
show_agent_reasoning,
|
||||
}
|
||||
} else {
|
||||
Self {
|
||||
call_id_to_command,
|
||||
call_id_to_patch,
|
||||
bold: Style::new(),
|
||||
italic: Style::new(),
|
||||
dimmed: Style::new(),
|
||||
magenta: Style::new(),
|
||||
red: Style::new(),
|
||||
green: Style::new(),
|
||||
cyan: Style::new(),
|
||||
call_id_to_tool_call,
|
||||
show_agent_reasoning,
|
||||
}
|
||||
/// Handle a single event emitted by the agent.
|
||||
fn process_event(&mut self, event: Event) -> CodexStatus;
|
||||
}
|
||||
|
||||
pub(crate) fn create_config_summary_entries(config: &Config) -> Vec<(&'static str, String)> {
|
||||
let mut entries = vec![
|
||||
("workdir", config.cwd.display().to_string()),
|
||||
("model", config.model.clone()),
|
||||
("provider", config.model_provider_id.clone()),
|
||||
("approval", format!("{:?}", config.approval_policy)),
|
||||
("sandbox", summarize_sandbox_policy(&config.sandbox_policy)),
|
||||
];
|
||||
if config.model_provider.wire_api == WireApi::Responses
|
||||
&& model_supports_reasoning_summaries(config)
|
||||
{
|
||||
entries.push((
|
||||
"reasoning effort",
|
||||
config.model_reasoning_effort.to_string(),
|
||||
));
|
||||
entries.push((
|
||||
"reasoning summaries",
|
||||
config.model_reasoning_summary.to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
entries
|
||||
}
|
||||
|
||||
pub(crate) fn handle_last_message(
|
||||
last_agent_message: Option<&str>,
|
||||
last_message_path: Option<&Path>,
|
||||
) {
|
||||
match (last_message_path, last_agent_message) {
|
||||
(Some(path), Some(msg)) => write_last_message_file(msg, Some(path)),
|
||||
(Some(path), None) => {
|
||||
write_last_message_file("", Some(path));
|
||||
eprintln!(
|
||||
"Warning: no last agent message; wrote empty content to {}",
|
||||
path.display()
|
||||
);
|
||||
}
|
||||
(None, _) => eprintln!("Warning: no file to write last message to."),
|
||||
}
|
||||
}
|
||||
|
||||
struct ExecCommandBegin {
|
||||
command: Vec<String>,
|
||||
start_time: Instant,
|
||||
}
|
||||
|
||||
/// Metadata captured when an `McpToolCallBegin` event is received.
|
||||
struct McpToolCallBegin {
|
||||
/// Formatted invocation string, e.g. `server.tool({"city":"sf"})`.
|
||||
invocation: String,
|
||||
/// Timestamp when the call started so we can compute duration later.
|
||||
start_time: Instant,
|
||||
}
|
||||
|
||||
struct PatchApplyBegin {
|
||||
start_time: Instant,
|
||||
auto_approved: bool,
|
||||
}
|
||||
|
||||
// Timestamped println helper. The timestamp is styled with self.dimmed.
|
||||
#[macro_export]
|
||||
macro_rules! ts_println {
|
||||
($self:ident, $($arg:tt)*) => {{
|
||||
let now = chrono::Utc::now();
|
||||
let formatted = now.format("[%Y-%m-%dT%H:%M:%S]");
|
||||
print!("{} ", formatted.style($self.dimmed));
|
||||
println!($($arg)*);
|
||||
}};
|
||||
}
|
||||
|
||||
impl EventProcessor {
|
||||
/// Print a concise summary of the effective configuration that will be used
|
||||
/// for the session. This mirrors the information shown in the TUI welcome
|
||||
/// screen.
|
||||
pub(crate) fn print_config_summary(&mut self, config: &Config, prompt: &str) {
|
||||
const VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
ts_println!(
|
||||
self,
|
||||
"OpenAI Codex v{} (research preview)\n--------",
|
||||
VERSION
|
||||
);
|
||||
|
||||
let mut entries = vec![
|
||||
("workdir", config.cwd.display().to_string()),
|
||||
("model", config.model.clone()),
|
||||
("provider", config.model_provider_id.clone()),
|
||||
("approval", format!("{:?}", config.approval_policy)),
|
||||
("sandbox", format!("{:?}", config.sandbox_policy)),
|
||||
];
|
||||
if config.model_provider.wire_api == WireApi::Responses
|
||||
&& model_supports_reasoning_summaries(&config.model)
|
||||
{
|
||||
entries.push((
|
||||
"reasoning effort",
|
||||
config.model_reasoning_effort.to_string(),
|
||||
));
|
||||
entries.push((
|
||||
"reasoning summaries",
|
||||
config.model_reasoning_summary.to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
for (key, value) in entries {
|
||||
println!("{} {}", format!("{key}:").style(self.bold), value);
|
||||
}
|
||||
|
||||
println!("--------");
|
||||
|
||||
// Echo the prompt that will be sent to the agent so it is visible in the
|
||||
// transcript/logs before any events come in. Note the prompt may have been
|
||||
// read from stdin, so it may not be visible in the terminal otherwise.
|
||||
ts_println!(
|
||||
self,
|
||||
"{}\n{}",
|
||||
"User instructions:".style(self.bold).style(self.cyan),
|
||||
prompt
|
||||
);
|
||||
}
|
||||
|
||||
pub(crate) fn process_event(&mut self, event: Event) {
|
||||
let Event { id: _, msg } = event;
|
||||
match msg {
|
||||
EventMsg::Error(ErrorEvent { message }) => {
|
||||
let prefix = "ERROR:".style(self.red);
|
||||
ts_println!(self, "{prefix} {message}");
|
||||
}
|
||||
EventMsg::BackgroundEvent(BackgroundEventEvent { message }) => {
|
||||
ts_println!(self, "{}", message.style(self.dimmed));
|
||||
}
|
||||
EventMsg::TaskStarted | EventMsg::TaskComplete(_) => {
|
||||
// Ignore.
|
||||
}
|
||||
EventMsg::AgentMessage(AgentMessageEvent { message }) => {
|
||||
ts_println!(
|
||||
self,
|
||||
"{}\n{message}",
|
||||
"codex".style(self.bold).style(self.magenta)
|
||||
);
|
||||
}
|
||||
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id,
|
||||
command,
|
||||
cwd,
|
||||
}) => {
|
||||
self.call_id_to_command.insert(
|
||||
call_id.clone(),
|
||||
ExecCommandBegin {
|
||||
command: command.clone(),
|
||||
start_time: Instant::now(),
|
||||
},
|
||||
);
|
||||
ts_println!(
|
||||
self,
|
||||
"{} {} in {}",
|
||||
"exec".style(self.magenta),
|
||||
escape_command(&command).style(self.bold),
|
||||
cwd.to_string_lossy(),
|
||||
);
|
||||
}
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id,
|
||||
stdout,
|
||||
stderr,
|
||||
exit_code,
|
||||
}) => {
|
||||
let exec_command = self.call_id_to_command.remove(&call_id);
|
||||
let (duration, call) = if let Some(ExecCommandBegin {
|
||||
command,
|
||||
start_time,
|
||||
}) = exec_command
|
||||
{
|
||||
(
|
||||
format!(" in {}", format_elapsed(start_time)),
|
||||
format!("{}", escape_command(&command).style(self.bold)),
|
||||
)
|
||||
} else {
|
||||
("".to_string(), format!("exec('{call_id}')"))
|
||||
};
|
||||
|
||||
let output = if exit_code == 0 { stdout } else { stderr };
|
||||
let truncated_output = output
|
||||
.lines()
|
||||
.take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL)
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
match exit_code {
|
||||
0 => {
|
||||
let title = format!("{call} succeeded{duration}:");
|
||||
ts_println!(self, "{}", title.style(self.green));
|
||||
}
|
||||
_ => {
|
||||
let title = format!("{call} exited {exit_code}{duration}:");
|
||||
ts_println!(self, "{}", title.style(self.red));
|
||||
}
|
||||
}
|
||||
println!("{}", truncated_output.style(self.dimmed));
|
||||
}
|
||||
EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
|
||||
call_id,
|
||||
server,
|
||||
tool,
|
||||
arguments,
|
||||
}) => {
|
||||
// Build fully-qualified tool name: server.tool
|
||||
let fq_tool_name = format!("{server}.{tool}");
|
||||
|
||||
// Format arguments as compact JSON so they fit on one line.
|
||||
let args_str = arguments
|
||||
.as_ref()
|
||||
.map(|v: &serde_json::Value| {
|
||||
serde_json::to_string(v).unwrap_or_else(|_| v.to_string())
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let invocation = if args_str.is_empty() {
|
||||
format!("{fq_tool_name}()")
|
||||
} else {
|
||||
format!("{fq_tool_name}({args_str})")
|
||||
};
|
||||
|
||||
self.call_id_to_tool_call.insert(
|
||||
call_id.clone(),
|
||||
McpToolCallBegin {
|
||||
invocation: invocation.clone(),
|
||||
start_time: Instant::now(),
|
||||
},
|
||||
);
|
||||
|
||||
ts_println!(
|
||||
self,
|
||||
"{} {}",
|
||||
"tool".style(self.magenta),
|
||||
invocation.style(self.bold),
|
||||
);
|
||||
}
|
||||
EventMsg::McpToolCallEnd(tool_call_end_event) => {
|
||||
let is_success = tool_call_end_event.is_success();
|
||||
let McpToolCallEndEvent { call_id, result } = tool_call_end_event;
|
||||
// Retrieve start time and invocation for duration calculation and labeling.
|
||||
let info = self.call_id_to_tool_call.remove(&call_id);
|
||||
|
||||
let (duration, invocation) = if let Some(McpToolCallBegin {
|
||||
invocation,
|
||||
start_time,
|
||||
..
|
||||
}) = info
|
||||
{
|
||||
(format!(" in {}", format_elapsed(start_time)), invocation)
|
||||
} else {
|
||||
(String::new(), format!("tool('{call_id}')"))
|
||||
};
|
||||
|
||||
let status_str = if is_success { "success" } else { "failed" };
|
||||
let title_style = if is_success { self.green } else { self.red };
|
||||
let title = format!("{invocation} {status_str}{duration}:");
|
||||
|
||||
ts_println!(self, "{}", title.style(title_style));
|
||||
|
||||
if let Ok(res) = result {
|
||||
let val: serde_json::Value = res.into();
|
||||
let pretty =
|
||||
serde_json::to_string_pretty(&val).unwrap_or_else(|_| val.to_string());
|
||||
|
||||
for line in pretty.lines().take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL) {
|
||||
println!("{}", line.style(self.dimmed));
|
||||
}
|
||||
}
|
||||
}
|
||||
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
|
||||
call_id,
|
||||
auto_approved,
|
||||
changes,
|
||||
}) => {
|
||||
// Store metadata so we can calculate duration later when we
|
||||
// receive the corresponding PatchApplyEnd event.
|
||||
self.call_id_to_patch.insert(
|
||||
call_id.clone(),
|
||||
PatchApplyBegin {
|
||||
start_time: Instant::now(),
|
||||
auto_approved,
|
||||
},
|
||||
);
|
||||
|
||||
ts_println!(
|
||||
self,
|
||||
"{} auto_approved={}:",
|
||||
"apply_patch".style(self.magenta),
|
||||
auto_approved,
|
||||
);
|
||||
|
||||
// Pretty-print the patch summary with colored diff markers so
|
||||
// it’s easy to scan in the terminal output.
|
||||
for (path, change) in changes.iter() {
|
||||
match change {
|
||||
FileChange::Add { content } => {
|
||||
let header = format!(
|
||||
"{} {}",
|
||||
format_file_change(change),
|
||||
path.to_string_lossy()
|
||||
);
|
||||
println!("{}", header.style(self.magenta));
|
||||
for line in content.lines() {
|
||||
println!("{}", line.style(self.green));
|
||||
}
|
||||
}
|
||||
FileChange::Delete => {
|
||||
let header = format!(
|
||||
"{} {}",
|
||||
format_file_change(change),
|
||||
path.to_string_lossy()
|
||||
);
|
||||
println!("{}", header.style(self.magenta));
|
||||
}
|
||||
FileChange::Update {
|
||||
unified_diff,
|
||||
move_path,
|
||||
} => {
|
||||
let header = if let Some(dest) = move_path {
|
||||
format!(
|
||||
"{} {} -> {}",
|
||||
format_file_change(change),
|
||||
path.to_string_lossy(),
|
||||
dest.to_string_lossy()
|
||||
)
|
||||
} else {
|
||||
format!("{} {}", format_file_change(change), path.to_string_lossy())
|
||||
};
|
||||
println!("{}", header.style(self.magenta));
|
||||
|
||||
// Colorize diff lines. We keep file header lines
|
||||
// (--- / +++) without extra coloring so they are
|
||||
// still readable.
|
||||
for diff_line in unified_diff.lines() {
|
||||
if diff_line.starts_with('+') && !diff_line.starts_with("+++") {
|
||||
println!("{}", diff_line.style(self.green));
|
||||
} else if diff_line.starts_with('-')
|
||||
&& !diff_line.starts_with("---")
|
||||
{
|
||||
println!("{}", diff_line.style(self.red));
|
||||
} else {
|
||||
println!("{diff_line}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
|
||||
call_id,
|
||||
stdout,
|
||||
stderr,
|
||||
success,
|
||||
}) => {
|
||||
let patch_begin = self.call_id_to_patch.remove(&call_id);
|
||||
|
||||
// Compute duration and summary label similar to exec commands.
|
||||
let (duration, label) = if let Some(PatchApplyBegin {
|
||||
start_time,
|
||||
auto_approved,
|
||||
}) = patch_begin
|
||||
{
|
||||
(
|
||||
format!(" in {}", format_elapsed(start_time)),
|
||||
format!("apply_patch(auto_approved={})", auto_approved),
|
||||
)
|
||||
} else {
|
||||
(String::new(), format!("apply_patch('{call_id}')"))
|
||||
};
|
||||
|
||||
let (exit_code, output, title_style) = if success {
|
||||
(0, stdout, self.green)
|
||||
} else {
|
||||
(1, stderr, self.red)
|
||||
};
|
||||
|
||||
let title = format!("{label} exited {exit_code}{duration}:");
|
||||
ts_println!(self, "{}", title.style(title_style));
|
||||
for line in output.lines() {
|
||||
println!("{}", line.style(self.dimmed));
|
||||
}
|
||||
}
|
||||
EventMsg::ExecApprovalRequest(_) => {
|
||||
// Should we exit?
|
||||
}
|
||||
EventMsg::ApplyPatchApprovalRequest(_) => {
|
||||
// Should we exit?
|
||||
}
|
||||
EventMsg::AgentReasoning(agent_reasoning_event) => {
|
||||
if self.show_agent_reasoning {
|
||||
ts_println!(
|
||||
self,
|
||||
"{}\n{}",
|
||||
"thinking".style(self.italic).style(self.magenta),
|
||||
agent_reasoning_event.text
|
||||
);
|
||||
}
|
||||
}
|
||||
EventMsg::SessionConfigured(session_configured_event) => {
|
||||
let SessionConfiguredEvent {
|
||||
session_id,
|
||||
model,
|
||||
history_log_id: _,
|
||||
history_entry_count: _,
|
||||
} = session_configured_event;
|
||||
|
||||
ts_println!(
|
||||
self,
|
||||
"{} {}",
|
||||
"codex session".style(self.magenta).style(self.bold),
|
||||
session_id.to_string().style(self.dimmed)
|
||||
);
|
||||
|
||||
ts_println!(self, "model: {}", model);
|
||||
println!();
|
||||
}
|
||||
EventMsg::GetHistoryEntryResponse(_) => {
|
||||
// Currently ignored in exec output.
|
||||
}
|
||||
fn write_last_message_file(contents: &str, last_message_path: Option<&Path>) {
|
||||
if let Some(path) = last_message_path {
|
||||
if let Err(e) = std::fs::write(path, contents) {
|
||||
eprintln!("Failed to write last message file {path:?}: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn escape_command(command: &[String]) -> String {
|
||||
try_join(command.iter().map(|s| s.as_str())).unwrap_or_else(|_| command.join(" "))
|
||||
}
|
||||
|
||||
fn format_file_change(change: &FileChange) -> &'static str {
|
||||
match change {
|
||||
FileChange::Add { .. } => "A",
|
||||
FileChange::Delete => "D",
|
||||
FileChange::Update {
|
||||
move_path: Some(_), ..
|
||||
} => "R",
|
||||
FileChange::Update {
|
||||
move_path: None, ..
|
||||
} => "M",
|
||||
}
|
||||
}
|
||||
|
||||
540
codex-rs/exec/src/event_processor_with_human_output.rs
Normal file
540
codex-rs/exec/src/event_processor_with_human_output.rs
Normal file
@@ -0,0 +1,540 @@
|
||||
use codex_common::elapsed::format_elapsed;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::AgentMessageDeltaEvent;
|
||||
use codex_core::protocol::AgentMessageEvent;
|
||||
use codex_core::protocol::AgentReasoningDeltaEvent;
|
||||
use codex_core::protocol::BackgroundEventEvent;
|
||||
use codex_core::protocol::ErrorEvent;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ExecCommandBeginEvent;
|
||||
use codex_core::protocol::ExecCommandEndEvent;
|
||||
use codex_core::protocol::FileChange;
|
||||
use codex_core::protocol::McpToolCallBeginEvent;
|
||||
use codex_core::protocol::McpToolCallEndEvent;
|
||||
use codex_core::protocol::PatchApplyBeginEvent;
|
||||
use codex_core::protocol::PatchApplyEndEvent;
|
||||
use codex_core::protocol::SessionConfiguredEvent;
|
||||
use codex_core::protocol::TaskCompleteEvent;
|
||||
use codex_core::protocol::TokenUsage;
|
||||
use owo_colors::OwoColorize;
|
||||
use owo_colors::Style;
|
||||
use shlex::try_join;
|
||||
use std::collections::HashMap;
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use crate::event_processor::CodexStatus;
|
||||
use crate::event_processor::EventProcessor;
|
||||
use crate::event_processor::create_config_summary_entries;
|
||||
use crate::event_processor::handle_last_message;
|
||||
|
||||
/// This should be configurable. When used in CI, users may not want to impose
|
||||
/// a limit so they can see the full transcript.
|
||||
const MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL: usize = 20;
|
||||
pub(crate) struct EventProcessorWithHumanOutput {
|
||||
call_id_to_command: HashMap<String, ExecCommandBegin>,
|
||||
call_id_to_patch: HashMap<String, PatchApplyBegin>,
|
||||
|
||||
/// Tracks in-flight MCP tool calls so we can calculate duration and print
|
||||
/// a concise summary when the corresponding `McpToolCallEnd` event is
|
||||
/// received.
|
||||
call_id_to_tool_call: HashMap<String, McpToolCallBegin>,
|
||||
|
||||
// To ensure that --color=never is respected, ANSI escapes _must_ be added
|
||||
// using .style() with one of these fields. If you need a new style, add a
|
||||
// new field here.
|
||||
bold: Style,
|
||||
italic: Style,
|
||||
dimmed: Style,
|
||||
|
||||
magenta: Style,
|
||||
red: Style,
|
||||
green: Style,
|
||||
cyan: Style,
|
||||
|
||||
/// Whether to include `AgentReasoning` events in the output.
|
||||
show_agent_reasoning: bool,
|
||||
answer_started: bool,
|
||||
reasoning_started: bool,
|
||||
last_message_path: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl EventProcessorWithHumanOutput {
|
||||
pub(crate) fn create_with_ansi(
|
||||
with_ansi: bool,
|
||||
config: &Config,
|
||||
last_message_path: Option<PathBuf>,
|
||||
) -> Self {
|
||||
let call_id_to_command = HashMap::new();
|
||||
let call_id_to_patch = HashMap::new();
|
||||
let call_id_to_tool_call = HashMap::new();
|
||||
|
||||
if with_ansi {
|
||||
Self {
|
||||
call_id_to_command,
|
||||
call_id_to_patch,
|
||||
bold: Style::new().bold(),
|
||||
italic: Style::new().italic(),
|
||||
dimmed: Style::new().dimmed(),
|
||||
magenta: Style::new().magenta(),
|
||||
red: Style::new().red(),
|
||||
green: Style::new().green(),
|
||||
cyan: Style::new().cyan(),
|
||||
call_id_to_tool_call,
|
||||
show_agent_reasoning: !config.hide_agent_reasoning,
|
||||
answer_started: false,
|
||||
reasoning_started: false,
|
||||
last_message_path,
|
||||
}
|
||||
} else {
|
||||
Self {
|
||||
call_id_to_command,
|
||||
call_id_to_patch,
|
||||
bold: Style::new(),
|
||||
italic: Style::new(),
|
||||
dimmed: Style::new(),
|
||||
magenta: Style::new(),
|
||||
red: Style::new(),
|
||||
green: Style::new(),
|
||||
cyan: Style::new(),
|
||||
call_id_to_tool_call,
|
||||
show_agent_reasoning: !config.hide_agent_reasoning,
|
||||
answer_started: false,
|
||||
reasoning_started: false,
|
||||
last_message_path,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ExecCommandBegin {
|
||||
command: Vec<String>,
|
||||
start_time: Instant,
|
||||
}
|
||||
|
||||
/// Metadata captured when an `McpToolCallBegin` event is received.
|
||||
struct McpToolCallBegin {
|
||||
/// Formatted invocation string, e.g. `server.tool({"city":"sf"})`.
|
||||
invocation: String,
|
||||
/// Timestamp when the call started so we can compute duration later.
|
||||
start_time: Instant,
|
||||
}
|
||||
|
||||
struct PatchApplyBegin {
|
||||
start_time: Instant,
|
||||
auto_approved: bool,
|
||||
}
|
||||
|
||||
// Timestamped println helper. The timestamp is styled with self.dimmed.
|
||||
#[macro_export]
|
||||
macro_rules! ts_println {
|
||||
($self:ident, $($arg:tt)*) => {{
|
||||
let now = chrono::Utc::now();
|
||||
let formatted = now.format("[%Y-%m-%dT%H:%M:%S]");
|
||||
print!("{} ", formatted.style($self.dimmed));
|
||||
println!($($arg)*);
|
||||
}};
|
||||
}
|
||||
|
||||
impl EventProcessor for EventProcessorWithHumanOutput {
|
||||
/// Print a concise summary of the effective configuration that will be used
|
||||
/// for the session. This mirrors the information shown in the TUI welcome
|
||||
/// screen.
|
||||
fn print_config_summary(&mut self, config: &Config, prompt: &str) {
|
||||
const VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
ts_println!(
|
||||
self,
|
||||
"OpenAI Codex v{} (research preview)\n--------",
|
||||
VERSION
|
||||
);
|
||||
|
||||
let entries = create_config_summary_entries(config);
|
||||
|
||||
for (key, value) in entries {
|
||||
println!("{} {}", format!("{key}:").style(self.bold), value);
|
||||
}
|
||||
|
||||
println!("--------");
|
||||
|
||||
// Echo the prompt that will be sent to the agent so it is visible in the
|
||||
// transcript/logs before any events come in. Note the prompt may have been
|
||||
// read from stdin, so it may not be visible in the terminal otherwise.
|
||||
ts_println!(
|
||||
self,
|
||||
"{}\n{}",
|
||||
"User instructions:".style(self.bold).style(self.cyan),
|
||||
prompt
|
||||
);
|
||||
}
|
||||
|
||||
fn process_event(&mut self, event: Event) -> CodexStatus {
|
||||
let Event { id: _, msg } = event;
|
||||
match msg {
|
||||
EventMsg::Error(ErrorEvent { message }) => {
|
||||
let prefix = "ERROR:".style(self.red);
|
||||
ts_println!(self, "{prefix} {message}");
|
||||
}
|
||||
EventMsg::BackgroundEvent(BackgroundEventEvent { message }) => {
|
||||
ts_println!(self, "{}", message.style(self.dimmed));
|
||||
}
|
||||
EventMsg::TaskStarted => {
|
||||
// Ignore.
|
||||
}
|
||||
EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) => {
|
||||
handle_last_message(
|
||||
last_agent_message.as_deref(),
|
||||
self.last_message_path.as_deref(),
|
||||
);
|
||||
return CodexStatus::InitiateShutdown;
|
||||
}
|
||||
EventMsg::TokenCount(TokenUsage { total_tokens, .. }) => {
|
||||
ts_println!(self, "tokens used: {total_tokens}");
|
||||
}
|
||||
EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }) => {
|
||||
if !self.answer_started {
|
||||
ts_println!(self, "{}\n", "codex".style(self.italic).style(self.magenta));
|
||||
self.answer_started = true;
|
||||
}
|
||||
print!("{delta}");
|
||||
#[allow(clippy::expect_used)]
|
||||
std::io::stdout().flush().expect("could not flush stdout");
|
||||
}
|
||||
EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta }) => {
|
||||
if !self.show_agent_reasoning {
|
||||
return CodexStatus::Running;
|
||||
}
|
||||
if !self.reasoning_started {
|
||||
ts_println!(
|
||||
self,
|
||||
"{}\n",
|
||||
"thinking".style(self.italic).style(self.magenta),
|
||||
);
|
||||
self.reasoning_started = true;
|
||||
}
|
||||
print!("{delta}");
|
||||
#[allow(clippy::expect_used)]
|
||||
std::io::stdout().flush().expect("could not flush stdout");
|
||||
}
|
||||
EventMsg::AgentMessage(AgentMessageEvent { message }) => {
|
||||
// if answer_started is false, this means we haven't received any
|
||||
// delta. Thus, we need to print the message as a new answer.
|
||||
if !self.answer_started {
|
||||
ts_println!(
|
||||
self,
|
||||
"{}\n{}",
|
||||
"codex".style(self.italic).style(self.magenta),
|
||||
message,
|
||||
);
|
||||
} else {
|
||||
println!();
|
||||
self.answer_started = false;
|
||||
}
|
||||
}
|
||||
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id,
|
||||
command,
|
||||
cwd,
|
||||
}) => {
|
||||
self.call_id_to_command.insert(
|
||||
call_id.clone(),
|
||||
ExecCommandBegin {
|
||||
command: command.clone(),
|
||||
start_time: Instant::now(),
|
||||
},
|
||||
);
|
||||
ts_println!(
|
||||
self,
|
||||
"{} {} in {}",
|
||||
"exec".style(self.magenta),
|
||||
escape_command(&command).style(self.bold),
|
||||
cwd.to_string_lossy(),
|
||||
);
|
||||
}
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id,
|
||||
stdout,
|
||||
stderr,
|
||||
exit_code,
|
||||
}) => {
|
||||
let exec_command = self.call_id_to_command.remove(&call_id);
|
||||
let (duration, call) = if let Some(ExecCommandBegin {
|
||||
command,
|
||||
start_time,
|
||||
}) = exec_command
|
||||
{
|
||||
(
|
||||
format!(" in {}", format_elapsed(start_time)),
|
||||
format!("{}", escape_command(&command).style(self.bold)),
|
||||
)
|
||||
} else {
|
||||
("".to_string(), format!("exec('{call_id}')"))
|
||||
};
|
||||
|
||||
let output = if exit_code == 0 { stdout } else { stderr };
|
||||
let truncated_output = output
|
||||
.lines()
|
||||
.take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL)
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
match exit_code {
|
||||
0 => {
|
||||
let title = format!("{call} succeeded{duration}:");
|
||||
ts_println!(self, "{}", title.style(self.green));
|
||||
}
|
||||
_ => {
|
||||
let title = format!("{call} exited {exit_code}{duration}:");
|
||||
ts_println!(self, "{}", title.style(self.red));
|
||||
}
|
||||
}
|
||||
println!("{}", truncated_output.style(self.dimmed));
|
||||
}
|
||||
EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
|
||||
call_id,
|
||||
server,
|
||||
tool,
|
||||
arguments,
|
||||
}) => {
|
||||
// Build fully-qualified tool name: server.tool
|
||||
let fq_tool_name = format!("{server}.{tool}");
|
||||
|
||||
// Format arguments as compact JSON so they fit on one line.
|
||||
let args_str = arguments
|
||||
.as_ref()
|
||||
.map(|v: &serde_json::Value| {
|
||||
serde_json::to_string(v).unwrap_or_else(|_| v.to_string())
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let invocation = if args_str.is_empty() {
|
||||
format!("{fq_tool_name}()")
|
||||
} else {
|
||||
format!("{fq_tool_name}({args_str})")
|
||||
};
|
||||
|
||||
self.call_id_to_tool_call.insert(
|
||||
call_id.clone(),
|
||||
McpToolCallBegin {
|
||||
invocation: invocation.clone(),
|
||||
start_time: Instant::now(),
|
||||
},
|
||||
);
|
||||
|
||||
ts_println!(
|
||||
self,
|
||||
"{} {}",
|
||||
"tool".style(self.magenta),
|
||||
invocation.style(self.bold),
|
||||
);
|
||||
}
|
||||
EventMsg::McpToolCallEnd(tool_call_end_event) => {
|
||||
let is_success = tool_call_end_event.is_success();
|
||||
let McpToolCallEndEvent { call_id, result } = tool_call_end_event;
|
||||
// Retrieve start time and invocation for duration calculation and labeling.
|
||||
let info = self.call_id_to_tool_call.remove(&call_id);
|
||||
|
||||
let (duration, invocation) = if let Some(McpToolCallBegin {
|
||||
invocation,
|
||||
start_time,
|
||||
..
|
||||
}) = info
|
||||
{
|
||||
(format!(" in {}", format_elapsed(start_time)), invocation)
|
||||
} else {
|
||||
(String::new(), format!("tool('{call_id}')"))
|
||||
};
|
||||
|
||||
let status_str = if is_success { "success" } else { "failed" };
|
||||
let title_style = if is_success { self.green } else { self.red };
|
||||
let title = format!("{invocation} {status_str}{duration}:");
|
||||
|
||||
ts_println!(self, "{}", title.style(title_style));
|
||||
|
||||
if let Ok(res) = result {
|
||||
let val: serde_json::Value = res.into();
|
||||
let pretty =
|
||||
serde_json::to_string_pretty(&val).unwrap_or_else(|_| val.to_string());
|
||||
|
||||
for line in pretty.lines().take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL) {
|
||||
println!("{}", line.style(self.dimmed));
|
||||
}
|
||||
}
|
||||
}
|
||||
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
|
||||
call_id,
|
||||
auto_approved,
|
||||
changes,
|
||||
}) => {
|
||||
// Store metadata so we can calculate duration later when we
|
||||
// receive the corresponding PatchApplyEnd event.
|
||||
self.call_id_to_patch.insert(
|
||||
call_id.clone(),
|
||||
PatchApplyBegin {
|
||||
start_time: Instant::now(),
|
||||
auto_approved,
|
||||
},
|
||||
);
|
||||
|
||||
ts_println!(
|
||||
self,
|
||||
"{} auto_approved={}:",
|
||||
"apply_patch".style(self.magenta),
|
||||
auto_approved,
|
||||
);
|
||||
|
||||
// Pretty-print the patch summary with colored diff markers so
|
||||
// it's easy to scan in the terminal output.
|
||||
for (path, change) in changes.iter() {
|
||||
match change {
|
||||
FileChange::Add { content } => {
|
||||
let header = format!(
|
||||
"{} {}",
|
||||
format_file_change(change),
|
||||
path.to_string_lossy()
|
||||
);
|
||||
println!("{}", header.style(self.magenta));
|
||||
for line in content.lines() {
|
||||
println!("{}", line.style(self.green));
|
||||
}
|
||||
}
|
||||
FileChange::Delete => {
|
||||
let header = format!(
|
||||
"{} {}",
|
||||
format_file_change(change),
|
||||
path.to_string_lossy()
|
||||
);
|
||||
println!("{}", header.style(self.magenta));
|
||||
}
|
||||
FileChange::Update {
|
||||
unified_diff,
|
||||
move_path,
|
||||
} => {
|
||||
let header = if let Some(dest) = move_path {
|
||||
format!(
|
||||
"{} {} -> {}",
|
||||
format_file_change(change),
|
||||
path.to_string_lossy(),
|
||||
dest.to_string_lossy()
|
||||
)
|
||||
} else {
|
||||
format!("{} {}", format_file_change(change), path.to_string_lossy())
|
||||
};
|
||||
println!("{}", header.style(self.magenta));
|
||||
|
||||
// Colorize diff lines. We keep file header lines
|
||||
// (--- / +++) without extra coloring so they are
|
||||
// still readable.
|
||||
for diff_line in unified_diff.lines() {
|
||||
if diff_line.starts_with('+') && !diff_line.starts_with("+++") {
|
||||
println!("{}", diff_line.style(self.green));
|
||||
} else if diff_line.starts_with('-')
|
||||
&& !diff_line.starts_with("---")
|
||||
{
|
||||
println!("{}", diff_line.style(self.red));
|
||||
} else {
|
||||
println!("{diff_line}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
|
||||
call_id,
|
||||
stdout,
|
||||
stderr,
|
||||
success,
|
||||
}) => {
|
||||
let patch_begin = self.call_id_to_patch.remove(&call_id);
|
||||
|
||||
// Compute duration and summary label similar to exec commands.
|
||||
let (duration, label) = if let Some(PatchApplyBegin {
|
||||
start_time,
|
||||
auto_approved,
|
||||
}) = patch_begin
|
||||
{
|
||||
(
|
||||
format!(" in {}", format_elapsed(start_time)),
|
||||
format!("apply_patch(auto_approved={auto_approved})"),
|
||||
)
|
||||
} else {
|
||||
(String::new(), format!("apply_patch('{call_id}')"))
|
||||
};
|
||||
|
||||
let (exit_code, output, title_style) = if success {
|
||||
(0, stdout, self.green)
|
||||
} else {
|
||||
(1, stderr, self.red)
|
||||
};
|
||||
|
||||
let title = format!("{label} exited {exit_code}{duration}:");
|
||||
ts_println!(self, "{}", title.style(title_style));
|
||||
for line in output.lines() {
|
||||
println!("{}", line.style(self.dimmed));
|
||||
}
|
||||
}
|
||||
EventMsg::ExecApprovalRequest(_) => {
|
||||
// Should we exit?
|
||||
}
|
||||
EventMsg::ApplyPatchApprovalRequest(_) => {
|
||||
// Should we exit?
|
||||
}
|
||||
EventMsg::AgentReasoning(agent_reasoning_event) => {
|
||||
if self.show_agent_reasoning {
|
||||
if !self.reasoning_started {
|
||||
ts_println!(
|
||||
self,
|
||||
"{}\n{}",
|
||||
"codex".style(self.italic).style(self.magenta),
|
||||
agent_reasoning_event.text,
|
||||
);
|
||||
} else {
|
||||
println!();
|
||||
self.reasoning_started = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
EventMsg::SessionConfigured(session_configured_event) => {
|
||||
let SessionConfiguredEvent {
|
||||
session_id,
|
||||
model,
|
||||
history_log_id: _,
|
||||
history_entry_count: _,
|
||||
} = session_configured_event;
|
||||
|
||||
ts_println!(
|
||||
self,
|
||||
"{} {}",
|
||||
"codex session".style(self.magenta).style(self.bold),
|
||||
session_id.to_string().style(self.dimmed)
|
||||
);
|
||||
|
||||
ts_println!(self, "model: {}", model);
|
||||
println!();
|
||||
}
|
||||
EventMsg::GetHistoryEntryResponse(_) => {
|
||||
// Currently ignored in exec output.
|
||||
}
|
||||
EventMsg::ShutdownComplete => return CodexStatus::Shutdown,
|
||||
}
|
||||
CodexStatus::Running
|
||||
}
|
||||
}
|
||||
|
||||
fn escape_command(command: &[String]) -> String {
|
||||
try_join(command.iter().map(|s| s.as_str())).unwrap_or_else(|_| command.join(" "))
|
||||
}
|
||||
|
||||
fn format_file_change(change: &FileChange) -> &'static str {
|
||||
match change {
|
||||
FileChange::Add { .. } => "A",
|
||||
FileChange::Delete => "D",
|
||||
FileChange::Update {
|
||||
move_path: Some(_), ..
|
||||
} => "R",
|
||||
FileChange::Update {
|
||||
move_path: None, ..
|
||||
} => "M",
|
||||
}
|
||||
}
|
||||
64
codex-rs/exec/src/event_processor_with_json_output.rs
Normal file
64
codex-rs/exec/src/event_processor_with_json_output.rs
Normal file
@@ -0,0 +1,64 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::TaskCompleteEvent;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::event_processor::CodexStatus;
|
||||
use crate::event_processor::EventProcessor;
|
||||
use crate::event_processor::create_config_summary_entries;
|
||||
use crate::event_processor::handle_last_message;
|
||||
|
||||
pub(crate) struct EventProcessorWithJsonOutput {
|
||||
last_message_path: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl EventProcessorWithJsonOutput {
|
||||
pub fn new(last_message_path: Option<PathBuf>) -> Self {
|
||||
Self { last_message_path }
|
||||
}
|
||||
}
|
||||
|
||||
impl EventProcessor for EventProcessorWithJsonOutput {
|
||||
fn print_config_summary(&mut self, config: &Config, prompt: &str) {
|
||||
let entries = create_config_summary_entries(config)
|
||||
.into_iter()
|
||||
.map(|(key, value)| (key.to_string(), value))
|
||||
.collect::<HashMap<String, String>>();
|
||||
#[allow(clippy::expect_used)]
|
||||
let config_json =
|
||||
serde_json::to_string(&entries).expect("Failed to serialize config summary to JSON");
|
||||
println!("{config_json}");
|
||||
|
||||
let prompt_json = json!({
|
||||
"prompt": prompt,
|
||||
});
|
||||
println!("{prompt_json}");
|
||||
}
|
||||
|
||||
fn process_event(&mut self, event: Event) -> CodexStatus {
|
||||
match event.msg {
|
||||
EventMsg::AgentMessageDelta(_) | EventMsg::AgentReasoningDelta(_) => {
|
||||
// Suppress streaming events in JSON mode.
|
||||
CodexStatus::Running
|
||||
}
|
||||
EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) => {
|
||||
handle_last_message(
|
||||
last_agent_message.as_deref(),
|
||||
self.last_message_path.as_deref(),
|
||||
);
|
||||
CodexStatus::InitiateShutdown
|
||||
}
|
||||
EventMsg::ShutdownComplete => CodexStatus::Shutdown,
|
||||
_ => {
|
||||
if let Ok(line) = serde_json::to_string(&event) {
|
||||
println!("{line}");
|
||||
}
|
||||
CodexStatus::Running
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,10 @@
|
||||
mod cli;
|
||||
mod event_processor;
|
||||
mod event_processor_with_human_output;
|
||||
mod event_processor_with_json_output;
|
||||
|
||||
use std::io::IsTerminal;
|
||||
use std::io::Read;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -11,31 +12,37 @@ pub use cli::Cli;
|
||||
use codex_core::codex_wrapper;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::config_types::SandboxMode;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::InputItem;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_core::protocol::TaskCompleteEvent;
|
||||
use codex_core::util::is_inside_git_repo;
|
||||
use event_processor::EventProcessor;
|
||||
use event_processor_with_human_output::EventProcessorWithHumanOutput;
|
||||
use event_processor_with_json_output::EventProcessorWithJsonOutput;
|
||||
use tracing::debug;
|
||||
use tracing::error;
|
||||
use tracing::info;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
use crate::event_processor::CodexStatus;
|
||||
use crate::event_processor::EventProcessor;
|
||||
|
||||
pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()> {
|
||||
let Cli {
|
||||
images,
|
||||
model,
|
||||
config_profile,
|
||||
full_auto,
|
||||
sandbox,
|
||||
dangerously_bypass_approvals_and_sandbox,
|
||||
cwd,
|
||||
skip_git_repo_check,
|
||||
color,
|
||||
last_message_file,
|
||||
json: json_mode,
|
||||
sandbox_mode: sandbox_mode_cli_arg,
|
||||
prompt,
|
||||
config_overrides,
|
||||
} = cli;
|
||||
@@ -84,10 +91,12 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
),
|
||||
};
|
||||
|
||||
let sandbox_policy = if full_auto {
|
||||
Some(SandboxPolicy::new_full_auto_policy())
|
||||
let sandbox_mode = if full_auto {
|
||||
Some(SandboxMode::WorkspaceWrite)
|
||||
} else if dangerously_bypass_approvals_and_sandbox {
|
||||
Some(SandboxMode::DangerFullAccess)
|
||||
} else {
|
||||
sandbox.permissions.clone().map(Into::into)
|
||||
sandbox_mode_cli_arg.map(Into::<SandboxMode>::into)
|
||||
};
|
||||
|
||||
// Load configuration and determine approval policy
|
||||
@@ -97,10 +106,11 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
// This CLI is intended to be headless and has no affordances for asking
|
||||
// the user for approval.
|
||||
approval_policy: Some(AskForApproval::Never),
|
||||
sandbox_policy,
|
||||
sandbox_mode,
|
||||
cwd: cwd.map(|p| p.canonicalize().unwrap_or(p)),
|
||||
model_provider: None,
|
||||
codex_linux_sandbox_exe,
|
||||
base_instructions: None,
|
||||
};
|
||||
// Parse `-c` overrides.
|
||||
let cli_kv_overrides = match config_overrides.parse_overrides() {
|
||||
@@ -112,8 +122,16 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
};
|
||||
|
||||
let config = Config::load_with_cli_overrides(cli_kv_overrides, overrides)?;
|
||||
let mut event_processor =
|
||||
EventProcessor::create_with_ansi(stdout_with_ansi, !config.hide_agent_reasoning);
|
||||
let mut event_processor: Box<dyn EventProcessor> = if json_mode {
|
||||
Box::new(EventProcessorWithJsonOutput::new(last_message_file.clone()))
|
||||
} else {
|
||||
Box::new(EventProcessorWithHumanOutput::create_with_ansi(
|
||||
stdout_with_ansi,
|
||||
&config,
|
||||
last_message_file.clone(),
|
||||
))
|
||||
};
|
||||
|
||||
// Print the effective configuration and prompt so users can see what Codex
|
||||
// is using.
|
||||
event_processor.print_config_summary(&config, &prompt);
|
||||
@@ -137,7 +155,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
.with_writer(std::io::stderr)
|
||||
.try_init();
|
||||
|
||||
let (codex_wrapper, event, ctrl_c) = codex_wrapper::init_codex(config).await?;
|
||||
let (codex_wrapper, event, ctrl_c, _session_id) = codex_wrapper::init_codex(config).await?;
|
||||
let codex = Arc::new(codex_wrapper);
|
||||
info!("Codex initialized with event: {event:?}");
|
||||
|
||||
@@ -207,40 +225,17 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
|
||||
// Run the loop until the task is complete.
|
||||
while let Some(event) = rx.recv().await {
|
||||
let (is_last_event, last_assistant_message) = match &event.msg {
|
||||
EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) => {
|
||||
(true, last_agent_message.clone())
|
||||
let shutdown: CodexStatus = event_processor.process_event(event);
|
||||
match shutdown {
|
||||
CodexStatus::Running => continue,
|
||||
CodexStatus::InitiateShutdown => {
|
||||
codex.submit(Op::Shutdown).await?;
|
||||
}
|
||||
CodexStatus::Shutdown => {
|
||||
break;
|
||||
}
|
||||
_ => (false, None),
|
||||
};
|
||||
event_processor.process_event(event);
|
||||
if is_last_event {
|
||||
handle_last_message(last_assistant_message, last_message_file.as_deref())?;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn handle_last_message(
|
||||
last_agent_message: Option<String>,
|
||||
last_message_file: Option<&Path>,
|
||||
) -> std::io::Result<()> {
|
||||
match (last_agent_message, last_message_file) {
|
||||
(Some(last_agent_message), Some(last_message_file)) => {
|
||||
// Last message and a file to write to.
|
||||
std::fs::write(last_message_file, last_agent_message)?;
|
||||
}
|
||||
(None, Some(last_message_file)) => {
|
||||
eprintln!(
|
||||
"Warning: No last message to write to file: {}",
|
||||
last_message_file.to_string_lossy()
|
||||
);
|
||||
}
|
||||
(_, None) => {
|
||||
// No last message and no file to write to.
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ anyhow = "1"
|
||||
starlark = "0.13.0"
|
||||
allocative = "0.3.3"
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
derive_more = { version = "1", features = ["display"] }
|
||||
derive_more = { version = "2", features = ["display"] }
|
||||
env_logger = "0.11.5"
|
||||
log = "0.4"
|
||||
multimap = "0.10.0"
|
||||
@@ -28,4 +28,6 @@ regex-lite = "0.1"
|
||||
serde = { version = "1.0.194", features = ["derive"] }
|
||||
serde_json = "1.0.110"
|
||||
serde_with = { version = "3", features = ["macros"] }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.13.0"
|
||||
|
||||
@@ -21,7 +21,7 @@ impl Display for ExecCall {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.program)?;
|
||||
for arg in &self.args {
|
||||
write!(f, " {}", arg)?;
|
||||
write!(f, " {arg}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -89,7 +89,7 @@ fn main() -> Result<()> {
|
||||
|
||||
let (output, exit_code) = check_command(&policy, exec, args.require_safe);
|
||||
let json = serde_json::to_string(&output)?;
|
||||
println!("{}", json);
|
||||
println!("{json}");
|
||||
std::process::exit(exit_code);
|
||||
}
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ impl Policy {
|
||||
arg: arg.clone(),
|
||||
exec_call: exec_call.clone(),
|
||||
},
|
||||
reason: format!("arg `{}` contains forbidden substring", arg),
|
||||
reason: format!("arg `{arg}` contains forbidden substring"),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -101,7 +101,7 @@ impl PolicyBuilder {
|
||||
}
|
||||
|
||||
fn add_program_spec(&self, program_spec: ProgramSpec) {
|
||||
info!("adding program spec: {:?}", program_spec);
|
||||
info!("adding program spec: {program_spec:?}");
|
||||
let name = program_spec.program.clone();
|
||||
let mut programs = self.programs.borrow_mut();
|
||||
programs.insert(name.clone(), program_spec);
|
||||
|
||||
21
codex-rs/file-search/Cargo.toml
Normal file
21
codex-rs/file-search/Cargo.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
[package]
|
||||
name = "codex-file-search"
|
||||
version = { workspace = true }
|
||||
edition = "2024"
|
||||
|
||||
[[bin]]
|
||||
name = "codex-file-search"
|
||||
path = "src/main.rs"
|
||||
|
||||
[lib]
|
||||
name = "codex_file_search"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
ignore = "0.4.23"
|
||||
nucleo-matcher = "0.3.1"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1.0.110"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
5
codex-rs/file-search/README.md
Normal file
5
codex-rs/file-search/README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# codex_file_search
|
||||
|
||||
Fast fuzzy file search tool for Codex.
|
||||
|
||||
Uses <https://crates.io/crates/ignore> under the hood (which is what `ripgrep` uses) to traverse a directory (while honoring `.gitignore`, etc.) to produce the list of files to search and then uses <https://crates.io/crates/nucleo-matcher> to fuzzy-match the user supplied `PATTERN` against the corpus.
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user