mirror of
https://github.com/openai/codex.git
synced 2026-05-01 18:06:47 +00:00
Compare commits
93 Commits
codex-rs-b
...
codex/upda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d131992dbc | ||
|
|
4cb3c76798 | ||
|
|
6dad5c3b17 | ||
|
|
cd2d84d496 | ||
|
|
688100f7f4 | ||
|
|
f30bf4bbcf | ||
|
|
1b7c8d2569 | ||
|
|
4a341efe92 | ||
|
|
e2efe8da9c | ||
|
|
5a0f236ca4 | ||
|
|
ff8ae1ffa1 | ||
|
|
b3ad764532 | ||
|
|
a331a67b3e | ||
|
|
2e293ce903 | ||
|
|
64feeb3803 | ||
|
|
fa0e17f83a | ||
|
|
a339a7bcce | ||
|
|
fcfe43c7df | ||
|
|
296996d74e | ||
|
|
50924101d2 | ||
|
|
72082164c1 | ||
|
|
e09691337d | ||
|
|
86d5a9d80d | ||
|
|
531ce7626f | ||
|
|
63363a54e5 | ||
|
|
6d65010aad | ||
|
|
0776d78357 | ||
|
|
ed5e848f3e | ||
|
|
5aafe190e2 | ||
|
|
b73426c1c4 | ||
|
|
345a38502d | ||
|
|
029f39b9da | ||
|
|
a80240cfdc | ||
|
|
2d5246050a | ||
|
|
77b017f67d | ||
|
|
c02d25fbad | ||
|
|
9db53b33aa | ||
|
|
515b6331bd | ||
|
|
a67a67f325 | ||
|
|
c6fcec55fe | ||
|
|
6fcc528a43 | ||
|
|
5a5aa89914 | ||
|
|
0f3cc8f842 | ||
|
|
d7245cbbc9 | ||
|
|
e40f86b446 | ||
|
|
7896b1089d | ||
|
|
1410ae95ca | ||
|
|
fccf5f3221 | ||
|
|
1159eaf04f | ||
|
|
e81327e5f4 | ||
|
|
4f3d294762 | ||
|
|
cf1d070538 | ||
|
|
ae743d56b0 | ||
|
|
1bf82056b3 | ||
|
|
e207f20f64 | ||
|
|
0f40ef5a10 | ||
|
|
8676185389 | ||
|
|
baa92f37e0 | ||
|
|
a0239c3cd6 | ||
|
|
bdfa95ed31 | ||
|
|
828e2062c2 | ||
|
|
92957c47fb | ||
|
|
8c1902b562 | ||
|
|
a32d305ae6 | ||
|
|
a768a6a41d | ||
|
|
25a9949c49 | ||
|
|
392fdd7db6 | ||
|
|
ae1a83f095 | ||
|
|
d60f350cf8 | ||
|
|
eba0e32909 | ||
|
|
29d154cb13 | ||
|
|
6b5b184f21 | ||
|
|
4bf81373a7 | ||
|
|
89ef4efdcf | ||
|
|
d1de7bb383 | ||
|
|
63deb7c369 | ||
|
|
cb379d7797 | ||
|
|
ef7208359f | ||
|
|
5746561428 | ||
|
|
d766e845b3 | ||
|
|
a4bfdf6779 | ||
|
|
44022db8d0 | ||
|
|
a86270f581 | ||
|
|
835eb77a7d | ||
|
|
dbc0ad348e | ||
|
|
9b4c2984d4 | ||
|
|
f3bde21759 | ||
|
|
1c6a3f1097 | ||
|
|
f8b6b1db81 | ||
|
|
031df77dfb | ||
|
|
f9143d0361 | ||
|
|
2880925a44 | ||
|
|
3e19e8fd59 |
27
.devcontainer/Dockerfile
Normal file
27
.devcontainer/Dockerfile
Normal file
@@ -0,0 +1,27 @@
|
||||
# Development image for building Codex on Linux (used by the devcontainer and
# by the manual `docker build` / `docker run` flow).
FROM ubuntu:24.04

# Prevent apt from prompting during the image build.
ARG DEBIAN_FRONTEND=noninteractive

# enable 'universe' because musl-tools & clang live there
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
      software-properties-common && \
    add-apt-repository --yes universe

# now install build deps
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
      build-essential curl git ca-certificates \
      pkg-config clang musl-tools libssl-dev just && \
    rm -rf /var/lib/apt/lists/*

# Ubuntu 24.04 ships with user 'ubuntu' already created with UID 1000.
USER ubuntu

# install Rust + musl target as dev user
# `--proto '=https' --tlsv1.2` refuses protocol downgrades/redirects to plain
# HTTP while fetching the installer, as recommended by the rustup docs.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal && \
    ~/.cargo/bin/rustup target add aarch64-unknown-linux-musl && \
    ~/.cargo/bin/rustup component add clippy rustfmt

# Make cargo/rustup available to every subsequent shell in the container.
ENV PATH="/home/ubuntu/.cargo/bin:${PATH}"

WORKDIR /workspace
|
||||
30
.devcontainer/README.md
Normal file
30
.devcontainer/README.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# Containerized Development
|
||||
|
||||
We provide the following options to facilitate Codex development in a container. This is particularly useful for verifying the Linux build when working on a macOS host.
|
||||
|
||||
## Docker
|
||||
|
||||
To build the Docker image locally for x64 and then run it with the repo mounted under `/workspace`:
|
||||
|
||||
```shell
|
||||
CODEX_DOCKER_IMAGE_NAME=codex-linux-dev
|
||||
docker build --platform=linux/amd64 -t "$CODEX_DOCKER_IMAGE_NAME" ./.devcontainer
|
||||
docker run --platform=linux/amd64 --rm -it -e CARGO_TARGET_DIR=/workspace/codex-rs/target-amd64 -v "$PWD":/workspace -w /workspace/codex-rs "$CODEX_DOCKER_IMAGE_NAME"
|
||||
```
|
||||
|
||||
Note that `/workspace/codex-rs/target` will contain the binaries built for your host platform, so we include `-e CARGO_TARGET_DIR=/workspace/codex-rs/target-amd64` in the `docker run` command so that the binaries built inside your container are written to a separate directory.
|
||||
|
||||
For arm64, specify `--platform=linux/arm64` instead for both `docker build` and `docker run`.
|
||||
|
||||
Currently, the `Dockerfile` works for both x64 and arm64 Linux, though you need to run `rustup target add x86_64-unknown-linux-musl` yourself to install the musl toolchain for x64.
|
||||
|
||||
## VS Code
|
||||
|
||||
VS Code recognizes the `devcontainer.json` file and gives you the option to develop Codex in a container. Currently, `devcontainer.json` builds and runs the `arm64` flavor of the container.
|
||||
|
||||
From the integrated terminal in VS Code, you can build either flavor of the `arm64` build (GNU or musl):
|
||||
|
||||
```shell
|
||||
cargo build --target aarch64-unknown-linux-musl
|
||||
cargo build --target aarch64-unknown-linux-gnu
|
||||
```
|
||||
27
.devcontainer/devcontainer.json
Normal file
27
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"name": "Codex",
|
||||
"build": {
|
||||
"dockerfile": "Dockerfile",
|
||||
"context": "..",
|
||||
"platform": "linux/arm64"
|
||||
},
|
||||
|
||||
/* Force VS Code to run the container as arm64 in
|
||||
case your host is x86 (or vice-versa). */
|
||||
"runArgs": ["--platform=linux/arm64"],
|
||||
|
||||
"containerEnv": {
|
||||
"RUST_BACKTRACE": "1",
|
||||
"CARGO_TARGET_DIR": "${containerWorkspaceFolder}/codex-rs/target-arm64"
|
||||
},
|
||||
|
||||
"remoteUser": "ubuntu",
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"settings": {
|
||||
"terminal.integrated.defaultProfile.linux": "bash"
|
||||
},
|
||||
"extensions": ["rust-lang.rust-analyzer"]
|
||||
}
|
||||
}
|
||||
}
|
||||
1
.github/actions/codex/.gitignore
vendored
Normal file
1
.github/actions/codex/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
/node_modules/
|
||||
8
.github/actions/codex/.prettierrc.toml
vendored
Normal file
8
.github/actions/codex/.prettierrc.toml
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
printWidth = 80
|
||||
quoteProps = "consistent"
|
||||
semi = true
|
||||
tabWidth = 2
|
||||
trailingComma = "all"
|
||||
|
||||
# Preserve existing behavior for markdown/text wrapping.
|
||||
proseWrap = "preserve"
|
||||
140
.github/actions/codex/README.md
vendored
Normal file
140
.github/actions/codex/README.md
vendored
Normal file
@@ -0,0 +1,140 @@
|
||||
# openai/codex-action
|
||||
|
||||
`openai/codex-action` is a GitHub Action that facilitates the use of [Codex](https://github.com/openai/codex) on GitHub issues and pull requests. Using the action, associate **labels** to run Codex with the appropriate prompt for the given context. Codex will respond by posting comments or creating PRs, whichever you specify!
|
||||
|
||||
Here is a sample workflow that uses `openai/codex-action`:
|
||||
|
||||
```yaml
|
||||
name: Codex
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened, labeled]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
types: [labeled]
|
||||
|
||||
jobs:
|
||||
codex:
|
||||
if: ... # optional, but can be effective in conserving CI resources
|
||||
runs-on: ubuntu-latest
|
||||
# TODO(mbolin): Need to verify if/when `write` is necessary.
|
||||
permissions:
|
||||
contents: write
|
||||
issues: write
|
||||
pull-requests: write
|
||||
steps:
|
||||
# By default, Codex runs network disabled using --full-auto, so perform
|
||||
# any setup that requires network (such as installing dependencies)
|
||||
# before openai/codex-action.
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Run Codex
|
||||
uses: openai/codex-action@latest
|
||||
with:
|
||||
openai_api_key: ${{ secrets.CODEX_OPENAI_API_KEY }}
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
```
|
||||
|
||||
See sample usage in [`codex.yml`](../../workflows/codex.yml).
|
||||
|
||||
## Triggering the Action
|
||||
|
||||
Using the sample workflow above, we have:
|
||||
|
||||
```yaml
|
||||
on:
|
||||
issues:
|
||||
types: [opened, labeled]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
types: [labeled]
|
||||
```
|
||||
|
||||
which means our workflow will be triggered when any of the following events occur:
|
||||
|
||||
- an issue is opened, or a label is added to an issue
|
||||
- a label is added to a pull request against the `main` branch
|
||||
|
||||
### Label-Based Triggers
|
||||
|
||||
To define a GitHub label that should trigger Codex, create a file named `.github/codex/labels/LABEL-NAME.md` in your repository where `LABEL-NAME` is the name of the label. The content of the file is the prompt template to use when the label is added (see more on [Prompt Template Variables](#prompt-template-variables) below).
|
||||
|
||||
For example, if the file `.github/codex/labels/codex-review.md` exists, then:
|
||||
|
||||
- Adding the `codex-review` label will trigger the workflow containing the `openai/codex-action` GitHub Action.
|
||||
- When `openai/codex-action` starts, it will replace the `codex-review` label with `codex-review-in-progress`.
|
||||
- When `openai/codex-action` is finished, it will replace the `codex-review-in-progress` label with `codex-review-completed`.
|
||||
|
||||
If Codex sees that either `codex-review-in-progress` or `codex-review-completed` is already present, it will not perform the action.
|
||||
|
||||
As determined by the [default config](./src/default-label-config.ts), Codex will act on the following labels by default:
|
||||
|
||||
- Adding the `codex-code-review` label to a pull request will have Codex review the PR and post its review on the PR as a comment.
|
||||
- Adding the `codex-investigate-issue` label to an issue will have Codex investigate the issue and report its findings as a comment.
|
||||
- Adding the `codex-attempt-fix` label to an issue will have Codex attempt to fix the issue and create a PR with the fix, if any.
|
||||
|
||||
## Action Inputs
|
||||
|
||||
The `openai/codex-action` GitHub Action takes the following inputs:
|
||||
|
||||
### `openai_api_key` (required)
|
||||
|
||||
Set your `OPENAI_API_KEY` as a [repository secret](https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions). See **Secrets and variables** then **Actions** in the settings for your GitHub repo.
|
||||
|
||||
Note that the secret name does not have to be `OPENAI_API_KEY`. For example, you might want to name it `CODEX_OPENAI_API_KEY` and then configure it on `openai/codex-action` as follows:
|
||||
|
||||
```yaml
|
||||
openai_api_key: ${{ secrets.CODEX_OPENAI_API_KEY }}
|
||||
```
|
||||
|
||||
### `github_token` (required)
|
||||
|
||||
This is required so that Codex can post a comment or create a PR. Set this value on the action as follows:
|
||||
|
||||
```yaml
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
```
|
||||
|
||||
### `codex_args`
|
||||
|
||||
A whitespace-delimited list of arguments to pass to Codex. Defaults to `--config hide_agent_reasoning=true --full-auto`, but if you want to override the default model to use `o3`:
|
||||
|
||||
```yaml
|
||||
codex_args: "--full-auto --model o3"
|
||||
```
|
||||
|
||||
For more complex configurations, use the `codex_home` input.
|
||||
|
||||
### `codex_home`
|
||||
|
||||
If set, the value to use for the `$CODEX_HOME` environment variable when running Codex. As explained [in the docs](https://github.com/openai/codex/tree/main/codex-rs#readme), this folder can contain the `config.toml` to configure Codex, custom instructions, and log files.
|
||||
|
||||
This should be a relative path within your repo.
|
||||
|
||||
## Prompt Template Variables
|
||||
|
||||
As shown above, `"prompt"` and `"promptPath"` are used to define prompt templates that will be populated and passed to Codex in response to certain events. All template variables are of the form `{CODEX_ACTION_...}` and the supported values are defined below.
|
||||
|
||||
### `CODEX_ACTION_ISSUE_TITLE`
|
||||
|
||||
If the action was triggered on a GitHub issue, this is the issue title.
|
||||
|
||||
Specifically it is read as the `.issue.title` from the `$GITHUB_EVENT_PATH`.
|
||||
|
||||
### `CODEX_ACTION_ISSUE_BODY`
|
||||
|
||||
If the action was triggered on a GitHub issue, this is the issue body.
|
||||
|
||||
Specifically it is read as the `.issue.body` from the `$GITHUB_EVENT_PATH`.
|
||||
|
||||
### `CODEX_ACTION_GITHUB_EVENT_PATH`
|
||||
|
||||
The value of the `$GITHUB_EVENT_PATH` environment variable, which is the path to the file that contains the JSON payload for the event that triggered the workflow. Codex can use `jq` to read only the fields of interest from this file.
|
||||
|
||||
### `CODEX_ACTION_PR_DIFF`
|
||||
|
||||
If the action was triggered on a pull request, this is the diff between the base and head commits of the PR. It is the output from `git diff`.
|
||||
|
||||
Note that the content of the diff could be quite large, so it is generally safer to point Codex at `CODEX_ACTION_GITHUB_EVENT_PATH` and let it decide how it wants to explore the change.
|
||||
124
.github/actions/codex/action.yml
vendored
Normal file
124
.github/actions/codex/action.yml
vendored
Normal file
@@ -0,0 +1,124 @@
|
||||
# Composite action that downloads a released `codex-exec` binary and runs the
# TypeScript driver in ./src/main.ts with Bun.
#
# Untrusted or free-form values (event payload fields, action inputs) are
# handed to shell steps through `env:` rather than being expanded inline with
# `${{ ... }}`, so they can never be interpreted as shell syntax.
name: "Codex [reusable action]"
description: "A reusable action that runs a Codex model."

inputs:
  openai_api_key:
    description: "The value to use as the OPENAI_API_KEY environment variable when running Codex."
    required: true
  trigger_phrase:
    description: "Text to trigger Codex from a PR/issue body or comment."
    required: false
    default: ""
  github_token:
    description: "Token so Codex can comment on the PR or issue."
    required: true
  codex_args:
    description: "A whitespace-delimited list of arguments to pass to Codex. Due to limitations in YAML, arguments with spaces are not supported. For more complex configurations, use the `codex_home` input."
    required: false
    default: "--config hide_agent_reasoning=true --full-auto"
  codex_home:
    description: "Value to use as the CODEX_HOME environment variable when running Codex."
    required: false
  codex_release_tag:
    description: "The release tag of the Codex model to run."
    required: false
    default: "codex-rs-ca8e97fcbcb991e542b8689f2d4eab9d30c399d6-1-rust-v0.0.2505302325"

runs:
  using: "composite"
  steps:
    # Do this in Bash so we do not even bother to install Bun if the sender does
    # not have write access to the repo.
    - name: Verify user has write access to the repo.
      env:
        GH_TOKEN: ${{ github.token }}
        # Passed via env (not inline) so the login is treated purely as data.
        SENDER_LOGIN: ${{ github.event.sender.login }}
      shell: bash
      run: |
        set -euo pipefail

        PERMISSION=$(gh api \
          "/repos/${GITHUB_REPOSITORY}/collaborators/${SENDER_LOGIN}/permission" \
          | jq -r '.permission')

        if [[ "$PERMISSION" != "admin" && "$PERMISSION" != "write" ]]; then
          # Explain the failure instead of exiting silently.
          echo "::error::User '${SENDER_LOGIN}' does not have write access to ${GITHUB_REPOSITORY} (permission: ${PERMISSION})."
          exit 1
        fi

    - name: Download Codex
      env:
        GH_TOKEN: ${{ github.token }}
        # Passed via env and quoted below so an unusual tag cannot be
        # word-split or interpreted by the shell.
        CODEX_RELEASE_TAG: ${{ inputs.codex_release_tag }}
      shell: bash
      run: |
        set -euo pipefail

        # Determine OS/arch and corresponding Codex artifact name.
        uname_s=$(uname -s)
        uname_m=$(uname -m)

        case "$uname_s" in
          Linux*)  os="linux" ;;
          Darwin*) os="apple-darwin" ;;
          *) echo "Unsupported operating system: $uname_s"; exit 1 ;;
        esac

        case "$uname_m" in
          x86_64*) arch="x86_64" ;;
          arm64*|aarch64*) arch="aarch64" ;;
          *) echo "Unsupported architecture: $uname_m"; exit 1 ;;
        esac

        # linux builds differentiate between musl and gnu.
        if [[ "$os" == "linux" ]]; then
          if [[ "$arch" == "x86_64" ]]; then
            triple="${arch}-unknown-linux-musl"
          else
            # Only other supported linux build is aarch64 gnu.
            triple="${arch}-unknown-linux-gnu"
          fi
        else
          # macOS
          triple="${arch}-apple-darwin"
        fi

        # Note that if we start baking version numbers into the artifact name,
        # we will need to update this action.yml file to match.
        artifact="codex-exec-${triple}.tar.gz"

        gh release download "$CODEX_RELEASE_TAG" --repo openai/codex \
          --pattern "$artifact" --output - \
          | tar xzO > /usr/local/bin/codex-exec
        chmod +x /usr/local/bin/codex-exec

        # Display Codex version to confirm binary integrity; ensure we point it
        # at the checked-out repository via --cd so that any subsequent commands
        # use the correct working directory.
        codex-exec --cd "$GITHUB_WORKSPACE" --version

    - name: Install Bun
      uses: oven-sh/setup-bun@v2
      with:
        bun-version: 1.2.11

    - name: Install dependencies
      shell: bash
      run: |
        # GITHUB_ACTION_PATH is set by the runner for composite actions; quote
        # it so paths containing spaces still work.
        cd "$GITHUB_ACTION_PATH"
        bun install --production

    - name: Run Codex
      shell: bash
      run: bun run "$GITHUB_ACTION_PATH/src/main.ts"
      # Process args plus environment variables often have a max of 128 KiB,
      # so we should fit within that limit?
      env:
        INPUT_CODEX_ARGS: ${{ inputs.codex_args || '' }}
        INPUT_CODEX_HOME: ${{ inputs.codex_home || ''}}
        INPUT_TRIGGER_PHRASE: ${{ inputs.trigger_phrase || '' }}
        OPENAI_API_KEY: ${{ inputs.openai_api_key }}
        GITHUB_TOKEN: ${{ inputs.github_token }}
        GITHUB_EVENT_ACTION: ${{ github.event.action || '' }}
        GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name || '' }}
        GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number || '' }}
        GITHUB_EVENT_ISSUE_BODY: ${{ github.event.issue.body || '' }}
        GITHUB_EVENT_REVIEW_BODY: ${{ github.event.review.body || '' }}
        GITHUB_EVENT_COMMENT_BODY: ${{ github.event.comment.body || '' }}
|
||||
85
.github/actions/codex/bun.lock
vendored
Normal file
85
.github/actions/codex/bun.lock
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
{
|
||||
"lockfileVersion": 1,
|
||||
"workspaces": {
|
||||
"": {
|
||||
"name": "codex-action",
|
||||
"dependencies": {
|
||||
"@actions/core": "^1.11.1",
|
||||
"@actions/github": "^6.0.1",
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "^1.2.11",
|
||||
"@types/node": "^22.15.21",
|
||||
"prettier": "^3.5.3",
|
||||
"typescript": "^5.8.3",
|
||||
},
|
||||
},
|
||||
},
|
||||
"packages": {
|
||||
"@actions/core": ["@actions/core@1.11.1", "", { "dependencies": { "@actions/exec": "^1.1.1", "@actions/http-client": "^2.0.1" } }, "sha512-hXJCSrkwfA46Vd9Z3q4cpEpHB1rL5NG04+/rbqW9d3+CSvtB1tYe8UTpAlixa1vj0m/ULglfEK2UKxMGxCxv5A=="],
|
||||
|
||||
"@actions/exec": ["@actions/exec@1.1.1", "", { "dependencies": { "@actions/io": "^1.0.1" } }, "sha512-+sCcHHbVdk93a0XT19ECtO/gIXoxvdsgQLzb2fE2/5sIZmWQuluYyjPQtrtTHdU1YzTZ7bAPN4sITq2xi1679w=="],
|
||||
|
||||
"@actions/github": ["@actions/github@6.0.1", "", { "dependencies": { "@actions/http-client": "^2.2.0", "@octokit/core": "^5.0.1", "@octokit/plugin-paginate-rest": "^9.2.2", "@octokit/plugin-rest-endpoint-methods": "^10.4.0", "@octokit/request": "^8.4.1", "@octokit/request-error": "^5.1.1", "undici": "^5.28.5" } }, "sha512-xbZVcaqD4XnQAe35qSQqskb3SqIAfRyLBrHMd/8TuL7hJSz2QtbDwnNM8zWx4zO5l2fnGtseNE3MbEvD7BxVMw=="],
|
||||
|
||||
"@actions/http-client": ["@actions/http-client@2.2.3", "", { "dependencies": { "tunnel": "^0.0.6", "undici": "^5.25.4" } }, "sha512-mx8hyJi/hjFvbPokCg4uRd4ZX78t+YyRPtnKWwIl+RzNaVuFpQHfmlGVfsKEJN8LwTCvL+DfVgAM04XaHkm6bA=="],
|
||||
|
||||
"@actions/io": ["@actions/io@1.1.3", "", {}, "sha512-wi9JjgKLYS7U/z8PPbco+PvTb/nRWjeoFlJ1Qer83k/3C5PHQi28hiVdeE2kHXmIL99mQFawx8qt/JPjZilJ8Q=="],
|
||||
|
||||
"@fastify/busboy": ["@fastify/busboy@2.1.1", "", {}, "sha512-vBZP4NlzfOlerQTnba4aqZoMhE/a9HY7HRqoOPaETQcSQuWEIyZMHGfVu6w9wGtGK5fED5qRs2DteVCjOH60sA=="],
|
||||
|
||||
"@octokit/auth-token": ["@octokit/auth-token@4.0.0", "", {}, "sha512-tY/msAuJo6ARbK6SPIxZrPBms3xPbfwBrulZe0Wtr/DIY9lje2HeV1uoebShn6mx7SjCHif6EjMvoREj+gZ+SA=="],
|
||||
|
||||
"@octokit/core": ["@octokit/core@5.2.1", "", { "dependencies": { "@octokit/auth-token": "^4.0.0", "@octokit/graphql": "^7.1.0", "@octokit/request": "^8.4.1", "@octokit/request-error": "^5.1.1", "@octokit/types": "^13.0.0", "before-after-hook": "^2.2.0", "universal-user-agent": "^6.0.0" } }, "sha512-dKYCMuPO1bmrpuogcjQ8z7ICCH3FP6WmxpwC03yjzGfZhj9fTJg6+bS1+UAplekbN2C+M61UNllGOOoAfGCrdQ=="],
|
||||
|
||||
"@octokit/endpoint": ["@octokit/endpoint@9.0.6", "", { "dependencies": { "@octokit/types": "^13.1.0", "universal-user-agent": "^6.0.0" } }, "sha512-H1fNTMA57HbkFESSt3Y9+FBICv+0jFceJFPWDePYlR/iMGrwM5ph+Dd4XRQs+8X+PUFURLQgX9ChPfhJ/1uNQw=="],
|
||||
|
||||
"@octokit/graphql": ["@octokit/graphql@7.1.1", "", { "dependencies": { "@octokit/request": "^8.4.1", "@octokit/types": "^13.0.0", "universal-user-agent": "^6.0.0" } }, "sha512-3mkDltSfcDUoa176nlGoA32RGjeWjl3K7F/BwHwRMJUW/IteSa4bnSV8p2ThNkcIcZU2umkZWxwETSSCJf2Q7g=="],
|
||||
|
||||
"@octokit/openapi-types": ["@octokit/openapi-types@24.2.0", "", {}, "sha512-9sIH3nSUttelJSXUrmGzl7QUBFul0/mB8HRYl3fOlgHbIWG+WnYDXU3v/2zMtAvuzZ/ed00Ei6on975FhBfzrg=="],
|
||||
|
||||
"@octokit/plugin-paginate-rest": ["@octokit/plugin-paginate-rest@9.2.2", "", { "dependencies": { "@octokit/types": "^12.6.0" }, "peerDependencies": { "@octokit/core": "5" } }, "sha512-u3KYkGF7GcZnSD/3UP0S7K5XUFT2FkOQdcfXZGZQPGv3lm4F2Xbf71lvjldr8c1H3nNbF+33cLEkWYbokGWqiQ=="],
|
||||
|
||||
"@octokit/plugin-rest-endpoint-methods": ["@octokit/plugin-rest-endpoint-methods@10.4.1", "", { "dependencies": { "@octokit/types": "^12.6.0" }, "peerDependencies": { "@octokit/core": "5" } }, "sha512-xV1b+ceKV9KytQe3zCVqjg+8GTGfDYwaT1ATU5isiUyVtlVAO3HNdzpS4sr4GBx4hxQ46s7ITtZrAsxG22+rVg=="],
|
||||
|
||||
"@octokit/request": ["@octokit/request@8.4.1", "", { "dependencies": { "@octokit/endpoint": "^9.0.6", "@octokit/request-error": "^5.1.1", "@octokit/types": "^13.1.0", "universal-user-agent": "^6.0.0" } }, "sha512-qnB2+SY3hkCmBxZsR/MPCybNmbJe4KAlfWErXq+rBKkQJlbjdJeS85VI9r8UqeLYLvnAenU8Q1okM/0MBsAGXw=="],
|
||||
|
||||
"@octokit/request-error": ["@octokit/request-error@5.1.1", "", { "dependencies": { "@octokit/types": "^13.1.0", "deprecation": "^2.0.0", "once": "^1.4.0" } }, "sha512-v9iyEQJH6ZntoENr9/yXxjuezh4My67CBSu9r6Ve/05Iu5gNgnisNWOsoJHTP6k0Rr0+HQIpnH+kyammu90q/g=="],
|
||||
|
||||
"@octokit/types": ["@octokit/types@13.10.0", "", { "dependencies": { "@octokit/openapi-types": "^24.2.0" } }, "sha512-ifLaO34EbbPj0Xgro4G5lP5asESjwHracYJvVaPIyXMuiuXLlhic3S47cBdTb+jfODkTE5YtGCLt3Ay3+J97sA=="],
|
||||
|
||||
"@types/bun": ["@types/bun@1.2.13", "", { "dependencies": { "bun-types": "1.2.13" } }, "sha512-u6vXep/i9VBxoJl3GjZsl/BFIsvML8DfVDO0RYLEwtSZSp981kEO1V5NwRcO1CPJ7AmvpbnDCiMKo3JvbDEjAg=="],
|
||||
|
||||
"@types/node": ["@types/node@22.15.21", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-EV/37Td6c+MgKAbkcLG6vqZ2zEYHD7bvSrzqqs2RIhbA6w3x+Dqz8MZM3sP6kGTeLrdoOgKZe+Xja7tUB2DNkQ=="],
|
||||
|
||||
"before-after-hook": ["before-after-hook@2.2.3", "", {}, "sha512-NzUnlZexiaH/46WDhANlyR2bXRopNg4F/zuSA3OpZnllCUgRaOF2znDioDWrmbNVsuZk6l9pMquQB38cfBZwkQ=="],
|
||||
|
||||
"bun-types": ["bun-types@1.2.13", "", { "dependencies": { "@types/node": "*" } }, "sha512-rRjA1T6n7wto4gxhAO/ErZEtOXyEZEmnIHQfl0Dt1QQSB4QV0iP6BZ9/YB5fZaHFQ2dwHFrmPaRQ9GGMX01k9Q=="],
|
||||
|
||||
"deprecation": ["deprecation@2.3.1", "", {}, "sha512-xmHIy4F3scKVwMsQ4WnVaS8bHOx0DmVwRywosKhaILI0ywMDWPtBSku2HNxRvF7jtwDRsoEwYQSfbxj8b7RlJQ=="],
|
||||
|
||||
"once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="],
|
||||
|
||||
"prettier": ["prettier@3.5.3", "", { "bin": { "prettier": "bin/prettier.cjs" } }, "sha512-QQtaxnoDJeAkDvDKWCLiwIXkTgRhwYDEQCghU9Z6q03iyek/rxRh/2lC3HB7P8sWT2xC/y5JDctPLBIGzHKbhw=="],
|
||||
|
||||
"tunnel": ["tunnel@0.0.6", "", {}, "sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg=="],
|
||||
|
||||
"typescript": ["typescript@5.8.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ=="],
|
||||
|
||||
"undici": ["undici@5.29.0", "", { "dependencies": { "@fastify/busboy": "^2.0.0" } }, "sha512-raqeBD6NQK4SkWhQzeYKd1KmIG6dllBOTt55Rmkt4HtI9mwdWtJljnrXjAFUBLTSN67HWrOIZ3EPF4kjUw80Bg=="],
|
||||
|
||||
"undici-types": ["undici-types@6.21.0", "", {}, "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="],
|
||||
|
||||
"universal-user-agent": ["universal-user-agent@6.0.1", "", {}, "sha512-yCzhz6FN2wU1NiiQRogkTQszlQSlpWaw8SvVegAc+bDxbzHgh1vX8uIe8OYyMH6DwH+sdTJsgMl36+mSMdRJIQ=="],
|
||||
|
||||
"wrappy": ["wrappy@1.0.2", "", {}, "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="],
|
||||
|
||||
"@octokit/plugin-paginate-rest/@octokit/types": ["@octokit/types@12.6.0", "", { "dependencies": { "@octokit/openapi-types": "^20.0.0" } }, "sha512-1rhSOfRa6H9w4YwK0yrf5faDaDTb+yLyBUKOCV4xtCDB5VmIPqd/v9yr9o6SAzOAlRxMiRiCic6JVM1/kunVkw=="],
|
||||
|
||||
"@octokit/plugin-rest-endpoint-methods/@octokit/types": ["@octokit/types@12.6.0", "", { "dependencies": { "@octokit/openapi-types": "^20.0.0" } }, "sha512-1rhSOfRa6H9w4YwK0yrf5faDaDTb+yLyBUKOCV4xtCDB5VmIPqd/v9yr9o6SAzOAlRxMiRiCic6JVM1/kunVkw=="],
|
||||
|
||||
"@octokit/plugin-paginate-rest/@octokit/types/@octokit/openapi-types": ["@octokit/openapi-types@20.0.0", "", {}, "sha512-EtqRBEjp1dL/15V7WiX5LJMIxxkdiGJnabzYx5Apx4FkQIFgAfKumXeYAqqJCj1s+BMX4cPFIFC4OLCR6stlnA=="],
|
||||
|
||||
"@octokit/plugin-rest-endpoint-methods/@octokit/types/@octokit/openapi-types": ["@octokit/openapi-types@20.0.0", "", {}, "sha512-EtqRBEjp1dL/15V7WiX5LJMIxxkdiGJnabzYx5Apx4FkQIFgAfKumXeYAqqJCj1s+BMX4cPFIFC4OLCR6stlnA=="],
|
||||
}
|
||||
}
|
||||
21
.github/actions/codex/package.json
vendored
Normal file
21
.github/actions/codex/package.json
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "codex-action",
|
||||
"version": "0.0.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"format": "prettier --check src",
|
||||
"format:fix": "prettier --write src",
|
||||
"test": "bun test",
|
||||
"typecheck": "tsc"
|
||||
},
|
||||
"dependencies": {
|
||||
"@actions/core": "^1.11.1",
|
||||
"@actions/github": "^6.0.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "^1.2.11",
|
||||
"@types/node": "^22.15.21",
|
||||
"prettier": "^3.5.3",
|
||||
"typescript": "^5.8.3"
|
||||
}
|
||||
}
|
||||
85
.github/actions/codex/src/add-reaction.ts
vendored
Normal file
85
.github/actions/codex/src/add-reaction.ts
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
import * as github from "@actions/github";
|
||||
import type { EnvContext } from "./env-context";
|
||||
|
||||
/**
 * Acknowledge the triggering entity with an "eyes" reaction so the requester
 * gets immediate, visible feedback (comparable to the *-in-progress label
 * flow) that Codex picked up the request.
 *
 * Target selection by GitHub event name:
 *
 *   - `issue_comment`               → reaction on the issue comment
 *   - `pull_request_review_comment` → reaction on the PR review comment
 *   - `issues` and every other event → reaction on the issue/PR identified by
 *     `github.context.issue.number`
 *
 * When the expected id/number is missing from the payload nothing is done, and
 * any error raised while creating the reaction is logged with `console.warn`
 * and swallowed: the reaction is cosmetic and must not fail the action.
 *
 * @param ctx environment context used to obtain the Octokit client.
 */
export async function addEyesReaction(ctx: EnvContext): Promise<void> {
  const octokit = ctx.getOctokit();
  const { owner, repo } = github.context.repo;
  const eventName = github.context.eventName;

  try {
    if (eventName === "issue_comment") {
      const issueCommentId = (github.context.payload as any)?.comment?.id;
      if (issueCommentId) {
        await octokit.rest.reactions.createForIssueComment({
          owner,
          repo,
          comment_id: issueCommentId,
          content: "eyes",
        });
      }
      // Comment events never fall back to reacting on the parent issue.
      return;
    }

    if (eventName === "pull_request_review_comment") {
      const reviewCommentId = (github.context.payload as any)?.comment?.id;
      if (reviewCommentId) {
        await octokit.rest.reactions.createForPullRequestReviewComment({
          owner,
          repo,
          comment_id: reviewCommentId,
          content: "eyes",
        });
      }
      return;
    }

    // "issues" events and the generic fallback share the same behavior: react
    // to the issue/PR itself when a number is available.
    const targetNumber = github.context.issue.number;
    if (targetNumber) {
      await octokit.rest.reactions.createForIssue({
        owner,
        repo,
        issue_number: targetNumber,
        content: "eyes",
      });
    }
  } catch (error) {
    // Best effort only: report the problem and carry on.
    console.warn(`Failed to add \"eyes\" reaction: ${error}`);
  }
}
|
||||
53
.github/actions/codex/src/comment.ts
vendored
Normal file
53
.github/actions/codex/src/comment.ts
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
import type { EnvContext } from "./env-context";
|
||||
import { runCodex } from "./run-codex";
|
||||
import { postComment } from "./post-comment";
|
||||
import { addEyesReaction } from "./add-reaction";
|
||||
|
||||
/**
 * Entry point for comment-style events (`issue_comment`,
 * `pull_request_review_comment`) once the action is known to be supported.
 *
 * Flow:
 *   1. Read the trigger phrase from `INPUT_TRIGGER_PHRASE`; bail out if empty.
 *   2. Pick the text to inspect from the first available of
 *      `GITHUB_EVENT_COMMENT_BODY`, `GITHUB_EVENT_REVIEW_BODY`,
 *      `GITHUB_EVENT_ISSUE_BODY`; bail out if none is available.
 *   3. Bail out unless the text contains the trigger phrase.
 *   4. Use the text minus the first occurrence of the phrase (trimmed) as the
 *      prompt; bail out if nothing is left.
 *   5. Add an "eyes" reaction, run Codex, and post its last message as a
 *      comment.
 *
 * @param ctx environment context giving access to the action's inputs/env.
 */
export async function onComment(ctx: EnvContext): Promise<void> {
  const triggerPhrase = ctx.tryGet("INPUT_TRIGGER_PHRASE");
  if (!triggerPhrase) {
    console.warn("Empty trigger phrase: skipping.");
    return;
  }

  // Which variable is populated depends on the event type, so probe them in
  // priority order: comment body, then review body, then issue body.
  const commentBody =
    ctx.tryGetNonEmpty("GITHUB_EVENT_COMMENT_BODY") ??
    ctx.tryGetNonEmpty("GITHUB_EVENT_REVIEW_BODY") ??
    ctx.tryGetNonEmpty("GITHUB_EVENT_ISSUE_BODY");
  if (!commentBody) {
    console.warn("Comment body not found in environment: skipping.");
    return;
  }

  // Locate the trigger phrase; without it this comment is not addressed to us.
  const phraseStart = commentBody.indexOf(triggerPhrase);
  if (phraseStart === -1) {
    console.log(
      `Trigger phrase '${triggerPhrase}' not found: nothing to do for this comment.`,
    );
    return;
  }

  // Cut out only the first occurrence; later occurrences may be meaningful
  // parts of the request and are kept in the prompt.
  const beforePhrase = commentBody.slice(0, phraseStart);
  const afterPhrase = commentBody.slice(phraseStart + triggerPhrase.length);
  const prompt = (beforePhrase + afterPhrase).trim();
  if (prompt.length === 0) {
    console.warn("Prompt is empty after removing trigger phrase: skipping");
    return;
  }

  // Let the requester know the request was picked up before the slow part.
  await addEyesReaction(ctx);

  // Run Codex and publish its final message as a new comment.
  const lastMessage = await runCodex(prompt, ctx);
  await postComment(lastMessage, ctx);
}
|
||||
11
.github/actions/codex/src/config.ts
vendored
Normal file
11
.github/actions/codex/src/config.ts
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
import { readdirSync, statSync } from "fs";
|
||||
import * as path from "path";
|
||||
|
||||
/** Action configuration: one `LabelConfig` per label name. */
export interface Config {
  /** Label configurations keyed by the label name. */
  labels: Record<string, LabelConfig>;
}

/** Settings attached to a single label. */
export interface LabelConfig {
  /** Returns the prompt template. */
  getPromptTemplate(): string;
}
|
||||
44
.github/actions/codex/src/default-label-config.ts
vendored
Normal file
44
.github/actions/codex/src/default-label-config.ts
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
import type { Config } from "./config";
|
||||
|
||||
/**
 * Build the built-in configuration containing three labels:
 * `codex-investigate-issue`, `codex-code-review` and `codex-attempt-fix`.
 *
 * Each template may contain `{CODEX_ACTION_*}` placeholders that are filled in
 * when the prompt is rendered.
 */
export function getDefaultConfig(): Config {
  const investigateIssueTemplate = `
Troubleshoot whether the reported issue is valid.

Provide a concise and respectful comment summarizing the findings.

### {CODEX_ACTION_ISSUE_TITLE}

{CODEX_ACTION_ISSUE_BODY}
`.trim();

  const codeReviewTemplate = `
Review this PR and respond with a very concise final message, formatted in Markdown.

There should be a summary of the changes (1-2 sentences) and a few bullet points if necessary.

Then provide the **review** (1-2 sentences plus bullet points, friendly tone).

{CODEX_ACTION_GITHUB_EVENT_PATH} contains the JSON that triggered this GitHub workflow. It contains the \`base\` and \`head\` refs that define this PR. Both refs are available locally.
`.trim();

  const attemptFixTemplate = `
Attempt to solve the reported issue.

If a code change is required, create a new branch, commit the fix, and open a pull-request that resolves the problem.

### {CODEX_ACTION_ISSUE_TITLE}

{CODEX_ACTION_ISSUE_BODY}
`.trim();

  return {
    labels: {
      "codex-investigate-issue": {
        getPromptTemplate: () => investigateIssueTemplate,
      },
      "codex-code-review": {
        getPromptTemplate: () => codeReviewTemplate,
      },
      "codex-attempt-fix": {
        getPromptTemplate: () => attemptFixTemplate,
      },
    },
  };
}
|
||||
116
.github/actions/codex/src/env-context.ts
vendored
Normal file
116
.github/actions/codex/src/env-context.ts
vendored
Normal file
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
* Centralised access to environment variables used by the Codex GitHub
|
||||
* Action.
|
||||
*
|
||||
* To enable proper unit-testing we avoid reading from `process.env` at module
|
||||
* initialisation time. Instead an `EnvContext` object is created (usually from
|
||||
* the real `process.env`) and passed around explicitly or – where that is not
|
||||
* yet practical – imported as the shared `defaultContext` singleton. Tests can
|
||||
* create their own context backed by a stubbed map of variables without having
|
||||
* to mutate global state.
|
||||
*/
|
||||
|
||||
import { fail } from "./fail";
|
||||
import * as github from "@actions/github";
|
||||
|
||||
/** Explicit, injectable view of the environment used by the action. */
export interface EnvContext {
  /**
   * Read a required environment variable. The action is terminated via `fail`
   * when the variable is not set.
   */
  get(name: string): string;

  /**
   * Read an optional environment variable. Yields the value when it is set and
   * `undefined` otherwise; never calls `fail`.
   */
  tryGet(name: string): string | undefined;

  /**
   * Read an optional environment variable, treating the empty string like an
   * unset variable. Yields the non-empty value or `null`.
   */
  tryGetNonEmpty(name: string): string | null;

  /**
   * Return a memoised Octokit client. The token is taken from the argument
   * when supplied, otherwise from the `GITHUB_TOKEN`/`GH_TOKEN` environment
   * variables.
   *
   * The first client created is cached and handed back on every later call, so
   * a single action run does not spawn multiple REST clients.
   */
  getOctokit(token?: string): ReturnType<typeof github.getOctokit>;
}
|
||||
|
||||
/**
 * Internal helper – *not* exported.
 *
 * Looks up `name` in `env`, echoes the lookup to the console and returns the
 * raw value, which may be `undefined`. Despite the name, this helper does not
 * enforce presence; callers decide how to treat a missing value.
 */
function _getRequiredEnv(
  name: string,
  env: Record<string, string | undefined>,
): string | undefined {
  const value = env[name];

  // Variables whose name ends in KEY or TOKEN are treated as secrets: only
  // their presence is logged, never their content.
  const isSecretLike = name.endsWith("KEY") || name.endsWith("TOKEN");
  if (!isSecretLike) {
    console.log(`${name}=${value}`);
  } else if (value) {
    console.log(`value for ${name} was found`);
  }

  return value;
}
|
||||
|
||||
/**
 * Create a context backed by the supplied environment map (defaults to
 * `process.env`).
 *
 * @param env Map of variable names to values that backs every lookup.
 * @returns An `EnvContext` whose Octokit client is created lazily and then
 *          shared by all callers of this context.
 */
export function createEnvContext(
  env: Record<string, string | undefined> = process.env,
): EnvContext {
  // Lazily instantiated Octokit client – shared across this context.
  let cachedOctokit: ReturnType<typeof github.getOctokit> | null = null;

  return {
    get(name: string): string {
      const value = _getRequiredEnv(name, env);
      if (value == null) {
        fail(`Missing required environment variable: ${name}`);
      }
      return value;
    },

    tryGet(name: string): string | undefined {
      return _getRequiredEnv(name, env);
    },

    tryGetNonEmpty(name: string): string | null {
      const value = _getRequiredEnv(name, env);
      return value == null || value === "" ? null : value;
    },

    getOctokit(token?: string) {
      if (cachedOctokit) {
        return cachedOctokit;
      }

      // Determine the token to authenticate with. `||` (rather than `??`) is
      // deliberate: a variable that is set to the empty string must not
      // shadow the next candidate.
      const githubToken = token || env["GITHUB_TOKEN"] || env["GH_TOKEN"];

      if (!githubToken) {
        fail(
          "Unable to locate a GitHub token. `github_token` should have been set on the action.",
        );
      }

      cachedOctokit = github.getOctokit(githubToken!);
      return cachedOctokit;
    },
  };
}

/**
 * Shared context built from the actual `process.env`. Production code that is
 * not yet refactored to receive a context explicitly may import and use this
 * singleton. Tests should avoid the singleton and instead pass their own
 * context to the functions they exercise.
 */
export const defaultContext: EnvContext = createEnvContext();
|
||||
4
.github/actions/codex/src/fail.ts
vendored
Normal file
4
.github/actions/codex/src/fail.ts
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
/**
 * Report a fatal problem and stop the process.
 *
 * Writes `message` to stderr and exits with status code 1; it never returns.
 */
export function fail(message: string): never {
  console.error(message);
  process.exit(1);
}
|
||||
149
.github/actions/codex/src/git-helpers.ts
vendored
Normal file
149
.github/actions/codex/src/git-helpers.ts
vendored
Normal file
@@ -0,0 +1,149 @@
|
||||
import { spawnSync } from "child_process";
|
||||
import * as github from "@actions/github";
|
||||
import { EnvContext } from "./env-context";
|
||||
|
||||
/**
 * Run `git` synchronously and return its trimmed standard output.
 *
 * @param args   Arguments passed to `git` verbatim.
 * @param silent When true (default) stdout/stderr are captured; when false the
 *               child inherits the parent's stdio, so nothing is captured and
 *               an empty string is returned.
 * @returns Captured stdout with surrounding whitespace removed.
 * @throws The spawn error if the process could not be started, or an `Error`
 *         describing the command when git exits with a non-zero status.
 */
function runGit(args: string[], silent = true): string {
  // Remote URLs may embed credentials (`https://user:token@host/...`). Mask
  // the user-info part before anything reaches the logs or an error message.
  const redact = (text: string): string =>
    text.replace(/(\w+:\/\/)[^\s/@]+@/g, "$1***@");
  const printableCommand = redact(args.join(" "));

  console.info(`Running git ${printableCommand}`);
  const res = spawnSync("git", args, {
    encoding: "utf8",
    stdio: silent ? ["ignore", "pipe", "pipe"] : "inherit",
  });
  if (res.error) {
    throw res.error;
  }
  if (res.status !== 0) {
    // stderr is null when stdio is inherited, so fall back to an empty string.
    throw new Error(
      `git ${printableCommand} failed with code ${res.status}: ${redact(res.stderr ?? "")}`,
    );
  }
  // stdout is null when stdio is inherited; avoid calling trim() on null.
  return (res.stdout ?? "").trim();
}
|
||||
|
||||
/** Stage every change in the working tree by running `git add -A`. */
function stageAllChanges(): void {
  const addEverything = ["add", "-A"];
  runGit(addEverything);
}
|
||||
|
||||
/**
 * Report whether the index differs from HEAD.
 *
 * Runs `git diff --cached --quiet --exit-code` and returns true for any exit
 * status other than 0.
 */
function hasStagedChanges(): boolean {
  const { status } = spawnSync("git", [
    "diff",
    "--cached",
    "--quiet",
    "--exit-code",
  ]);
  return status !== 0;
}
|
||||
|
||||
/**
 * Make sure HEAD is on a branch that may be pushed to.
 *
 * If HEAD is detached or the current branch is listed in `protectedBranches`,
 * a new branch named `codex-fix-<issueNumber>-<suffix>` is created and checked
 * out. The suffix is a slug derived from `suggestedSlug`, or the current
 * timestamp when no usable slug is available.
 *
 * @param issueNumber       Issue the fix belongs to; part of the branch name.
 * @param protectedBranches Branch names that must not be committed to directly.
 * @param suggestedSlug     Optional free text to derive a readable suffix from.
 * @returns The name of the branch HEAD is on when the function returns.
 */
function ensureOnBranch(
  issueNumber: number,
  protectedBranches: string[],
  suggestedSlug?: string,
): string {
  let branch = "";
  try {
    branch = runGit(["symbolic-ref", "--short", "-q", "HEAD"]);
  } catch {
    // `symbolic-ref` fails on a detached HEAD; treat that as "no branch".
    branch = "";
  }

  // Already on a regular, unprotected branch: nothing to do.
  if (branch && !protectedBranches.includes(branch)) {
    return branch;
  }

  // The slug usually comes from free-form text, so bound its length to keep
  // the ref name short and drop any dash left dangling by the cut.
  const MAX_SLUG_LENGTH = 60;
  const safeSlug = (suggestedSlug ?? "")
    .toLowerCase()
    .replace(/[^\w\s-]/g, "")
    .trim()
    .replace(/\s+/g, "-")
    .slice(0, MAX_SLUG_LENGTH)
    .replace(/-+$/, "");

  // Fall back to a timestamp when sanitising left nothing usable, so the
  // branch name never ends in a bare `-`.
  branch = `codex-fix-${issueNumber}-${safeSlug || Date.now()}`;
  runGit(["switch", "-c", branch]);
  return branch;
}
|
||||
|
||||
/**
 * Create a commit for the staged changes, if there are any.
 *
 * The commit message references `issueNumber`.
 */
function commitIfNeeded(issueNumber: number) {
  if (!hasStagedChanges()) {
    return;
  }
  const commitMessage = `fix: automated fix for #${issueNumber} via Codex`;
  runGit(["commit", "-m", commitMessage]);
}
|
||||
|
||||
/**
 * Push the current HEAD to `branch` on GitHub.
 *
 * The remote URL is built from `GITHUB_REPOSITORY` (owner/repo) and carries
 * `githubToken` as the password of the `x-access-token` user.
 */
function pushBranch(branch: string, githubToken: string, ctx: EnvContext) {
  const repoSlug = ctx.get("GITHUB_REPOSITORY"); // owner/repo
  const authenticatedRemote = `https://x-access-token:${githubToken}@github.com/${repoSlug}.git`;
  const refspec = `HEAD:${branch}`;

  runGit(["push", "--force-with-lease", "-u", authenticatedRemote, refspec]);
}
|
||||
|
||||
/**
 * Commit and push whatever changes are in the working tree and make sure a
 * pull request exists for them.
 *
 * @param issueNumber Issue the changes relate to.
 * @param lastMessage Final Codex message; its first line becomes the PR title
 *                    and the whole message becomes the PR body.
 * @param ctx         Environment context used for token and repository lookup.
 * @returns The URL of the open PR for the branch (an existing one, or the one
 *          just created), or `undefined` when no GitHub token is available.
 */
export async function maybePublishPRForIssue(
  issueNumber: number,
  lastMessage: string,
  ctx: EnvContext,
): Promise<string | undefined> {
  // Only proceed if GITHUB_TOKEN available.
  const githubToken =
    ctx.tryGetNonEmpty("GITHUB_TOKEN") ?? ctx.tryGetNonEmpty("GH_TOKEN");
  if (!githubToken) {
    console.warn("No GitHub token - skipping PR creation.");
    return undefined;
  }

  // Print `git status` for debugging. runGit captures stdout by default, so
  // the output has to be echoed explicitly.
  console.info(runGit(["status"]));

  // Stage any remaining changes so they can be committed and pushed.
  stageAllChanges();

  const octokit = ctx.getOctokit(githubToken);
  const { owner, repo } = github.context.repo;

  // Determine the default branch. It is both treated as protected and used as
  // the base of the pull request, so one lookup serves both purposes.
  let defaultBranch = "main";
  try {
    const repoInfo = await octokit.rest.repos.get({ owner, repo });
    defaultBranch = repoInfo.data.default_branch ?? "main";
  } catch (e) {
    console.warn(`Failed to get default branch, assuming 'main': ${e}`);
  }

  const sanitizedMessage = lastMessage.replace(/\u2022/g, "-");
  const [summaryLine] = sanitizedMessage.split(/\r?\n/);
  const branch = ensureOnBranch(
    issueNumber,
    [defaultBranch, "master"],
    summaryLine,
  );
  commitIfNeeded(issueNumber);
  pushBranch(branch, githubToken, ctx);

  // Try to find existing PR for this branch
  const existing = await octokit.rest.pulls.list({
    owner,
    repo,
    head: `${owner}:${branch}`,
    state: "open",
  });
  if (existing.data.length > 0) {
    return existing.data[0].html_url;
  }

  // A blank first line would produce an empty title, so substitute a generic
  // one in that case.
  const title = summaryLine.trim() || `Automated fix for #${issueNumber}`;

  const pr = await octokit.rest.pulls.create({
    owner,
    repo,
    title,
    head: branch,
    base: defaultBranch,
    body: sanitizedMessage,
  });
  return pr.data.html_url;
}
|
||||
16
.github/actions/codex/src/git-user.ts
vendored
Normal file
16
.github/actions/codex/src/git-user.ts
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
 * Configure the global git identity as the `github-actions[bot]` user by
 * setting `user.name` and `user.email` via `git config --global`.
 */
export function setGitHubActionsUser(): void {
  const identity: Array<[string, string]> = [
    ["user.name", "github-actions[bot]"],
    ["user.email", "41898282+github-actions[bot]@users.noreply.github.com"],
  ];

  identity.forEach(([key, value]) => {
    Bun.spawnSync(["git", "config", "--global", key, value]);
  });
}
|
||||
11
.github/actions/codex/src/github-workspace.ts
vendored
Normal file
11
.github/actions/codex/src/github-workspace.ts
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
import * as pathMod from "path";
|
||||
import { EnvContext } from "./env-context";
|
||||
|
||||
/**
 * Turn `path` into an absolute path.
 *
 * Absolute paths are returned unchanged; relative paths are joined onto the
 * directory named by `GITHUB_WORKSPACE`, which is only read in that case.
 */
export function resolveWorkspacePath(path: string, ctx: EnvContext): string {
  if (!pathMod.isAbsolute(path)) {
    return pathMod.join(ctx.get("GITHUB_WORKSPACE"), path);
  }
  return path;
}
|
||||
56
.github/actions/codex/src/load-config.ts
vendored
Normal file
56
.github/actions/codex/src/load-config.ts
vendored
Normal file
@@ -0,0 +1,56 @@
|
||||
import type { Config, LabelConfig } from "./config";
|
||||
|
||||
import { getDefaultConfig } from "./default-label-config";
|
||||
import { readFileSync, readdirSync, statSync } from "fs";
|
||||
import * as path from "path";
|
||||
|
||||
/**
 * Build an in-memory configuration object by scanning the repository for
 * Markdown templates located in `.github/codex/labels`.
 *
 * Every regular `*.md` file in that directory becomes a label whose name is
 * the filename **without** the extension, e.g. `codex-review.md` ➜
 * `codex-review`. If the directory cannot be listed, the default
 * configuration is returned instead.
 */
export function loadConfig(workspace: string): Config {
  const labelsDir = path.join(workspace, ".github", "codex", "labels");

  let fileNames: string[];
  try {
    fileNames = readdirSync(labelsDir);
  } catch {
    // Directory missing or unreadable: use the built-in defaults.
    return getDefaultConfig();
  }

  const extension = ".md";
  const labels: Record<string, LabelConfig> = {};

  for (const fileName of fileNames) {
    const templatePath = path.join(labelsDir, fileName);
    const isTemplateFile =
      fileName.endsWith(extension) && statSync(templatePath).isFile();

    if (isTemplateFile) {
      const labelName = fileName.slice(0, -extension.length);
      labels[labelName] = new FileLabelConfig(templatePath);
    }
  }

  return { labels };
}
|
||||
|
||||
/** `LabelConfig` whose prompt template lives in a file on disk. */
class FileLabelConfig implements LabelConfig {
  /** Path of the file holding the prompt template. */
  private readonly promptPath: string;

  constructor(promptPath: string) {
    this.promptPath = promptPath;
  }

  /** Read the template file as UTF-8; the file is read anew on every call. */
  getPromptTemplate(): string {
    return readFileSync(this.promptPath, { encoding: "utf8" });
  }
}
|
||||
80
.github/actions/codex/src/main.ts
vendored
Executable file
80
.github/actions/codex/src/main.ts
vendored
Executable file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env bun
|
||||
|
||||
import type { Config } from "./config";
|
||||
|
||||
import { defaultContext, EnvContext } from "./env-context";
|
||||
import { loadConfig } from "./load-config";
|
||||
import { setGitHubActionsUser } from "./git-user";
|
||||
import { onLabeled } from "./process-label";
|
||||
import { ensureBaseAndHeadCommitsForPRAreAvailable } from "./prompt-template";
|
||||
import { performAdditionalValidation } from "./verify-inputs";
|
||||
import { onComment } from "./comment";
|
||||
import { onReview } from "./review";
|
||||
|
||||
/**
 * Entry point of the action.
 *
 * Loads the label configuration from the workspace, validates it, sets the
 * git identity and then dispatches on `GITHUB_EVENT_NAME` /
 * `GITHUB_EVENT_ACTION`. Combinations without a handler only produce a
 * warning.
 */
async function main(): Promise<void> {
  const ctx: EnvContext = defaultContext;

  // Build the configuration dynamically by scanning `.github/codex/labels`.
  const GITHUB_WORKSPACE = ctx.get("GITHUB_WORKSPACE");
  const config: Config = loadConfig(GITHUB_WORKSPACE);

  // Optionally perform additional validation of prompt template files.
  performAdditionalValidation(config, GITHUB_WORKSPACE);

  const GITHUB_EVENT_NAME = ctx.get("GITHUB_EVENT_NAME");
  const GITHUB_EVENT_ACTION = ctx.get("GITHUB_EVENT_ACTION");

  // Set user.name and user.email to a bot before Codex runs, just in case it
  // creates a commit.
  setGitHubActionsUser();

  switch (GITHUB_EVENT_NAME) {
    case "issues": {
      if (GITHUB_EVENT_ACTION === "labeled") {
        await onLabeled(config, ctx);
        return;
      } else if (GITHUB_EVENT_ACTION === "opened") {
        await onComment(ctx);
        return;
      }
      break;
    }
    case "issue_comment": {
      if (GITHUB_EVENT_ACTION === "created") {
        await onComment(ctx);
        return;
      }
      break;
    }
    case "pull_request": {
      if (GITHUB_EVENT_ACTION === "labeled") {
        await ensureBaseAndHeadCommitsForPRAreAvailable(ctx);
        await onLabeled(config, ctx);
        return;
      }
      break;
    }
    case "pull_request_review": {
      // The commits are fetched regardless of the action.
      await ensureBaseAndHeadCommitsForPRAreAvailable(ctx);
      if (GITHUB_EVENT_ACTION === "submitted") {
        await onReview(ctx);
        return;
      }
      break;
    }
    case "pull_request_review_comment": {
      // The commits are fetched regardless of the action.
      await ensureBaseAndHeadCommitsForPRAreAvailable(ctx);
      if (GITHUB_EVENT_ACTION === "created") {
        await onComment(ctx);
        return;
      }
      break;
    }
  }

  console.warn(
    `Unsupported action '${GITHUB_EVENT_ACTION}' for event '${GITHUB_EVENT_NAME}'.`,
  );
}

// Do not leave the promise floating: report an unexpected rejection and exit
// with a non-zero status so the workflow step is marked as failed.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});
|
||||
62
.github/actions/codex/src/post-comment.ts
vendored
Normal file
62
.github/actions/codex/src/post-comment.ts
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
import { fail } from "./fail";
|
||||
import * as github from "@actions/github";
|
||||
import { EnvContext } from "./env-context";
|
||||
|
||||
/**
 * Publish `commentBody` as a comment on the issue / pull request of the
 * current GitHub context.
 *
 * A footer linking to the workflow run is appended when it can be built. The
 * Octokit client is obtained from `ctx`, so token lookup does not depend on
 * global state. When the context carries no issue number the comment is
 * skipped with a warning; an API failure terminates the action via `fail`.
 */
export async function postComment(
  commentBody: string,
  ctx: EnvContext,
): Promise<void> {
  // Append the link back to the workflow run, if available.
  const footer = buildWorkflowRunFooter(ctx);
  const bodyWithFooter = commentBody + (footer ?? "");

  const octokit = ctx.getOctokit();
  console.info("Got Octokit instance for posting comment");

  const { owner, repo } = github.context.repo;
  const issueNumber = github.context.issue.number;
  if (!issueNumber) {
    console.warn(
      "No issue or pull_request number found in GitHub context; skipping comment creation.",
    );
    return;
  }

  const request = {
    owner,
    repo,
    issue_number: issueNumber,
    body: bodyWithFooter,
  };

  try {
    console.info("Calling octokit.rest.issues.createComment()");
    await octokit.rest.issues.createComment(request);
  } catch (error) {
    fail(`Failed to create comment via GitHub API: ${error}`);
  }
}
|
||||
|
||||
/**
 * Build the Markdown fragment that links a comment back to the workflow run
 * which produced it.
 *
 * The server URL falls back to `https://github.com`. When
 * `GITHUB_REPOSITORY` or `GITHUB_RUN_ID` is unset or empty, `undefined` is
 * returned so the caller can leave the footer out.
 */
function buildWorkflowRunFooter(ctx: EnvContext): string | undefined {
  const configuredServerUrl = ctx.tryGetNonEmpty("GITHUB_SERVER_URL");
  const repository = ctx.tryGetNonEmpty("GITHUB_REPOSITORY");
  const runId = ctx.tryGetNonEmpty("GITHUB_RUN_ID");

  if (repository && runId) {
    const serverUrl = configuredServerUrl ?? "https://github.com";
    const url = `${serverUrl}/${repository}/actions/runs/${runId}`;
    return `\n\n---\n*[_View workflow run_](${url})*`;
  }

  return undefined;
}
|
||||
195
.github/actions/codex/src/process-label.ts
vendored
Normal file
195
.github/actions/codex/src/process-label.ts
vendored
Normal file
@@ -0,0 +1,195 @@
|
||||
import { fail } from "./fail";
|
||||
import { EnvContext } from "./env-context";
|
||||
import { renderPromptTemplate } from "./prompt-template";
|
||||
|
||||
import { postComment } from "./post-comment";
|
||||
import { runCodex } from "./run-codex";
|
||||
|
||||
import * as github from "@actions/github";
|
||||
import { Config, LabelConfig } from "./config";
|
||||
import { maybePublishPRForIssue } from "./git-helpers";
|
||||
|
||||
/**
 * Handle a `labeled` event: look up the configuration for the label named by
 * `GITHUB_EVENT_LABEL_NAME` and process it.
 *
 * The action is terminated via `fail` when the label has no configuration.
 */
export async function onLabeled(
  config: Config,
  ctx: EnvContext,
): Promise<void> {
  const GITHUB_EVENT_LABEL_NAME = ctx.get("GITHUB_EVENT_LABEL_NAME");

  // Only consider own properties: a plain index lookup would also resolve
  // inherited keys such as `constructor` or `toString` and hand back a value
  // that is not a LabelConfig.
  const labelConfig: LabelConfig | undefined =
    Object.prototype.hasOwnProperty.call(config.labels, GITHUB_EVENT_LABEL_NAME)
      ? config.labels[GITHUB_EVENT_LABEL_NAME]
      : undefined;

  if (!labelConfig) {
    fail(
      `Label \`${GITHUB_EVENT_LABEL_NAME}\` not found in config: ${JSON.stringify(config)}`,
    );
  }

  await processLabelConfig(ctx, GITHUB_EVENT_LABEL_NAME, labelConfig);
}
|
||||
|
||||
/**
 * Wrapper that handles `-in-progress` and `-completed` semantics around the
 * core processing of a label. It will:
 *
 * - Skip execution if the `-in-progress` or `-completed` label is already
 *   present, removing the triggering label in that case.
 * - Mark the PR/issue as `-in-progress` (and remove the triggering label).
 * - After successful execution, mark the PR/issue as `-completed`.
 *
 * @param ctx         Environment context providing the Octokit client.
 * @param label       Name of the label that triggered the action.
 * @param labelConfig Configuration belonging to `label`.
 */
async function processLabelConfig(
  ctx: EnvContext,
  label: string,
  labelConfig: LabelConfig,
): Promise<void> {
  const octokit = ctx.getOctokit();
  const { owner, repo, issueNumber, labelNames } =
    await getCurrentLabels(octokit);

  const inProgressLabel = `${label}-in-progress`;
  const completedLabel = `${label}-completed`;
  for (const markerLabel of [inProgressLabel, completedLabel]) {
    if (labelNames.includes(markerLabel)) {
      console.log(
        `Label '${markerLabel}' already present on issue/PR #${issueNumber}. Skipping Codex action.`,
      );

      // Clean up: remove the triggering label to avoid confusion and re-runs.
      // The marker label itself stays, so it keeps recording the state of the
      // earlier run.
      await addAndRemoveLabels(octokit, {
        owner,
        repo,
        issueNumber,
        remove: label,
      });

      return;
    }
  }

  // Mark the PR/issue as in progress.
  await addAndRemoveLabels(octokit, {
    owner,
    repo,
    issueNumber,
    add: inProgressLabel,
    remove: label,
  });

  // Run the core Codex processing.
  await processLabel(ctx, label, labelConfig);

  // Mark the PR/issue as completed.
  await addAndRemoveLabels(octokit, {
    owner,
    repo,
    issueNumber,
    add: completedLabel,
    remove: inProgressLabel,
  });
}
|
||||
|
||||
/**
 * Core processing for a label: render its prompt template, run Codex and post
 * the resulting message as a comment.
 *
 * When the label name contains "fix" or "attempt", a pull request is attempted
 * as well and, on success, its URL is appended to the comment.
 */
async function processLabel(
  ctx: EnvContext,
  label: string,
  labelConfig: LabelConfig,
): Promise<void> {
  const populatedTemplate = await renderPromptTemplate(
    labelConfig.getPromptTemplate(),
    ctx,
  );

  // Codex always runs; its final message forms the basis of the comment.
  let commentBody = await runCodex(populatedTemplate, ctx);

  // Current heuristic: the label name decides whether a PR is attempted.
  const wantsPullRequest = ["fix", "attempt"].some((keyword) =>
    label.includes(keyword),
  );

  if (!wantsPullRequest) {
    console.info(
      `label ${label} does not indicate we should attempt to create a PR`,
    );
  } else {
    console.info(`label ${label} indicates we should attempt to create a PR`);
    const prUrl = await maybeFixIssue(ctx, commentBody);
    if (prUrl) {
      commentBody += `\n\n---\nOpened pull request: ${prUrl}`;
    }
  }

  await postComment(commentBody, ctx);
}
|
||||
|
||||
/**
 * Try to turn the changes Codex produced into a pull request.
 *
 * Returns the PR URL reported by `maybePublishPRForIssue`. A failure while
 * publishing is logged as a warning and results in `undefined`.
 */
async function maybeFixIssue(
  ctx: EnvContext,
  lastMessage: string,
): Promise<string | undefined> {
  const issueNumber = github.context.issue.number!; // exists for issues triggering this path

  let prUrl: string | undefined;
  try {
    prUrl = await maybePublishPRForIssue(issueNumber, lastMessage, ctx);
  } catch (e) {
    console.warn(`Failed to publish PR: ${e}`);
    return undefined;
  }
  return prUrl;
}
|
||||
|
||||
/**
 * Fetch the labels currently set on the issue / pull request of the GitHub
 * context.
 *
 * Terminates the action via `fail` when the context has no issue number.
 * Returns the repository coordinates, the issue number and the label names.
 */
async function getCurrentLabels(
  octokit: ReturnType<typeof github.getOctokit>,
): Promise<{
  owner: string;
  repo: string;
  issueNumber: number;
  labelNames: Array<string>;
}> {
  const { owner, repo } = github.context.repo;
  const issueNumber = github.context.issue.number;

  if (!issueNumber) {
    fail("No issue or pull_request number found in GitHub context.");
  }

  const response = await octokit.rest.issues.get({
    owner,
    repo,
    issue_number: issueNumber,
  });

  // A label arrives either as a plain string or as an object with a `name`.
  const toName = (label: any) =>
    typeof label === "string" ? label : label.name;
  const labelNames = response.data.labels?.map(toName) ?? [];

  return { owner, repo, issueNumber, labelNames };
}
|
||||
|
||||
/**
 * Add and/or remove a single label on an issue / pull request.
 *
 * The addition (if requested) is performed before the removal. Each API call
 * is best-effort: a failure is logged as a warning and does not abort the
 * other operation.
 */
async function addAndRemoveLabels(
  octokit: ReturnType<typeof github.getOctokit>,
  opts: {
    owner: string;
    repo: string;
    issueNumber: number;
    add?: string;
    remove?: string;
  },
): Promise<void> {
  const target = {
    owner: opts.owner,
    repo: opts.repo,
    issue_number: opts.issueNumber,
  };
  const labelToAdd = opts.add;
  const labelToRemove = opts.remove;

  if (labelToAdd) {
    try {
      await octokit.rest.issues.addLabels({ ...target, labels: [labelToAdd] });
    } catch (error) {
      console.warn(`Failed to add label '${labelToAdd}': ${error}`);
    }
  }

  if (labelToRemove) {
    try {
      await octokit.rest.issues.removeLabel({ ...target, name: labelToRemove });
    } catch (error) {
      console.warn(`Failed to remove label '${labelToRemove}': ${error}`);
    }
  }
}
|
||||
284
.github/actions/codex/src/prompt-template.ts
vendored
Normal file
284
.github/actions/codex/src/prompt-template.ts
vendored
Normal file
@@ -0,0 +1,284 @@
|
||||
/*
|
||||
* Utilities to render Codex prompt templates.
|
||||
*
|
||||
* A template is a Markdown (or plain-text) file that may contain one or more
|
||||
* placeholders of the form `{CODEX_ACTION_<NAME>}`. At runtime these
|
||||
* placeholders are substituted with dynamically generated content. Each
|
||||
* placeholder is resolved **exactly once** even if it appears multiple times
|
||||
* in the same template.
|
||||
*/
|
||||
|
||||
import { readFile } from "fs/promises";
|
||||
|
||||
import { EnvContext } from "./env-context";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Parsed `$GITHUB_EVENT_PATH` payloads, keyed by file path. The stored value
 * is the pending or settled promise, so each distinct path is read from disk
 * at most once.
 */
const githubEventDataCache: Map<string, Promise<any>> = new Map();

/**
 * Return the parsed JSON event payload for the file named by
 * `GITHUB_EVENT_PATH`, reading and parsing it on first use only.
 */
function getGitHubEventData(ctx: EnvContext): Promise<any> {
  const eventPath = ctx.get("GITHUB_EVENT_PATH");

  const alreadyLoading = githubEventDataCache.get(eventPath);
  if (alreadyLoading) {
    return alreadyLoading;
  }

  const loading = readFile(eventPath, "utf8").then((raw) => JSON.parse(raw));
  githubEventDataCache.set(eventPath, loading);
  return loading;
}
|
||||
|
||||
/**
 * Run a command via `Bun.spawnSync` and return its stdout as a string.
 *
 * A failing command does not throw: the failure is logged together with the
 * command's stderr and an empty string is returned.
 */
async function runCommand(args: Array<string>): Promise<string> {
  const { success, stdout, stderr } = Bun.spawnSync(args, {
    stdout: "pipe",
    stderr: "pipe",
  });

  if (!success) {
    console.error(`Error running ${JSON.stringify(args)}: ${stderr}`);
    return "";
  }

  return stdout.toString();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public API
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Matches `{CODEX_ACTION_<NAME>}`; the capture group holds the variable name
// without the surrounding braces.
const VAR_REGEX = /\{(CODEX_ACTION_[A-Z0-9_]+)\}/g;

// Process-wide cache of placeholder values. Storing the promise means each
// placeholder is resolved at most once, however many templates reference it.
const placeholderCache: Map<string, Promise<string>> = new Map();

/**
 * Render a template by substituting every `{CODEX_ACTION_*}` placeholder with
 * its resolved value. Placeholders without a resolved value become the empty
 * string.
 */
export async function renderPromptTemplate(
  template: string,
  ctx: EnvContext,
): Promise<string> {
  // 1) Collect the distinct placeholder names used by this template.
  const variables = new Set<string>(
    Array.from(template.matchAll(VAR_REGEX), (match) => match[1]),
  );

  // 2) Start resolving every name that has no cache entry yet.
  variables.forEach((variable) => {
    if (!placeholderCache.has(variable)) {
      placeholderCache.set(variable, resolveVariable(variable, ctx));
    }
  });

  // 3) Wait for all cached values, one after another, so that the final
  //    substitution can be synchronous.
  const resolvedMap = new Map<string, string>();
  for (const [key, pending] of placeholderCache.entries()) {
    resolvedMap.set(key, await pending);
  }

  // 4) Substitute via a callback, which inserts each value verbatim and stays
  //    correct even if variable names were to overlap.
  return template.replace(
    VAR_REGEX,
    (_, varName: string) => resolvedMap.get(varName) ?? "",
  );
}
|
||||
|
||||
/**
 * Fetch the base and head branches of the pull request described by the event
 * payload into the workspace repository.
 *
 * The base ref is fetched from `origin`. The head ref is fetched from
 * `origin` as well when both sides share the same clone URL; otherwise a
 * remote named `pr` pointing at the head repository is added and used.
 *
 * @returns The PR's base/head SHAs, or `null` when the required PR
 *          information is not available.
 */
export async function ensureBaseAndHeadCommitsForPRAreAvailable(
  ctx: EnvContext,
): Promise<{ baseSha: string; headSha: string } | null> {
  const prShas = await getPrShas(ctx);
  if (prShas == null) {
    console.warn("Unable to resolve PR branches");
    return null;
  }

  const event = await getGitHubEventData(ctx);
  const pr = event.pull_request;
  if (!pr) {
    console.warn("event.pull_request is not defined - unexpected");
    return null;
  }

  const workspace = ctx.get("GITHUB_WORKSPACE");

  // Branch names and clone URLs of both sides of the PR.
  const baseRef: string | undefined = pr.base?.ref;
  const headRef: string | undefined = pr.head?.ref;
  const baseRemoteUrl: string | undefined = pr.base?.repo?.clone_url;
  const headRemoteUrl: string | undefined = pr.head?.repo?.clone_url;

  if (!baseRef || !headRef || !baseRemoteUrl || !headRemoteUrl) {
    console.warn(
      "Missing PR ref or remote URL information - cannot fetch commits",
    );
    return null;
  }

  // Every git invocation below operates on the workspace checkout.
  const git = (...gitArgs: string[]): Promise<string> =>
    runCommand(["git", "-C", workspace, ...gitArgs]);

  // Base branch.
  await git("fetch", "--no-tags", "origin", baseRef);

  // Head branch.
  const isFork = headRemoteUrl !== baseRemoteUrl;
  if (!isFork) {
    // Same repository – the commit is available from `origin`.
    await git("fetch", "--no-tags", "origin", headRef);
  } else {
    // Fork – register a `pr` remote for it. Adding a remote that already
    // exists makes git fail, which runCommand merely logs, so that case is
    // tolerated; the fetch is attempted either way.
    await git("remote", "add", "pr", headRemoteUrl);
    await git("fetch", "--no-tags", "pr", headRef);
  }

  return prShas;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers – still exported for use by other modules.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Produces the uncoloured `git diff` output between the pull request's base
 * and head commits, fetching them first if necessary.
 *
 * @param ctx Environment accessor used to locate the workspace checkout.
 * @returns The diff text, or an empty string when the PR commits cannot be
 *   resolved.
 */
export async function resolvePrDiff(ctx: EnvContext): Promise<string> {
  const resolved = await ensureBaseAndHeadCommitsForPRAreAvailable(ctx);
  if (resolved == null) {
    console.warn("Unable to resolve PR branches");
    return "";
  }

  const repoDir = ctx.get("GITHUB_WORKSPACE");
  const { baseSha, headSha } = resolved;
  const diffCommand = [
    "git",
    "-C",
    repoDir,
    "diff",
    "--color=never",
    `${baseSha}..${headSha}`,
  ];
  return runCommand(diffCommand);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Placeholder resolution
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Resolves a single template placeholder to its replacement text.
 *
 * @param name Placeholder name as it appears in the template.
 * @param ctx Environment accessor used to read the GitHub event and env vars.
 * @returns The replacement string; an empty string for unknown placeholders
 *   or when the requested field is absent from the event.
 */
async function resolveVariable(name: string, ctx: EnvContext): Promise<string> {
  // Title / body come from the issue, falling back to the pull request.
  if (
    name === "CODEX_ACTION_ISSUE_TITLE" ||
    name === "CODEX_ACTION_ISSUE_BODY"
  ) {
    const payload = await getGitHubEventData(ctx);
    const subject = payload.issue ?? payload.pull_request;
    const text =
      name === "CODEX_ACTION_ISSUE_TITLE" ? subject?.title : subject?.body;
    return text ?? "";
  }

  if (name === "CODEX_ACTION_GITHUB_EVENT_PATH") {
    return ctx.get("GITHUB_EVENT_PATH");
  }

  // Branch names of the two sides of the pull request.
  if (name === "CODEX_ACTION_BASE_REF" || name === "CODEX_ACTION_HEAD_REF") {
    const payload = await getGitHubEventData(ctx);
    const side =
      name === "CODEX_ACTION_BASE_REF"
        ? payload?.pull_request?.base
        : payload?.pull_request?.head;
    return side?.ref ?? "";
  }

  if (name === "CODEX_ACTION_PR_DIFF") {
    return resolvePrDiff(ctx);
  }

  // -------------------------------------------------------------------
  // Add new template variables here.
  // -------------------------------------------------------------------

  // Unknown variable – resolve it to an empty string so the placeholder does
  // not leak into the final prompt. Calling `fail()` would be the stricter
  // alternative; warning and continuing is the more forgiving choice.
  console.warn(`Unknown template variable: ${name}`);
  return "";
}
|
||||
|
||||
/**
 * Reads the base and head commit SHAs of the pull request from the GitHub
 * event payload.
 *
 * @param ctx Environment accessor used to load the event payload.
 * @returns `{ baseSha, headSha }`, or `null` (after logging a warning) when
 *   the event has no `pull_request` or either SHA is missing.
 */
async function getPrShas(
  ctx: EnvContext,
): Promise<{ baseSha: string; headSha: string } | null> {
  const payload = await getGitHubEventData(ctx);
  const pullRequest = payload.pull_request;
  if (!pullRequest) {
    console.warn("event.pull_request is not defined");
    return null;
  }

  // Explicit SHAs are used so nothing depends on local branch names.
  const baseSha: string | undefined = pullRequest.base?.sha;
  const headSha: string | undefined = pullRequest.head?.sha;

  if (baseSha && headSha) {
    return { baseSha, headSha };
  }

  console.warn("one of base or head is not defined on event.pull_request");
  return null;
}
|
||||
42
.github/actions/codex/src/review.ts
vendored
Normal file
42
.github/actions/codex/src/review.ts
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
import type { EnvContext } from "./env-context";
|
||||
import { runCodex } from "./run-codex";
|
||||
import { postComment } from "./post-comment";
|
||||
import { addEyesReaction } from "./add-reaction";
|
||||
|
||||
/**
 * Entry point for `pull_request_review` events. The review body is handled
 * exactly like an ordinary comment: when it contains the trigger phrase, the
 * remaining text is sent to Codex and the reply is posted back.
 *
 * @param ctx Environment accessor providing the trigger phrase and the
 *   review body.
 */
export async function onReview(ctx: EnvContext): Promise<void> {
  const triggerPhrase = ctx.tryGet("INPUT_TRIGGER_PHRASE");
  if (!triggerPhrase) {
    console.warn("Empty trigger phrase: skipping.");
    return;
  }

  const body = ctx.tryGet("GITHUB_EVENT_REVIEW_BODY");
  if (!body) {
    console.warn("Review body not found in environment: skipping.");
    return;
  }

  const mentionsTrigger = body.includes(triggerPhrase);
  if (!mentionsTrigger) {
    console.log(
      `Trigger phrase '${triggerPhrase}' not found: nothing to do for this review.`,
    );
    return;
  }

  // Strip the (first) trigger phrase; whatever is left is the prompt.
  const prompt = body.replace(triggerPhrase, "").trim();
  if (prompt === "") {
    console.warn("Prompt is empty after removing trigger phrase: skipping.");
    return;
  }

  // Acknowledge the request before starting the run.
  await addEyesReaction(ctx);

  const reply = await runCodex(prompt, ctx);
  await postComment(reply, ctx);
}
|
||||
56
.github/actions/codex/src/run-codex.ts
vendored
Normal file
56
.github/actions/codex/src/run-codex.ts
vendored
Normal file
@@ -0,0 +1,56 @@
|
||||
import { fail } from "./fail";
|
||||
import { EnvContext } from "./env-context";
|
||||
import { tmpdir } from "os";
|
||||
import { join } from "node:path";
|
||||
import { readFile, mkdtemp } from "fs/promises";
|
||||
import { resolveWorkspacePath } from "./github-workspace";
|
||||
|
||||
/**
 * Invokes the Codex CLI with the given prompt and returns whatever it wrote
 * to its "last message" output file.
 *
 * @param prompt Prompt text passed to Codex as the final positional argument.
 * @param ctx Environment accessor supplying the API key and optional inputs
 *   (`INPUT_CODEX_ARGS`, `INPUT_CODEX_HOME`).
 * @returns The contents of the last-message file as UTF-8 text.
 */
export async function runCodex(
  prompt: string,
  ctx: EnvContext,
): Promise<string> {
  const OPENAI_API_KEY = ctx.get("OPENAI_API_KEY");

  // Codex writes its final message into a file inside a fresh temp directory.
  const scratchDir = await mkdtemp(join(tmpdir(), "codex-"));
  const lastMessageOutput = join(scratchDir, "codex-prompt.md");

  // Optional user-supplied CLI flags, split on runs of whitespace.
  const inputCodexArgs = ctx.tryGet("INPUT_CODEX_ARGS")?.trim();
  const extraArgs = inputCodexArgs ? inputCodexArgs.split(/\s+/) : [];

  const args = [
    "/usr/local/bin/codex-exec",
    ...extraArgs,
    "--output-last-message",
    lastMessageOutput,
    prompt,
  ];

  const env: Record<string, string> = { ...process.env, OPENAI_API_KEY };
  const INPUT_CODEX_HOME = ctx.tryGet("INPUT_CODEX_HOME");
  if (INPUT_CODEX_HOME) {
    env.CODEX_HOME = resolveWorkspacePath(INPUT_CODEX_HOME, ctx);
  }

  console.log(`Running Codex: ${JSON.stringify(args)}`);
  const outcome = Bun.spawnSync(args, {
    stdout: "inherit",
    stderr: "inherit",
    env,
  });

  if (!outcome.success) {
    fail(`Codex failed: see above for details.`);
  }

  // Read the output generated by Codex.
  try {
    return await readFile(lastMessageOutput, "utf8");
  } catch (err) {
    fail(`Failed to read Codex output at '${lastMessageOutput}': ${err}`);
  }
}
|
||||
33
.github/actions/codex/src/verify-inputs.ts
vendored
Normal file
33
.github/actions/codex/src/verify-inputs.ts
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
// Validate the inputs passed to the composite action.
|
||||
// The script currently ensures that the provided configuration file exists and
|
||||
// matches the expected schema.
|
||||
|
||||
import type { Config } from "./config";
|
||||
|
||||
import { existsSync } from "fs";
|
||||
import * as path from "path";
|
||||
import { fail } from "./fail";
|
||||
|
||||
/**
 * Checks that every prompt file referenced by a label in the config exists on
 * disk and has a `.md` extension, calling `fail()` otherwise.
 *
 * @param config Parsed action configuration whose `labels` are inspected.
 * @param workspace Directory against which relative prompt paths are resolved.
 */
export function performAdditionalValidation(config: Config, workspace: string) {
  for (const [label, details] of Object.entries(config.labels)) {
    // The prompt may be stored under either key (the schema guarantees
    // exactly one of them).
    const entry = details as any;
    const promptPathStr = entry.prompt ?? entry.promptPath;
    if (!promptPathStr) {
      continue;
    }

    let promptPath;
    if (path.isAbsolute(promptPathStr)) {
      promptPath = promptPathStr;
    } else {
      promptPath = path.join(workspace, promptPathStr);
    }

    if (!existsSync(promptPath)) {
      fail(`Prompt file for label '${label}' not found: ${promptPath}`);
    }

    const isMarkdown = promptPath.endsWith(".md");
    if (!isMarkdown) {
      fail(
        `Prompt file for label '${label}' must be a .md file (got ${promptPathStr}).`,
      );
    }
  }
}
|
||||
15
.github/actions/codex/tsconfig.json
vendored
Normal file
15
.github/actions/codex/tsconfig.json
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"lib": ["ESNext"],
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleDetection": "force",
|
||||
"moduleResolution": "bundler",
|
||||
|
||||
"noEmit": true,
|
||||
"strict": true,
|
||||
"skipLibCheck": true
|
||||
},
|
||||
|
||||
"include": ["src"]
|
||||
}
|
||||
3
.github/codex/home/config.toml
vendored
Normal file
3
.github/codex/home/config.toml
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
model = "o3"
|
||||
|
||||
# Consider setting [mcp_servers] here!
|
||||
9
.github/codex/labels/codex-attempt.md
vendored
Normal file
9
.github/codex/labels/codex-attempt.md
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
Attempt to solve the reported issue.
|
||||
|
||||
If a code change is required, create a new branch, commit the fix, and open a pull request that resolves the problem.
|
||||
|
||||
Here is the original GitHub issue that triggered this run:
|
||||
|
||||
### {CODEX_ACTION_ISSUE_TITLE}
|
||||
|
||||
{CODEX_ACTION_ISSUE_BODY}
|
||||
7
.github/codex/labels/codex-review.md
vendored
Normal file
7
.github/codex/labels/codex-review.md
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
Review this PR and respond with a very concise final message, formatted in Markdown.
|
||||
|
||||
There should be a summary of the changes (1-2 sentences) and a few bullet points if necessary.
|
||||
|
||||
Then provide the **review** (1-2 sentences plus bullet points, friendly tone).
|
||||
|
||||
{CODEX_ACTION_GITHUB_EVENT_PATH} contains the JSON that triggered this GitHub workflow. It contains the `base` and `head` refs that define this PR. Both refs are available locally.
|
||||
7
.github/codex/labels/codex-triage.md
vendored
Normal file
7
.github/codex/labels/codex-triage.md
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
Troubleshoot whether the reported issue is valid.
|
||||
|
||||
Provide a concise and respectful comment summarizing the findings.
|
||||
|
||||
### {CODEX_ACTION_ISSUE_TITLE}
|
||||
|
||||
{CODEX_ACTION_ISSUE_BODY}
|
||||
6
.github/dotslash-config.json
vendored
6
.github/dotslash-config.json
vendored
@@ -5,7 +5,7 @@
|
||||
"macos-aarch64": { "regex": "^codex-exec-aarch64-apple-darwin\\.zst$", "path": "codex-exec" },
|
||||
"macos-x86_64": { "regex": "^codex-exec-x86_64-apple-darwin\\.zst$", "path": "codex-exec" },
|
||||
"linux-x86_64": { "regex": "^codex-exec-x86_64-unknown-linux-musl\\.zst$", "path": "codex-exec" },
|
||||
"linux-aarch64": { "regex": "^codex-exec-aarch64-unknown-linux-gnu\\.zst$", "path": "codex-exec" }
|
||||
"linux-aarch64": { "regex": "^codex-exec-aarch64-unknown-linux-musl\\.zst$", "path": "codex-exec" }
|
||||
}
|
||||
},
|
||||
|
||||
@@ -14,14 +14,14 @@
|
||||
"macos-aarch64": { "regex": "^codex-aarch64-apple-darwin\\.zst$", "path": "codex" },
|
||||
"macos-x86_64": { "regex": "^codex-x86_64-apple-darwin\\.zst$", "path": "codex" },
|
||||
"linux-x86_64": { "regex": "^codex-x86_64-unknown-linux-musl\\.zst$", "path": "codex" },
|
||||
"linux-aarch64": { "regex": "^codex-aarch64-unknown-linux-gnu\\.zst$", "path": "codex" }
|
||||
"linux-aarch64": { "regex": "^codex-aarch64-unknown-linux-musl\\.zst$", "path": "codex" }
|
||||
}
|
||||
},
|
||||
|
||||
"codex-linux-sandbox": {
|
||||
"platforms": {
|
||||
"linux-x86_64": { "regex": "^codex-linux-sandbox-x86_64-unknown-linux-musl\\.zst$", "path": "codex-linux-sandbox" },
|
||||
"linux-aarch64": { "regex": "^codex-linux-sandbox-aarch64-unknown-linux-gnu\\.zst$", "path": "codex-linux-sandbox" }
|
||||
"linux-aarch64": { "regex": "^codex-linux-sandbox-aarch64-unknown-linux-musl\\.zst$", "path": "codex-linux-sandbox" }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
95
.github/workflows/codex.yml
vendored
Normal file
95
.github/workflows/codex.yml
vendored
Normal file
@@ -0,0 +1,95 @@
|
||||
name: Codex
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened, labeled]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
types: [labeled]
|
||||
|
||||
jobs:
|
||||
codex:
|
||||
# This `if` check provides complex filtering logic to avoid running Codex
|
||||
# on every PR. Admittedly, one thing this does not verify is whether the
|
||||
# sender has write access to the repo: that must be done as part of a
|
||||
# runtime step.
|
||||
#
|
||||
# Note the label values should match the ones in the .github/codex/labels
|
||||
# folder.
|
||||
if: |
|
||||
(github.event_name == 'issues' && (
|
||||
(github.event.action == 'labeled' && (github.event.label.name == 'codex-attempt' || github.event.label.name == 'codex-triage'))
|
||||
)) ||
|
||||
(github.event_name == 'pull_request' && github.event.action == 'labeled' && github.event.label.name == 'codex-review')
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write # can push or create branches
|
||||
issues: write # for comments + labels on issues/PRs
|
||||
pull-requests: write # for PR comments/labels
|
||||
steps:
|
||||
# TODO: Consider adding an optional mode (--dry-run?) to actions/codex
|
||||
# that verifies whether Codex should actually be run for this event.
|
||||
# (For example, it may be rejected because the sender does not have
|
||||
# write access to the repo.) The benefit would be two-fold:
|
||||
# 1. As the first step of this job, it gives us a chance to add a reaction
|
||||
# or comment to the PR/issue ASAP to "ack" the request.
|
||||
# 2. It saves resources by skipping the clone and setup steps below if
|
||||
# Codex is not going to run.
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
# We install the dependencies like we would for an ordinary CI job,
|
||||
# particularly because Codex will not have network access to install
|
||||
# these dependencies.
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 10.8.1
|
||||
run_install: false
|
||||
|
||||
- name: Get pnpm store directory
|
||||
id: pnpm-cache
|
||||
shell: bash
|
||||
run: |
|
||||
echo "store_path=$(pnpm store path --silent)" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Setup pnpm cache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ${{ steps.pnpm-cache.outputs.store_path }}
|
||||
key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pnpm-store-
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.87
|
||||
with:
|
||||
targets: x86_64-unknown-linux-gnu
|
||||
components: clippy
|
||||
|
||||
- uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/bin/
|
||||
~/.cargo/registry/index/
|
||||
~/.cargo/registry/cache/
|
||||
~/.cargo/git/db/
|
||||
${{ github.workspace }}/codex-rs/target/
|
||||
key: cargo-ubuntu-24.04-x86_64-unknown-linux-gnu-${{ hashFiles('**/Cargo.lock') }}
|
||||
|
||||
# Note it is possible that the `verify` step internal to Run Codex will
|
||||
# fail, in which case the work to set up the repo was worthless :(
|
||||
- name: Run Codex
|
||||
uses: ./.github/actions/codex
|
||||
with:
|
||||
openai_api_key: ${{ secrets.CODEX_OPENAI_API_KEY }}
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
codex_home: ./.github/codex/home
|
||||
8
.github/workflows/rust-ci.yml
vendored
8
.github/workflows/rust-ci.yml
vendored
@@ -55,6 +55,10 @@ jobs:
|
||||
target: x86_64-unknown-linux-musl
|
||||
- runner: ubuntu-24.04
|
||||
target: x86_64-unknown-linux-gnu
|
||||
- runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-musl
|
||||
- runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-gnu
|
||||
- runner: windows-latest
|
||||
target: x86_64-pc-windows-msvc
|
||||
|
||||
@@ -75,7 +79,7 @@ jobs:
|
||||
${{ github.workspace }}/codex-rs/target/
|
||||
key: cargo-${{ matrix.runner }}-${{ matrix.target }}-${{ hashFiles('**/Cargo.lock') }}
|
||||
|
||||
- if: ${{ matrix.target == 'x86_64-unknown-linux-musl' }}
|
||||
- if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
|
||||
name: Install musl build tools
|
||||
run: |
|
||||
sudo apt install -y musl-tools pkg-config
|
||||
@@ -100,6 +104,8 @@ jobs:
|
||||
id: test
|
||||
continue-on-error: true
|
||||
run: cargo test --all-features --target ${{ matrix.target }}
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
|
||||
# Fail the job if any of the previous steps failed.
|
||||
- name: verify all steps passed
|
||||
|
||||
69
.github/workflows/rust-release.yml
vendored
69
.github/workflows/rust-release.yml
vendored
@@ -15,9 +15,6 @@ concurrency:
|
||||
group: ${{ github.workflow }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
TAG_REGEX: '^rust-v[0-9]+\.[0-9]+\.[0-9]+$'
|
||||
|
||||
jobs:
|
||||
tag-check:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -33,8 +30,8 @@ jobs:
|
||||
# 1. Must be a tag and match the regex
|
||||
[[ "${GITHUB_REF_TYPE}" == "tag" ]] \
|
||||
|| { echo "❌ Not a tag push"; exit 1; }
|
||||
[[ "${GITHUB_REF_NAME}" =~ ${TAG_REGEX} ]] \
|
||||
|| { echo "❌ Tag '${GITHUB_REF_NAME}' != ${TAG_REGEX}"; exit 1; }
|
||||
[[ "${GITHUB_REF_NAME}" =~ ^rust-v[0-9]+\.[0-9]+\.[0-9]+(-(alpha|beta)(\.[0-9]+)?)?$ ]] \
|
||||
|| { echo "❌ Tag '${GITHUB_REF_NAME}' doesn't match expected format"; exit 1; }
|
||||
|
||||
# 2. Extract versions
|
||||
tag_ver="${GITHUB_REF_NAME#rust-v}"
|
||||
@@ -69,6 +66,8 @@ jobs:
|
||||
target: x86_64-unknown-linux-musl
|
||||
- runner: ubuntu-24.04
|
||||
target: x86_64-unknown-linux-gnu
|
||||
- runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-musl
|
||||
- runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-gnu
|
||||
|
||||
@@ -88,7 +87,7 @@ jobs:
|
||||
${{ github.workspace }}/codex-rs/target/
|
||||
key: cargo-release-${{ matrix.runner }}-${{ matrix.target }}-${{ hashFiles('**/Cargo.lock') }}
|
||||
|
||||
- if: ${{ matrix.target == 'x86_64-unknown-linux-musl' }}
|
||||
- if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
|
||||
name: Install musl build tools
|
||||
run: |
|
||||
sudo apt install -y musl-tools pkg-config
|
||||
@@ -105,7 +104,10 @@ jobs:
|
||||
cp target/${{ matrix.target }}/release/codex-exec "$dest/codex-exec-${{ matrix.target }}"
|
||||
cp target/${{ matrix.target }}/release/codex "$dest/codex-${{ matrix.target }}"
|
||||
|
||||
- if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'x86_64-unknown-linux-gnu' || matrix.target == 'aarch64-unknown-linux-gnu' }}
|
||||
# After https://github.com/openai/codex/pull/1228 is merged and a new
|
||||
# release is cut with artifacts built after that PR, the `-gnu`
|
||||
# variants can go away as we will only use the `-musl` variants.
|
||||
- if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'x86_64-unknown-linux-gnu' || matrix.target == 'aarch64-unknown-linux-gnu' || matrix.target == 'aarch64-unknown-linux-musl' }}
|
||||
name: Stage Linux-only artifacts
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -115,20 +117,47 @@ jobs:
|
||||
- name: Compress artifacts
|
||||
shell: bash
|
||||
run: |
|
||||
# Path that contains the uncompressed binaries for the current
|
||||
# ${{ matrix.target }}
|
||||
dest="dist/${{ matrix.target }}"
|
||||
zstd -T0 -19 --rm "$dest"/*
|
||||
|
||||
# For compatibility with environments that lack the `zstd` tool we
|
||||
# additionally create a `.tar.gz` alongside every single binary that
|
||||
# we publish. The end result is:
|
||||
# codex-<target>.zst (existing)
|
||||
# codex-<target>.tar.gz (new)
|
||||
# ...same naming for codex-exec-* and codex-linux-sandbox-*
|
||||
|
||||
# 1. Produce a .tar.gz for every file in the directory *before* we
|
||||
# run `zstd --rm`, because that flag deletes the original files.
|
||||
for f in "$dest"/*; do
|
||||
base="$(basename "$f")"
|
||||
# Skip files that are already archives (shouldn't happen, but be
|
||||
# safe).
|
||||
if [[ "$base" == *.tar.gz ]]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
# Create per-binary tar.gz
|
||||
tar -C "$dest" -czf "$dest/${base}.tar.gz" "$base"
|
||||
|
||||
# Also create .zst (existing behaviour) *and* remove the original
|
||||
# uncompressed binary to keep the directory small.
|
||||
zstd -T0 -19 --rm "$dest/$base"
|
||||
done
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.target }}
|
||||
path: codex-rs/dist/${{ matrix.target }}/*
|
||||
# Upload the per-binary .zst files as well as the new .tar.gz
|
||||
# equivalents we generated in the previous step.
|
||||
path: |
|
||||
codex-rs/dist/${{ matrix.target }}/*
|
||||
|
||||
release:
|
||||
needs: build
|
||||
name: release
|
||||
runs-on: ubuntu-24.04
|
||||
env:
|
||||
RELEASE_TAG: codex-rs-${{ github.sha }}-${{ github.run_attempt }}-${{ github.ref_name }}
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4
|
||||
@@ -138,9 +167,19 @@ jobs:
|
||||
- name: List
|
||||
run: ls -R dist/
|
||||
|
||||
- uses: softprops/action-gh-release@v2
|
||||
- name: Define release name
|
||||
id: release_name
|
||||
run: |
|
||||
# Extract the version from the tag name, which is in the format
|
||||
# "rust-v0.1.0".
|
||||
version="${GITHUB_REF_NAME#rust-v}"
|
||||
echo "name=${version}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Create GitHub Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: ${{ env.RELEASE_TAG }}
|
||||
name: ${{ steps.release_name.outputs.name }}
|
||||
tag_name: ${{ github.ref_name }}
|
||||
files: dist/**
|
||||
# For now, tag releases as "prerelease" because we are not claiming
|
||||
# the Rust CLI is stable yet.
|
||||
@@ -150,5 +189,5 @@ jobs:
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
tag: ${{ env.RELEASE_TAG }}
|
||||
tag: ${{ github.ref_name }}
|
||||
config: .github/dotslash-config.json
|
||||
|
||||
16
CHANGELOG.md
16
CHANGELOG.md
@@ -2,6 +2,22 @@
|
||||
|
||||
You can install any of these versions: `npm install -g codex@version`
|
||||
|
||||
## `0.1.2505172129`
|
||||
|
||||
### 🪲 Bug Fixes
|
||||
|
||||
- Add node version check (#1007)
|
||||
- Persist token after refresh (#1006)
|
||||
|
||||
## `0.1.2505171619`
|
||||
|
||||
- `codex --login` + `codex --free` (#998)
|
||||
|
||||
## `0.1.2505161800`
|
||||
|
||||
- Sign in with chatgpt credits (#974)
|
||||
- Add support for OpenAI tool type, local_shell (#961)
|
||||
|
||||
## `0.1.2505161243`
|
||||
|
||||
- Sign in with chatgpt (#963)
|
||||
|
||||
@@ -469,7 +469,7 @@ export OPENAI_API_KEY="your-api-key-here"
|
||||
|
||||
# Azure OpenAI
|
||||
export AZURE_OPENAI_API_KEY="your-azure-api-key-here"
|
||||
export AZURE_OPENAI_API_VERSION="2025-03-01-preview" (Optional)
|
||||
export AZURE_OPENAI_API_VERSION="2025-04-01-preview" (Optional)
|
||||
|
||||
# OpenRouter
|
||||
export OPENROUTER_API_KEY="your-openrouter-key-here"
|
||||
|
||||
@@ -16,14 +16,23 @@
|
||||
*/
|
||||
|
||||
import { spawnSync } from "child_process";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import { fileURLToPath, pathToFileURL } from "url";
|
||||
|
||||
// Determine whether the user explicitly wants the Rust CLI.
|
||||
const wantsNative =
|
||||
process.env.CODEX_RUST != null
|
||||
|
||||
// __dirname equivalent in ESM
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
// For the @native release of the Node module, the `use-native` file is added,
|
||||
// indicating we should default to the native binary. For other releases,
|
||||
// setting CODEX_RUST=1 will opt-in to the native binary, if included.
|
||||
const wantsNative = fs.existsSync(path.join(__dirname, "use-native")) ||
|
||||
(process.env.CODEX_RUST != null
|
||||
? ["1", "true", "yes"].includes(process.env.CODEX_RUST.toLowerCase())
|
||||
: false;
|
||||
: false);
|
||||
|
||||
// Try native binary if requested.
|
||||
if (wantsNative) {
|
||||
@@ -37,7 +46,7 @@ if (wantsNative) {
|
||||
targetTriple = "x86_64-unknown-linux-musl";
|
||||
break;
|
||||
case "arm64":
|
||||
targetTriple = "aarch64-unknown-linux-gnu";
|
||||
targetTriple = "aarch64-unknown-linux-musl";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -63,10 +72,6 @@ if (wantsNative) {
|
||||
throw new Error(`Unsupported platform: ${platform} (${arch})`);
|
||||
}
|
||||
|
||||
// __dirname equivalent in ESM
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
const binaryPath = path.join(__dirname, "..", "bin", `codex-${targetTriple}`);
|
||||
const result = spawnSync(binaryPath, process.argv.slice(2), {
|
||||
stdio: "inherit",
|
||||
@@ -78,10 +83,6 @@ if (wantsNative) {
|
||||
|
||||
// Fallback: execute the original JavaScript CLI.
|
||||
|
||||
// Determine this script's directory
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
// Resolve the path to the compiled CLI bundle
|
||||
const cliPath = path.resolve(__dirname, "../dist/cli.js");
|
||||
const cliUrl = pathToFileURL(cliPath).href;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@openai/codex",
|
||||
"version": "0.1.2504301751",
|
||||
"version": "0.0.0-dev",
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"codex": "bin/codex.js"
|
||||
|
||||
@@ -65,7 +65,7 @@ mkdir -p "$BIN_DIR"
|
||||
# Until we start publishing stable GitHub releases, we have to grab the binaries
|
||||
# from the GitHub Action that created them. Update the URL below to point to the
|
||||
# appropriate workflow run:
|
||||
WORKFLOW_URL="https://github.com/openai/codex/actions/runs/14950726936"
|
||||
WORKFLOW_URL="https://github.com/openai/codex/actions/runs/15483730027"
|
||||
WORKFLOW_ID="${WORKFLOW_URL##*/}"
|
||||
|
||||
ARTIFACTS_DIR="$(mktemp -d)"
|
||||
@@ -78,7 +78,7 @@ gh run download --dir "$ARTIFACTS_DIR" --repo openai/codex "$WORKFLOW_ID"
|
||||
zstd -d "$ARTIFACTS_DIR/x86_64-unknown-linux-musl/codex-linux-sandbox-x86_64-unknown-linux-musl.zst" \
|
||||
-o "$BIN_DIR/codex-linux-sandbox-x64"
|
||||
|
||||
zstd -d "$ARTIFACTS_DIR/aarch64-unknown-linux-gnu/codex-linux-sandbox-aarch64-unknown-linux-gnu.zst" \
|
||||
zstd -d "$ARTIFACTS_DIR/aarch64-unknown-linux-musl/codex-linux-sandbox-aarch64-unknown-linux-musl.zst" \
|
||||
-o "$BIN_DIR/codex-linux-sandbox-arm64"
|
||||
|
||||
if [[ "$INCLUDE_RUST" -eq 1 ]]; then
|
||||
@@ -86,8 +86,8 @@ if [[ "$INCLUDE_RUST" -eq 1 ]]; then
|
||||
zstd -d "$ARTIFACTS_DIR/x86_64-unknown-linux-musl/codex-x86_64-unknown-linux-musl.zst" \
|
||||
-o "$BIN_DIR/codex-x86_64-unknown-linux-musl"
|
||||
# ARM64 Linux
|
||||
zstd -d "$ARTIFACTS_DIR/aarch64-unknown-linux-gnu/codex-aarch64-unknown-linux-gnu.zst" \
|
||||
-o "$BIN_DIR/codex-aarch64-unknown-linux-gnu"
|
||||
zstd -d "$ARTIFACTS_DIR/aarch64-unknown-linux-musl/codex-aarch64-unknown-linux-musl.zst" \
|
||||
-o "$BIN_DIR/codex-aarch64-unknown-linux-musl"
|
||||
# x64 macOS
|
||||
zstd -d "$ARTIFACTS_DIR/x86_64-apple-darwin/codex-x86_64-apple-darwin.zst" \
|
||||
-o "$BIN_DIR/codex-x86_64-apple-darwin"
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
# When --native is supplied we copy the linux-sandbox binaries (as before) and
|
||||
# additionally fetch / unpack the two Rust targets that we currently support:
|
||||
# - x86_64-unknown-linux-musl
|
||||
# - aarch64-unknown-linux-gnu
|
||||
# - aarch64-unknown-linux-musl
|
||||
#
|
||||
# NOTE: This script is intended to be run from the repository root via
|
||||
# `pnpm --filter codex-cli stage-release ...` or inside codex-cli with the
|
||||
@@ -122,6 +122,7 @@ jq --arg version "$VERSION" \
|
||||
|
||||
if [[ "$INCLUDE_NATIVE" -eq 1 ]]; then
|
||||
./scripts/install_native_deps.sh "$TMPDIR" --full-native
|
||||
touch "${TMPDIR}/bin/use-native"
|
||||
else
|
||||
./scripts/install_native_deps.sh "$TMPDIR"
|
||||
fi
|
||||
@@ -130,11 +131,12 @@ popd >/dev/null
|
||||
|
||||
echo "Staged version $VERSION for release in $TMPDIR"
|
||||
|
||||
echo "Test Node:"
|
||||
echo " node ${TMPDIR}/bin/codex.js --help"
|
||||
if [[ "$INCLUDE_NATIVE" -eq 1 ]]; then
|
||||
echo "Test Rust:"
|
||||
echo " CODEX_RUST=1 node ${TMPDIR}/bin/codex.js --help"
|
||||
echo " node ${TMPDIR}/bin/codex.js --help"
|
||||
else
|
||||
echo "Test Node:"
|
||||
echo " node ${TMPDIR}/bin/codex.js --help"
|
||||
fi
|
||||
|
||||
# Print final hint for convenience
|
||||
|
||||
@@ -1,6 +1,19 @@
|
||||
#!/usr/bin/env node
|
||||
import "dotenv/config";
|
||||
|
||||
// Refuse to start on Node.js releases older than v22: print an upgrade hint
// to stderr and exit with status 1.
const major = Number(process.versions.node.split(".")[0]);
if (major < 22) {
  const upgradeNotice =
    "\n" +
    "Codex CLI requires Node.js version 22 or newer.\n" +
    `You are running Node.js v${process.versions.node}.\n` +
    "Please upgrade Node.js: https://nodejs.org/en/download/\n";
  // eslint-disable-next-line no-console
  console.error(upgradeNotice);
  process.exit(1);
}
|
||||
|
||||
// Hack to suppress deprecation warnings (punycode)
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
(process as any).noDeprecation = true;
|
||||
@@ -24,11 +37,15 @@ import {
|
||||
PRETTY_PRINT,
|
||||
INSTRUCTIONS_FILEPATH,
|
||||
} from "./utils/config";
|
||||
import { getApiKey as fetchApiKey } from "./utils/get-api-key";
|
||||
import {
|
||||
getApiKey as fetchApiKey,
|
||||
maybeRedeemCredits,
|
||||
} from "./utils/get-api-key";
|
||||
import { createInputItem } from "./utils/input-utils";
|
||||
import { initLogger } from "./utils/logger/log";
|
||||
import { isModelSupportedForResponses } from "./utils/model-utils.js";
|
||||
import { parseToolCall } from "./utils/parsers";
|
||||
import { providers } from "./utils/providers";
|
||||
import { onExit, setInkRenderer } from "./utils/terminal";
|
||||
import chalk from "chalk";
|
||||
import { spawnSync } from "child_process";
|
||||
@@ -63,6 +80,8 @@ const cli = meow(
|
||||
-i, --image <path> Path(s) to image files to include as input
|
||||
-v, --view <rollout> Inspect a previously saved rollout instead of starting a session
|
||||
--history Browse previous sessions
|
||||
--login Start a new sign in flow
|
||||
--free Retry redeeming free credits
|
||||
-q, --quiet Non-interactive mode that only prints the assistant's final output
|
||||
-c, --config Open the instructions file in your editor
|
||||
-w, --writable-root <path> Writable folder for sandbox in full-auto mode (can be specified multiple times)
|
||||
@@ -108,6 +127,8 @@ const cli = meow(
|
||||
version: { type: "boolean", description: "Print version and exit" },
|
||||
view: { type: "string" },
|
||||
history: { type: "boolean", description: "Browse previous sessions" },
|
||||
login: { type: "boolean", description: "Force a new sign in flow" },
|
||||
free: { type: "boolean", description: "Retry redeeming free credits" },
|
||||
model: { type: "string", aliases: ["m"] },
|
||||
provider: { type: "string", aliases: ["p"] },
|
||||
image: { type: "string", isMultiple: true, aliases: ["i"] },
|
||||
@@ -279,6 +300,13 @@ const client = {
|
||||
};
|
||||
|
||||
let apiKey = "";
|
||||
let savedTokens:
|
||||
| {
|
||||
id_token?: string;
|
||||
access_token?: string;
|
||||
refresh_token: string;
|
||||
}
|
||||
| undefined;
|
||||
|
||||
// Try to load existing auth file if present
|
||||
try {
|
||||
@@ -287,6 +315,7 @@ try {
|
||||
const authFile = path.join(authDir, "auth.json");
|
||||
if (fs.existsSync(authFile)) {
|
||||
const data = JSON.parse(fs.readFileSync(authFile, "utf-8"));
|
||||
savedTokens = data.tokens;
|
||||
const lastRefreshTime = data.last_refresh
|
||||
? new Date(data.last_refresh).getTime()
|
||||
: 0;
|
||||
@@ -299,12 +328,59 @@ try {
|
||||
// ignore errors
|
||||
}
|
||||
|
||||
if (!apiKey) {
|
||||
apiKey = await fetchApiKey(client.issuer, client.client_id);
|
||||
// Get provider-specific API key if not OpenAI
|
||||
if (provider.toLowerCase() !== "openai") {
|
||||
const providerInfo = providers[provider.toLowerCase()];
|
||||
if (providerInfo) {
|
||||
const providerApiKey = process.env[providerInfo.envKey];
|
||||
if (providerApiKey) {
|
||||
apiKey = providerApiKey;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Only proceed with OpenAI auth flow if:
|
||||
// 1. Provider is OpenAI and no API key is set, or
|
||||
// 2. Login flag is explicitly set
|
||||
if (provider.toLowerCase() === "openai" && !apiKey) {
|
||||
if (cli.flags.login) {
|
||||
apiKey = await fetchApiKey(client.issuer, client.client_id);
|
||||
try {
|
||||
const home = os.homedir();
|
||||
const authDir = path.join(home, ".codex");
|
||||
const authFile = path.join(authDir, "auth.json");
|
||||
if (fs.existsSync(authFile)) {
|
||||
const data = JSON.parse(fs.readFileSync(authFile, "utf-8"));
|
||||
savedTokens = data.tokens;
|
||||
}
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
} else {
|
||||
apiKey = await fetchApiKey(client.issuer, client.client_id);
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure the API key is available as an environment variable for legacy code
|
||||
process.env["OPENAI_API_KEY"] = apiKey;
|
||||
|
||||
// Only attempt credit redemption for OpenAI provider
|
||||
if (cli.flags.free && provider.toLowerCase() === "openai") {
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(`${chalk.bold("codex --free")} attempting to redeem credits...`);
|
||||
if (!savedTokens?.refresh_token) {
|
||||
apiKey = await fetchApiKey(client.issuer, client.client_id, true);
|
||||
// fetchApiKey includes credit redemption as the end of the flow
|
||||
} else {
|
||||
await maybeRedeemCredits(
|
||||
client.issuer,
|
||||
client.client_id,
|
||||
savedTokens.refresh_token,
|
||||
savedTokens.id_token,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Set of providers that don't require API keys
|
||||
const NO_API_KEY_REQUIRED = new Set(["ollama"]);
|
||||
|
||||
@@ -322,13 +398,18 @@ if (!apiKey && !NO_API_KEY_REQUIRED.has(provider.toLowerCase())) {
|
||||
? `You can create a key here: ${chalk.bold(
|
||||
chalk.underline("https://platform.openai.com/account/api-keys"),
|
||||
)}\n`
|
||||
: provider.toLowerCase() === "gemini"
|
||||
: provider.toLowerCase() === "azure"
|
||||
? `You can create a ${chalk.bold(
|
||||
`${provider.toUpperCase()}_API_KEY`,
|
||||
)} ` + `in the ${chalk.bold(`Google AI Studio`)}.\n`
|
||||
: `You can create a ${chalk.bold(
|
||||
`${provider.toUpperCase()}_API_KEY`,
|
||||
)} ` + `in the ${chalk.bold(`${provider}`)} dashboard.\n`
|
||||
`${provider.toUpperCase()}_OPENAI_API_KEY`,
|
||||
)} ` +
|
||||
`in Azure AI Foundry portal at ${chalk.bold(chalk.underline("https://ai.azure.com"))}.\n`
|
||||
: provider.toLowerCase() === "gemini"
|
||||
? `You can create a ${chalk.bold(
|
||||
`${provider.toUpperCase()}_API_KEY`,
|
||||
)} ` + `in the ${chalk.bold(`Google AI Studio`)}.\n`
|
||||
: `You can create a ${chalk.bold(
|
||||
`${provider.toUpperCase()}_API_KEY`,
|
||||
)} ` + `in the ${chalk.bold(`${provider}`)} dashboard.\n`
|
||||
}`,
|
||||
);
|
||||
process.exit(1);
|
||||
|
||||
@@ -800,7 +800,8 @@ export class AgentLoop {
|
||||
|
||||
const responseCall =
|
||||
!this.config.provider ||
|
||||
this.config.provider?.toLowerCase() === "openai"
|
||||
this.config.provider?.toLowerCase() === "openai" ||
|
||||
this.config.provider?.toLowerCase() === "azure"
|
||||
? (params: ResponseCreateParams) =>
|
||||
this.oai.responses.create(params)
|
||||
: (params: ResponseCreateParams) =>
|
||||
@@ -1188,7 +1189,8 @@ export class AgentLoop {
|
||||
|
||||
const responseCall =
|
||||
!this.config.provider ||
|
||||
this.config.provider?.toLowerCase() === "openai"
|
||||
this.config.provider?.toLowerCase() === "openai" ||
|
||||
this.config.provider?.toLowerCase() === "azure"
|
||||
? (params: ResponseCreateParams) =>
|
||||
this.oai.responses.create(params)
|
||||
: (params: ResponseCreateParams) =>
|
||||
|
||||
@@ -69,7 +69,7 @@ export const OPENAI_BASE_URL = process.env["OPENAI_BASE_URL"] || "";
|
||||
export let OPENAI_API_KEY = process.env["OPENAI_API_KEY"] || "";
|
||||
|
||||
export const AZURE_OPENAI_API_VERSION =
|
||||
process.env["AZURE_OPENAI_API_VERSION"] || "2025-03-01-preview";
|
||||
process.env["AZURE_OPENAI_API_VERSION"] || "2025-04-01-preview";
|
||||
|
||||
export const DEFAULT_REASONING_EFFORT = "high";
|
||||
export const OPENAI_ORGANIZATION = process.env["OPENAI_ORGANIZATION"] || "";
|
||||
|
||||
@@ -2,7 +2,7 @@ import type { Choice } from "./get-api-key-components";
|
||||
import type { Request, Response } from "express";
|
||||
|
||||
import { ApiKeyPrompt, WaitingForAuth } from "./get-api-key-components";
|
||||
import { clearTerminal } from "./terminal";
|
||||
import chalk from "chalk";
|
||||
import express from "express";
|
||||
import fs from "fs/promises";
|
||||
import { render } from "ink";
|
||||
@@ -51,11 +51,15 @@ async function getOidcConfiguration(
|
||||
}
|
||||
|
||||
interface IDTokenClaims {
|
||||
"exp": number;
|
||||
"https://api.openai.com/auth": {
|
||||
organization_id: string;
|
||||
project_id: string;
|
||||
completed_platform_onboarding: boolean;
|
||||
is_org_owner: boolean;
|
||||
chatgpt_subscription_active_start: string;
|
||||
chatgpt_subscription_active_until: string;
|
||||
chatgpt_plan_type: string;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -77,6 +81,206 @@ function generatePKCECodes(): {
|
||||
return { code_verifier, code_challenge };
|
||||
}
|
||||
|
||||
async function maybeRedeemCredits(
|
||||
issuer: string,
|
||||
clientId: string,
|
||||
refreshToken: string,
|
||||
idToken?: string,
|
||||
): Promise<void> {
|
||||
try {
|
||||
let currentIdToken = idToken;
|
||||
let idClaims: IDTokenClaims | undefined;
|
||||
|
||||
if (
|
||||
currentIdToken &&
|
||||
typeof currentIdToken === "string" &&
|
||||
currentIdToken.split(".")[1]
|
||||
) {
|
||||
idClaims = JSON.parse(
|
||||
Buffer.from(currentIdToken.split(".")[1]!, "base64url").toString(
|
||||
"utf8",
|
||||
),
|
||||
) as IDTokenClaims;
|
||||
} else {
|
||||
currentIdToken = "";
|
||||
}
|
||||
|
||||
// Validate idToken expiration
|
||||
// if expired, attempt token-exchange for a fresh idToken
|
||||
if (!idClaims || !idClaims.exp || Date.now() >= idClaims.exp * 1000) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(chalk.dim("Refreshing credentials..."));
|
||||
try {
|
||||
const refreshRes = await fetch("https://auth.openai.com/oauth/token", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
client_id: clientId,
|
||||
grant_type: "refresh_token",
|
||||
refresh_token: refreshToken,
|
||||
scope: "openid profile email",
|
||||
}),
|
||||
});
|
||||
if (!refreshRes.ok) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.warn(
|
||||
`Failed to refresh credentials: ${refreshRes.status} ${refreshRes.statusText}\n${chalk.dim(await refreshRes.text())}`,
|
||||
);
|
||||
// eslint-disable-next-line no-console
|
||||
console.warn(
|
||||
`Please sign in again to redeem credits: ${chalk.bold("codex --login")}`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
const refreshData = (await refreshRes.json()) as {
|
||||
id_token: string;
|
||||
refresh_token?: string;
|
||||
};
|
||||
currentIdToken = refreshData.id_token;
|
||||
idClaims = JSON.parse(
|
||||
Buffer.from(currentIdToken.split(".")[1]!, "base64url").toString(
|
||||
"utf8",
|
||||
),
|
||||
) as IDTokenClaims;
|
||||
if (refreshData.refresh_token) {
|
||||
try {
|
||||
const home = os.homedir();
|
||||
const authDir = path.join(home, ".codex");
|
||||
const authFile = path.join(authDir, "auth.json");
|
||||
const existingJson = JSON.parse(
|
||||
await fs.readFile(authFile, "utf-8"),
|
||||
);
|
||||
existingJson.tokens.id_token = currentIdToken;
|
||||
existingJson.tokens.refresh_token = refreshData.refresh_token;
|
||||
existingJson.last_refresh = new Date().toISOString();
|
||||
await fs.writeFile(
|
||||
authFile,
|
||||
JSON.stringify(existingJson, null, 2),
|
||||
{ mode: 0o600 },
|
||||
);
|
||||
} catch (err) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.warn("Unable to update refresh token in auth file:", err);
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.warn("Unable to refresh ID token via token-exchange:", err);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Confirm the subscription is active for more than 7 days
|
||||
const subStart =
|
||||
idClaims["https://api.openai.com/auth"]
|
||||
?.chatgpt_subscription_active_start;
|
||||
if (
|
||||
typeof subStart === "string" &&
|
||||
Date.now() - new Date(subStart).getTime() < 7 * 24 * 60 * 60 * 1000
|
||||
) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.warn(
|
||||
"Sorry, your subscription must be active for more than 7 days to redeem credits.\nMore info: " +
|
||||
chalk.dim("https://help.openai.com/en/articles/11381614") +
|
||||
chalk.bold(
|
||||
"\nPlease try again on " +
|
||||
new Date(
|
||||
new Date(subStart).getTime() + 7 * 24 * 60 * 60 * 1000,
|
||||
).toLocaleDateString() +
|
||||
" " +
|
||||
new Date(
|
||||
new Date(subStart).getTime() + 7 * 24 * 60 * 60 * 1000,
|
||||
).toLocaleTimeString(),
|
||||
),
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const completed = Boolean(
|
||||
idClaims["https://api.openai.com/auth"]?.completed_platform_onboarding,
|
||||
);
|
||||
const isOwner = Boolean(
|
||||
idClaims["https://api.openai.com/auth"]?.is_org_owner,
|
||||
);
|
||||
const needsSetup = !completed && isOwner;
|
||||
|
||||
const planType = idClaims["https://api.openai.com/auth"]
|
||||
?.chatgpt_plan_type as string | undefined;
|
||||
|
||||
if (needsSetup || !(planType === "plus" || planType === "pro")) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.warn(
|
||||
"Users with Plus or Pro subscriptions can redeem free API credits.\nMore info: " +
|
||||
chalk.dim("https://help.openai.com/en/articles/11381614"),
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const apiHost =
|
||||
issuer === "https://auth.openai.com"
|
||||
? "https://api.openai.com"
|
||||
: "https://api.openai.org";
|
||||
|
||||
const redeemRes = await fetch(`${apiHost}/v1/billing/redeem_credits`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ id_token: currentIdToken }),
|
||||
});
|
||||
|
||||
if (!redeemRes.ok) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.warn(
|
||||
`Credit redemption request failed: ${redeemRes.status} ${redeemRes.statusText}`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const redeemData = (await redeemRes.json()) as {
|
||||
granted_chatgpt_subscriber_api_credits?: number;
|
||||
};
|
||||
const granted = redeemData?.granted_chatgpt_subscriber_api_credits ?? 0;
|
||||
if (granted > 0) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(
|
||||
chalk.green(
|
||||
`${chalk.bold(
|
||||
`Thanks for being a ChatGPT ${
|
||||
planType === "plus" ? "Plus" : "Pro"
|
||||
} subscriber!`,
|
||||
)}\nIf you haven't already redeemed, you should receive ${
|
||||
planType === "plus" ? "$5" : "$50"
|
||||
} in API credits\nCredits: ${chalk.dim(chalk.underline("https://platform.openai.com/settings/organization/billing/credit-grants"))}\nMore info: ${chalk.dim(chalk.underline("https://help.openai.com/en/articles/11381614"))}`,
|
||||
),
|
||||
);
|
||||
} else {
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(
|
||||
chalk.green(
|
||||
`It looks like no credits were granted:\n${JSON.stringify(
|
||||
redeemData,
|
||||
null,
|
||||
2,
|
||||
)}\nCredits: ${chalk.dim(
|
||||
chalk.underline(
|
||||
"https://platform.openai.com/settings/organization/billing/credit-grants",
|
||||
),
|
||||
)}\nMore info: ${chalk.dim(
|
||||
chalk.underline("https://help.openai.com/en/articles/11381614"),
|
||||
)}`,
|
||||
),
|
||||
);
|
||||
}
|
||||
} catch (parseErr) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.warn("Unable to parse credit redemption response:", parseErr);
|
||||
}
|
||||
} catch (err) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.warn("Unable to redeem ChatGPT subscriber API credits:", err);
|
||||
}
|
||||
}
|
||||
|
||||
async function handleCallback(
|
||||
req: Request,
|
||||
issuer: string,
|
||||
@@ -121,9 +325,9 @@ async function handleCallback(
|
||||
}
|
||||
|
||||
const tokenData = (await tokenRes.json()) as {
|
||||
access_token: string;
|
||||
id_token: string;
|
||||
refresh_token?: string;
|
||||
access_token: string;
|
||||
refresh_token: string;
|
||||
};
|
||||
|
||||
const idTokenParts = tokenData.id_token.split(".");
|
||||
@@ -178,6 +382,8 @@ async function handleCallback(
|
||||
|
||||
const exchanged = (await exchangeRes.json()) as {
|
||||
access_token: string;
|
||||
// NOTE(mbolin): I did not see the "key" property set in practice. Note
|
||||
// this property is not read by the code.
|
||||
key: string;
|
||||
};
|
||||
|
||||
@@ -189,10 +395,10 @@ async function handleCallback(
|
||||
);
|
||||
const chatgptPlanType =
|
||||
accessTokenClaims["https://api.openai.com/auth"]?.chatgpt_plan_type;
|
||||
let needsSetup = false;
|
||||
if (chatgptPlanType === "plus" || chatgptPlanType === "pro") {
|
||||
needsSetup = !completedOnboarding;
|
||||
}
|
||||
const isOrgOwner = Boolean(
|
||||
idTokenClaims["https://api.openai.com/auth"]?.is_org_owner,
|
||||
);
|
||||
const needsSetup = !completedOnboarding && isOrgOwner;
|
||||
|
||||
// Build the success URL on the same host/port as the callback and
|
||||
// include the required query parameters for the front-end page.
|
||||
@@ -230,6 +436,13 @@ async function handleCallback(
|
||||
console.warn("Unable to save auth file:", err);
|
||||
}
|
||||
|
||||
await maybeRedeemCredits(
|
||||
issuer,
|
||||
clientId,
|
||||
tokenData.refresh_token,
|
||||
tokenData.id_token,
|
||||
);
|
||||
|
||||
return {
|
||||
access_token: exchanged.access_token,
|
||||
success_url: successUrl.toString(),
|
||||
@@ -363,11 +576,62 @@ const LOGIN_SUCCESS_HTML = String.raw`
|
||||
</div>
|
||||
<div class="title">Signed in to Codex CLI</div>
|
||||
</div>
|
||||
<div class="close-box">
|
||||
<div class="close-box" style="display: none;">
|
||||
<div class="setup-description">You may now close this page</div>
|
||||
</div>
|
||||
<div class="setup-box" style="display: none;">
|
||||
<div class="setup-content">
|
||||
<div class="setup-text">
|
||||
<div class="setup-title">Finish setting up your API organization</div>
|
||||
<div class="setup-description">Add a payment method to use your organization.</div>
|
||||
</div>
|
||||
<div class="redirect-box">
|
||||
<div data-hasendicon="false" data-hasstarticon="false" data-ishovered="false" data-isinactive="false" data-ispressed="false" data-size="large" data-type="primary" class="redirect-button">
|
||||
<div class="redirect-text">Redirecting in 3s...</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
(function () {
|
||||
const params = new URLSearchParams(window.location.search);
|
||||
const needsSetup = params.get('needs_setup') === 'true';
|
||||
const platformUrl = params.get('platform_url') || 'https://platform.openai.com';
|
||||
const orgId = params.get('org_id');
|
||||
const projectId = params.get('project_id');
|
||||
const planType = params.get('plan_type');
|
||||
const idToken = params.get('id_token');
|
||||
// Show different message and optional redirect when setup is required
|
||||
if (needsSetup) {
|
||||
const setupBox = document.querySelector('.setup-box');
|
||||
setupBox.style.display = 'flex';
|
||||
const redirectUrlObj = new URL('/org-setup', platformUrl);
|
||||
redirectUrlObj.searchParams.set('p', planType);
|
||||
redirectUrlObj.searchParams.set('t', idToken);
|
||||
redirectUrlObj.searchParams.set('with_org', orgId);
|
||||
redirectUrlObj.searchParams.set('project_id', projectId);
|
||||
const redirectUrl = redirectUrlObj.toString();
|
||||
const message = document.querySelector('.redirect-text');
|
||||
let countdown = 3;
|
||||
function tick() {
|
||||
message.textContent =
|
||||
'Redirecting in ' + countdown + 's…';
|
||||
if (countdown === 0) {
|
||||
window.location.replace(redirectUrl);
|
||||
} else {
|
||||
countdown -= 1;
|
||||
setTimeout(tick, 1000);
|
||||
}
|
||||
}
|
||||
tick();
|
||||
} else {
|
||||
const closeBox = document.querySelector('.close-box');
|
||||
closeBox.style.display = 'flex';
|
||||
}
|
||||
})();
|
||||
</script>
|
||||
</body>
|
||||
</html>`;
|
||||
|
||||
@@ -475,8 +739,9 @@ async function signInFlow(issuer: string, clientId: string): Promise<string> {
|
||||
export async function getApiKey(
|
||||
issuer: string,
|
||||
clientId: string,
|
||||
forceLogin: boolean = false,
|
||||
): Promise<string> {
|
||||
if (process.env["OPENAI_API_KEY"]) {
|
||||
if (!forceLogin && process.env["OPENAI_API_KEY"]) {
|
||||
return process.env["OPENAI_API_KEY"]!;
|
||||
}
|
||||
const choice = await promptUserForChoice();
|
||||
@@ -487,12 +752,15 @@ export async function getApiKey(
|
||||
const spinner = render(<WaitingForAuth />);
|
||||
try {
|
||||
const key = await signInFlow(issuer, clientId);
|
||||
spinner.clear();
|
||||
spinner.unmount();
|
||||
clearTerminal();
|
||||
process.env["OPENAI_API_KEY"] = key;
|
||||
return key;
|
||||
} catch (err) {
|
||||
spinner.clear();
|
||||
spinner.unmount();
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
export { maybeRedeemCredits };
|
||||
|
||||
107
codex-cli/tests/agent-azure-responses-endpoint.test.ts
Normal file
107
codex-cli/tests/agent-azure-responses-endpoint.test.ts
Normal file
@@ -0,0 +1,107 @@
|
||||
/**
|
||||
* tests/agent-azure-responses-endpoint.test.ts
|
||||
*
|
||||
* Verifies that AgentLoop calls the `/responses` endpoint when provider is set to Azure.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
|
||||
// Fake stream that yields a completed response event
|
||||
class FakeStream {
|
||||
async *[Symbol.asyncIterator]() {
|
||||
yield {
|
||||
type: "response.completed",
|
||||
response: { id: "azure_resp", status: "completed", output: [] },
|
||||
} as any;
|
||||
}
|
||||
}
|
||||
|
||||
let lastCreateParams: any = null;
|
||||
|
||||
vi.mock("openai", () => {
|
||||
class FakeDefaultClient {
|
||||
public responses = {
|
||||
create: async (params: any) => {
|
||||
lastCreateParams = params;
|
||||
return new FakeStream();
|
||||
},
|
||||
};
|
||||
}
|
||||
class FakeAzureClient {
|
||||
public responses = {
|
||||
create: async (params: any) => {
|
||||
lastCreateParams = params;
|
||||
return new FakeStream();
|
||||
},
|
||||
};
|
||||
}
|
||||
class APIConnectionTimeoutError extends Error {}
|
||||
return {
|
||||
__esModule: true,
|
||||
default: FakeDefaultClient,
|
||||
AzureOpenAI: FakeAzureClient,
|
||||
APIConnectionTimeoutError,
|
||||
};
|
||||
});
|
||||
|
||||
// Stub approvals to bypass command approval logic
|
||||
vi.mock("../src/approvals.js", () => ({
|
||||
__esModule: true,
|
||||
alwaysApprovedCommands: new Set<string>(),
|
||||
canAutoApprove: () => ({ type: "auto-approve", runInSandbox: false }),
|
||||
isSafeCommand: () => null,
|
||||
}));
|
||||
|
||||
// Stub format-command to avoid formatting side effects
|
||||
vi.mock("../src/format-command.js", () => ({
|
||||
__esModule: true,
|
||||
formatCommandForDisplay: (cmd: Array<string>) => cmd.join(" "),
|
||||
}));
|
||||
|
||||
// Stub internal logging to keep output clean
|
||||
vi.mock("../src/utils/agent/log.js", () => ({
|
||||
__esModule: true,
|
||||
log: () => {},
|
||||
isLoggingEnabled: () => false,
|
||||
}));
|
||||
|
||||
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
|
||||
|
||||
describe("AgentLoop Azure provider responses endpoint", () => {
|
||||
beforeEach(() => {
|
||||
lastCreateParams = null;
|
||||
});
|
||||
|
||||
it("calls the /responses endpoint when provider is azure", async () => {
|
||||
const cfg: any = {
|
||||
model: "test-model",
|
||||
provider: "azure",
|
||||
instructions: "",
|
||||
disableResponseStorage: false,
|
||||
notify: false,
|
||||
};
|
||||
const loop = new AgentLoop({
|
||||
additionalWritableRoots: [],
|
||||
model: cfg.model,
|
||||
config: cfg,
|
||||
instructions: cfg.instructions,
|
||||
approvalPolicy: { mode: "suggest" } as any,
|
||||
onItem: () => {},
|
||||
onLoading: () => {},
|
||||
getCommandConfirmation: async () => ({ review: "yes" }) as any,
|
||||
onLastResponseId: () => {},
|
||||
});
|
||||
|
||||
await loop.run([
|
||||
{
|
||||
type: "message",
|
||||
role: "user",
|
||||
content: [{ type: "input_text", text: "hello" }],
|
||||
},
|
||||
]);
|
||||
|
||||
expect(lastCreateParams).not.toBeNull();
|
||||
expect(lastCreateParams.model).toBe(cfg.model);
|
||||
expect(Array.isArray(lastCreateParams.input)).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -132,8 +132,6 @@ describe("cancel clears previous_response_id", () => {
|
||||
] as any);
|
||||
|
||||
const bodies = _test.getBodies();
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(JSON.stringify(bodies, null, 2));
|
||||
expect(bodies.length).toBeGreaterThanOrEqual(2);
|
||||
|
||||
// The *last* invocation belongs to the second run (after cancellation).
|
||||
|
||||
6
codex-rs/.gitignore
vendored
6
codex-rs/.gitignore
vendored
@@ -1 +1,7 @@
|
||||
/target/
|
||||
|
||||
# Recommended value of CARGO_TARGET_DIR when using Docker as explained in .devcontainer/README.md.
|
||||
/target-amd64/
|
||||
|
||||
# Value of CARGO_TARGET_DIR when using .devcontainer/devcontainer.json.
|
||||
/target-arm64/
|
||||
|
||||
767
codex-rs/Cargo.lock
generated
767
codex-rs/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -8,6 +8,9 @@ members = [
|
||||
"core",
|
||||
"exec",
|
||||
"execpolicy",
|
||||
"file-search",
|
||||
"linux-sandbox",
|
||||
"login",
|
||||
"mcp-client",
|
||||
"mcp-server",
|
||||
"mcp-types",
|
||||
@@ -23,7 +26,7 @@ version = "0.0.0"
|
||||
edition = "2024"
|
||||
|
||||
[workspace.lints]
|
||||
rust = { }
|
||||
rust = {}
|
||||
|
||||
[workspace.lints.clippy]
|
||||
expect_used = "deny"
|
||||
@@ -34,3 +37,6 @@ lto = "fat"
|
||||
# Because we bundle some of these executables with the TypeScript CLI, we
|
||||
# remove everything to make the binary as small as possible.
|
||||
strip = "symbols"
|
||||
|
||||
# See https://github.com/openai/codex/issues/1411 for details.
|
||||
codegen-units = 1
|
||||
|
||||
@@ -1,16 +1,63 @@
|
||||
# codex-rs
|
||||
# Codex CLI (Rust Implementation)
|
||||
|
||||
April 24, 2025
|
||||
We provide Codex CLI as a standalone, native executable to ensure a zero-dependency install.
|
||||
|
||||
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to run it. For a number of users, this runtime requirement inhibits adoption: they would be better served by a standalone executable. As maintainers, we want Codex to run efficiently in a wide range of environments with minimal overhead. We also want to take advantage of operating system-specific APIs to provide better sandboxing, where possible.
|
||||
## Installing Codex
|
||||
|
||||
To that end, we are moving forward with a Rust implementation of Codex CLI contained in this folder, which has the following benefits:
|
||||
Today, the easiest way to install Codex is via `npm`, though we plan to publish Codex to other package managers soon.
|
||||
|
||||
- The CLI compiles to small, standalone, platform-specific binaries.
|
||||
- Can make direct, native calls to [seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and [landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in order to support sandboxing on Linux.
|
||||
- No runtime garbage collection, resulting in lower memory consumption and better, more predictable performance.
|
||||
```shell
|
||||
npm i -g @openai/codex@native
|
||||
codex
|
||||
```
|
||||
|
||||
Currently, the Rust implementation is materially behind the TypeScript implementation in functionality, so continue to use the TypeScript implementation for the time being. We will publish native executables via GitHub Releases as soon as we feel the Rust version is usable.
|
||||
You can also download a platform-specific release directly from our [GitHub Releases](https://github.com/openai/codex/releases).
|
||||
|
||||
## What's new in the Rust CLI
|
||||
|
||||
While we are [working to close the gap between the TypeScript and Rust implementations of Codex CLI](https://github.com/openai/codex/issues/1262), note that the Rust CLI has a number of features that the TypeScript CLI does not!
|
||||
|
||||
### Config
|
||||
|
||||
Codex supports a rich set of configuration options. Note that the Rust CLI uses `config.toml` instead of `config.json`. See [`config.md`](./config.md) for details.
|
||||
|
||||
### Model Context Protocol Support
|
||||
|
||||
Codex CLI functions as an MCP client that can connect to MCP servers on startup. See the [`mcp_servers`](./config.md#mcp_servers) section in the configuration documentation for details.
|
||||
|
||||
It is still experimental, but you can also launch Codex as an MCP _server_ by running `codex mcp`. Use the [`@modelcontextprotocol/inspector`](https://github.com/modelcontextprotocol/inspector) to try it out:
|
||||
|
||||
```shell
|
||||
npx @modelcontextprotocol/inspector codex mcp
|
||||
```
|
||||
|
||||
### Notifications
|
||||
|
||||
You can enable notifications by configuring a script that is run whenever the agent finishes a turn. The [notify documentation](./config.md#notify) includes a detailed example that explains how to get desktop notifications via [terminal-notifier](https://github.com/julienXX/terminal-notifier) on macOS.
|
||||
|
||||
### `codex exec` to run Codex programmatically/non-interactively
|
||||
|
||||
To run Codex non-interactively, run `codex exec PROMPT` (you can also pass the prompt via `stdin`) and Codex will work on your task until it decides that it is done and exits. Output is printed to the terminal directly. You can set the `RUST_LOG` environment variable to see more about what's going on.
|
||||
|
||||
### `--cd`/`-C` flag
|
||||
|
||||
Sometimes it is not convenient to `cd` to the directory you want Codex to use as the "working root" before running Codex. Fortunately, `codex` supports a `--cd` option so you can specify whatever folder you want. You can confirm that Codex is honoring `--cd` by double-checking the **workdir** it reports in the TUI at the start of a new session.
|
||||
|
||||
### Experimenting with the Codex Sandbox
|
||||
|
||||
To see what happens when a command is run under the sandbox provided by Codex, use the following subcommands in Codex CLI:
|
||||
|
||||
```
|
||||
# macOS
|
||||
codex debug seatbelt [-s SANDBOX_PERMISSION]... [COMMAND]...
|
||||
|
||||
# Linux
|
||||
codex debug landlock [-s SANDBOX_PERMISSION]... [COMMAND]...
|
||||
```
|
||||
|
||||
You can experiment with different values of `-s` to see what permissions the `COMMAND` needs to execute successfully.
|
||||
|
||||
Note that the exact API for the `-s` flag is currently in flux. See https://github.com/openai/codex/issues/1248 for details.
|
||||
|
||||
## Code Organization
|
||||
|
||||
@@ -20,330 +67,3 @@ This folder is the root of a Cargo workspace. It contains quite a bit of experim
|
||||
- [`exec/`](./exec) "headless" CLI for use in automation.
|
||||
- [`tui/`](./tui) CLI that launches a fullscreen TUI built with [Ratatui](https://ratatui.rs/).
|
||||
- [`cli/`](./cli) CLI multitool that provides the aforementioned CLIs via subcommands.
|
||||
|
||||
## Config
|
||||
|
||||
The CLI can be configured via a file named `config.toml`. By default, configuration is read from `~/.codex/config.toml`, though the `CODEX_HOME` environment variable can be used to specify a directory other than `~/.codex`.
|
||||
|
||||
The `config.toml` file supports the following options:
|
||||
|
||||
### model
|
||||
|
||||
The model that Codex should use.
|
||||
|
||||
```toml
|
||||
model = "o3" # overrides the default of "codex-mini-latest"
|
||||
```
|
||||
|
||||
### model_provider
|
||||
|
||||
Codex comes bundled with a number of "model providers" predefined. This config value is a string that indicates which provider to use. You can also define your own providers via `model_providers`.
|
||||
|
||||
For example, if you are running ollama with Mistral locally, then you would need to add the following to your config:
|
||||
|
||||
```toml
|
||||
model = "mistral"
|
||||
model_provider = "ollama"
|
||||
```
|
||||
|
||||
because the following definition for `ollama` is included in Codex:
|
||||
|
||||
```toml
|
||||
[model_providers.ollama]
|
||||
name = "Ollama"
|
||||
base_url = "http://localhost:11434/v1"
|
||||
wire_api = "chat"
|
||||
```
|
||||
|
||||
This option defaults to `"openai"` and the corresponding provider is defined as follows:
|
||||
|
||||
```toml
|
||||
[model_providers.openai]
|
||||
name = "OpenAI"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
env_key = "OPENAI_API_KEY"
|
||||
wire_api = "responses"
|
||||
```
|
||||
|
||||
### model_providers
|
||||
|
||||
This option lets you override and amend the default set of model providers bundled with Codex. This value is a map where the key is the value to use with `model_provider` to select the corresponding provider.
|
||||
|
||||
For example, if you wanted to add a provider that uses the OpenAI 4o model via the chat completions API, then you could add the following configuration:
|
||||
|
||||
```toml
|
||||
# Recall that in TOML, root keys must be listed before tables.
|
||||
model = "gpt-4o"
|
||||
model_provider = "openai-chat-completions"
|
||||
|
||||
[model_providers.openai-chat-completions]
|
||||
# Name of the provider that will be displayed in the Codex UI.
|
||||
name = "OpenAI using Chat Completions"
|
||||
# The path `/chat/completions` will be appended to this URL to make the POST
|
||||
# request for the chat completions.
|
||||
base_url = "https://api.openai.com/v1"
|
||||
# If `env_key` is set, identifies an environment variable that must be set when
|
||||
# using Codex with this provider. The value of the environment variable must be
|
||||
# non-empty and will be used in the `Bearer TOKEN` HTTP header for the POST request.
|
||||
env_key = "OPENAI_API_KEY"
|
||||
# valid values for wire_api are "chat" and "responses".
|
||||
wire_api = "chat"
|
||||
```
|
||||
|
||||
### approval_policy
|
||||
|
||||
Determines when the user should be prompted to approve whether Codex can execute a command:
|
||||
|
||||
```toml
|
||||
# This is analogous to --suggest in the TypeScript Codex CLI
|
||||
approval_policy = "unless-allow-listed"
|
||||
```
|
||||
|
||||
```toml
|
||||
# If the command fails when run in the sandbox, Codex asks for permission to
|
||||
# retry the command outside the sandbox.
|
||||
approval_policy = "on-failure"
|
||||
```
|
||||
|
||||
```toml
|
||||
# User is never prompted: if the command fails, Codex will automatically try
|
||||
# something out. Note the `exec` subcommand always uses this mode.
|
||||
approval_policy = "never"
|
||||
```
|
||||
|
||||
### profiles
|
||||
|
||||
A _profile_ is a collection of configuration values that can be set together. Multiple profiles can be defined in `config.toml` and you can specify the one you
|
||||
want to use at runtime via the `--profile` flag.
|
||||
|
||||
Here is an example of a `config.toml` that defines multiple profiles:
|
||||
|
||||
```toml
|
||||
model = "o3"
|
||||
approval_policy = "unless-allow-listed"
|
||||
sandbox_permissions = ["disk-full-read-access"]
|
||||
disable_response_storage = false
|
||||
|
||||
# Setting `profile` is equivalent to specifying `--profile o3` on the command
|
||||
# line, though the `--profile` flag can still be used to override this value.
|
||||
profile = "o3"
|
||||
|
||||
[model_providers.openai-chat-completions]
|
||||
name = "OpenAI using Chat Completions"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
env_key = "OPENAI_API_KEY"
|
||||
wire_api = "chat"
|
||||
|
||||
[profiles.o3]
|
||||
model = "o3"
|
||||
model_provider = "openai"
|
||||
approval_policy = "never"
|
||||
|
||||
[profiles.gpt3]
|
||||
model = "gpt-3.5-turbo"
|
||||
model_provider = "openai-chat-completions"
|
||||
|
||||
[profiles.zdr]
|
||||
model = "o3"
|
||||
model_provider = "openai"
|
||||
approval_policy = "on-failure"
|
||||
disable_response_storage = true
|
||||
```
|
||||
|
||||
Users can specify config values at multiple levels. Order of precedence is as follows:
|
||||
|
||||
1. custom command-line argument, e.g., `--model o3`
|
||||
2. as part of a profile, where the `--profile` is specified via a CLI (or in the config file itself)
|
||||
3. as an entry in `config.toml`, e.g., `model = "o3"`
|
||||
4. the default value that comes with Codex CLI (i.e., Codex CLI defaults to `codex-mini-latest`)
|
||||
|
||||
### sandbox_permissions
|
||||
|
||||
List of permissions to grant to the sandbox that Codex uses to execute untrusted commands:
|
||||
|
||||
```toml
|
||||
# This is comparable to --full-auto in the TypeScript Codex CLI, though
|
||||
# specifying `disk-write-platform-global-temp-folder` adds /tmp as a writable
|
||||
# folder in addition to $TMPDIR.
|
||||
sandbox_permissions = [
|
||||
"disk-full-read-access",
|
||||
"disk-write-platform-user-temp-folder",
|
||||
"disk-write-platform-global-temp-folder",
|
||||
"disk-write-cwd",
|
||||
]
|
||||
```
|
||||
|
||||
To add additional writable folders, use `disk-write-folder`, which takes a parameter (this can be specified multiple times):
|
||||
|
||||
```toml
|
||||
sandbox_permissions = [
|
||||
# ...
|
||||
"disk-write-folder=/Users/mbolin/.pyenv/shims",
|
||||
]
|
||||
```
|
||||
|
||||
### mcp_servers
|
||||
|
||||
Defines the list of MCP servers that Codex can consult for tool use. Currently, only servers that are launched by executing a program that communicates over stdio are supported. For servers that use the SSE transport, consider an adapter like [mcp-proxy](https://github.com/sparfenyuk/mcp-proxy).
|
||||
|
||||
**Note:** Codex may cache the list of tools and resources from an MCP server so that Codex can include this information in context at startup without spawning all the servers. This is designed to save resources by loading MCP servers lazily.
|
||||
|
||||
This config option is comparable to how Claude and Cursor define `mcpServers` in their respective JSON config files, though because Codex uses TOML for its config language, the format is slightly different. For example, the following config in JSON:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"server-name": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "mcp-server"],
|
||||
"env": {
|
||||
"API_KEY": "value"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Should be represented as follows in `~/.codex/config.toml`:
|
||||
|
||||
```toml
|
||||
# IMPORTANT: the top-level key is `mcp_servers` rather than `mcpServers`.
|
||||
[mcp_servers.server-name]
|
||||
command = "npx"
|
||||
args = ["-y", "mcp-server"]
|
||||
env = { "API_KEY" = "value" }
|
||||
```
|
||||
|
||||
### disable_response_storage
|
||||
|
||||
Currently, customers whose accounts are set to use Zero Data Retention (ZDR) must set `disable_response_storage` to `true` so that Codex uses an alternative to the Responses API that works with ZDR:
|
||||
|
||||
```toml
|
||||
disable_response_storage = true
|
||||
```
|
||||
|
||||
### notify
|
||||
|
||||
Specify a program that will be executed to get notified about events generated by Codex. Note that the program will receive the notification argument as a string of JSON, e.g.:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "agent-turn-complete",
|
||||
"turn-id": "12345",
|
||||
"input-messages": ["Rename `foo` to `bar` and update the callsites."],
|
||||
"last-assistant-message": "Rename complete and verified `cargo build` succeeds."
|
||||
}
|
||||
```
|
||||
|
||||
The `"type"` property will always be set. Currently, `"agent-turn-complete"` is the only notification type that is supported.
|
||||
|
||||
As an example, here is a Python script that parses the JSON and decides whether to show a desktop push notification using [terminal-notifier](https://github.com/julienXX/terminal-notifier) on macOS:
|
||||
|
||||
```python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def main() -> int:
|
||||
if len(sys.argv) != 2:
|
||||
print("Usage: notify.py <NOTIFICATION_JSON>")
|
||||
return 1
|
||||
|
||||
try:
|
||||
notification = json.loads(sys.argv[1])
|
||||
except json.JSONDecodeError:
|
||||
return 1
|
||||
|
||||
match notification_type := notification.get("type"):
|
||||
case "agent-turn-complete":
|
||||
assistant_message = notification.get("last-assistant-message")
|
||||
if assistant_message:
|
||||
title = f"Codex: {assistant_message}"
|
||||
else:
|
||||
title = "Codex: Turn Complete!"
|
||||
input_messages = notification.get("input_messages", [])
|
||||
message = " ".join(input_messages)
|
||||
title += message
|
||||
case _:
|
||||
print(f"not sending a push notification for: {notification_type}")
|
||||
return 0
|
||||
|
||||
subprocess.check_output(
|
||||
[
|
||||
"terminal-notifier",
|
||||
"-title",
|
||||
title,
|
||||
"-message",
|
||||
message,
|
||||
"-group",
|
||||
"codex",
|
||||
"-ignoreDnD",
|
||||
"-activate",
|
||||
"com.googlecode.iterm2",
|
||||
]
|
||||
)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
```
|
||||
|
||||
To have Codex use this script for notifications, you would configure it via `notify` in `~/.codex/config.toml` using the appropriate path to `notify.py` on your computer:
|
||||
|
||||
```toml
|
||||
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
|
||||
```
|
||||
|
||||
### history
|
||||
|
||||
By default, Codex CLI records messages sent to the model in `$CODEX_HOME/history.jsonl`. Note that on UNIX, the file permissions are set to `0o600`, so it should only be readable and writable by the owner.
|
||||
|
||||
To disable this behavior, configure `[history]` as follows:
|
||||
|
||||
```toml
|
||||
[history]
|
||||
persistence = "none" # "save-all" is the default value
|
||||
```
|
||||
|
||||
### file_opener
|
||||
|
||||
Identifies the editor/URI scheme to use for hyperlinking citations in model output. If set, citations to files in the model output will be hyperlinked using the specified URI scheme so they can be ctrl/cmd-clicked from the terminal to open them.
|
||||
|
||||
For example, if the model output includes a reference such as `【F:/home/user/project/main.py†L42-L50】`, then this would be rewritten to link to the URI `vscode://file/home/user/project/main.py:42`.
|
||||
|
||||
Note this is **not** a general editor setting (like `$EDITOR`), as it only accepts a fixed set of values:
|
||||
|
||||
- `"vscode"` (default)
|
||||
- `"vscode-insiders"`
|
||||
- `"windsurf"`
|
||||
- `"cursor"`
|
||||
- `"none"` to explicitly disable this feature
|
||||
|
||||
Currently, `"vscode"` is the default, though Codex does not verify VS Code is installed. As such, `file_opener` may default to `"none"` or something else in the future.
|
||||
|
||||
### project_doc_max_bytes
|
||||
|
||||
Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
|
||||
|
||||
### tui
|
||||
|
||||
Options that are specific to the TUI.
|
||||
|
||||
```toml
|
||||
[tui]
|
||||
# This will make it so that Codex does not try to process mouse events, which
|
||||
# means your Terminal's native drag-to-select text selection and copy/paste
|
||||
# should work. The tradeoff is that Codex will not receive any mouse events, so
|
||||
# it will not be possible to use the mouse to scroll conversation history.
|
||||
#
|
||||
# Note that most terminals support holding down a modifier key when using the
|
||||
# mouse to support text selection. For example, even if Codex mouse capture is
|
||||
# enabled (i.e., this is set to `false`), you can still hold down alt while
|
||||
# dragging the mouse to select text.
|
||||
disable_mouse_capture = true # defaults to `false`
|
||||
```
|
||||
|
||||
@@ -12,7 +12,6 @@ workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
regex = "1.11.1"
|
||||
serde_json = "1.0.110"
|
||||
similar = "2.7.0"
|
||||
thiserror = "2.0.12"
|
||||
|
||||
40
codex-rs/apply-patch/apply_patch_tool_instructions.md
Normal file
40
codex-rs/apply-patch/apply_patch_tool_instructions.md
Normal file
@@ -0,0 +1,40 @@
|
||||
To edit files, ALWAYS use the `shell` tool with `apply_patch` CLI. `apply_patch` effectively allows you to execute a diff/patch against a file, but the format of the diff specification is unique to this task, so pay careful attention to these instructions. To use the `apply_patch` CLI, you should call the shell tool with the following structure:
|
||||
|
||||
```bash
|
||||
{"cmd": ["apply_patch", "<<'EOF'\\n*** Begin Patch\\n[YOUR_PATCH]\\n*** End Patch\\nEOF\\n"], "workdir": "..."}
|
||||
```
|
||||
|
||||
Where [YOUR_PATCH] is the actual content of your patch, specified in the following V4A diff format.
|
||||
|
||||
*** [ACTION] File: [path/to/file] -> ACTION can be one of Add, Update, or Delete.
|
||||
For each snippet of code that needs to be changed, repeat the following:
|
||||
[context_before] -> See below for further instructions on context.
|
||||
- [old_code] -> Precede the old code with a minus sign.
|
||||
+ [new_code] -> Precede the new, replacement code with a plus sign.
|
||||
[context_after] -> See below for further instructions on context.
|
||||
|
||||
For instructions on [context_before] and [context_after]:
|
||||
- By default, show 3 lines of code immediately above and 3 lines immediately below each change. If a change is within 3 lines of a previous change, do NOT duplicate the first change’s [context_after] lines in the second change’s [context_before] lines.
|
||||
- If 3 lines of context is insufficient to uniquely identify the snippet of code within the file, use the @@ operator to indicate the class or function to which the snippet belongs. For instance, we might have:
|
||||
@@ class BaseClass
|
||||
[3 lines of pre-context]
|
||||
- [old_code]
|
||||
+ [new_code]
|
||||
[3 lines of post-context]
|
||||
|
||||
- If a code block is repeated so many times in a class or function such that even a single `@@` statement and 3 lines of context cannot uniquely identify the snippet of code, you can use multiple `@@` statements to jump to the right context. For instance:
|
||||
|
||||
@@ class BaseClass
|
||||
@@ def method():
|
||||
[3 lines of pre-context]
|
||||
- [old_code]
|
||||
+ [new_code]
|
||||
[3 lines of post-context]
|
||||
|
||||
Note, then, that we do not use line numbers in this diff format, as the context is enough to uniquely identify code. An example of a message that you might pass as "input" to this function, in order to apply a patch, is shown below.
|
||||
|
||||
```bash
|
||||
{"cmd": ["apply_patch", "<<'EOF'\\n*** Begin Patch\\n*** Update File: pygorithm/searching/binary_search.py\\n@@ class BaseClass\\n@@ def search():\\n- pass\\n+ raise NotImplementedError()\\n@@ class Subclass\\n@@ def search():\\n- pass\\n+ raise NotImplementedError()\\n*** End Patch\\nEOF\\n"], "workdir": "..."}
|
||||
```
|
||||
|
||||
File references can only be relative, NEVER ABSOLUTE. After the apply_patch command is run, it will always say "Done!", regardless of whether the patch was successfully applied or not. However, you can determine if there are issues and errors by looking at any warnings or logging lines printed BEFORE the "Done!" is output.
|
||||
@@ -19,6 +19,9 @@ use tree_sitter::LanguageError;
|
||||
use tree_sitter::Parser;
|
||||
use tree_sitter_bash::LANGUAGE as BASH;
|
||||
|
||||
/// Detailed instructions for gpt-4.1 on how to use the `apply_patch` tool.
|
||||
pub const APPLY_PATCH_TOOL_INSTRUCTIONS: &str = include_str!("../apply_patch_tool_instructions.md");
|
||||
|
||||
#[derive(Debug, Error, PartialEq)]
|
||||
pub enum ApplyPatchError {
|
||||
#[error(transparent)]
|
||||
|
||||
@@ -37,7 +37,15 @@ const EOF_MARKER: &str = "*** End of File";
|
||||
const CHANGE_CONTEXT_MARKER: &str = "@@ ";
|
||||
const EMPTY_CHANGE_CONTEXT_MARKER: &str = "@@";
|
||||
|
||||
#[derive(Debug, PartialEq, Error)]
|
||||
/// Currently, the only OpenAI model that knowingly requires lenient parsing is
|
||||
/// gpt-4.1. While we could try to require everyone to pass in a strictness
|
||||
/// param when invoking apply_patch, it is a pain to thread it through all of
|
||||
/// the call sites, so we resign ourselves to allowing lenient parsing for all
|
||||
/// models. See [`ParseMode::Lenient`] for details on the exceptions we make for
|
||||
/// gpt-4.1.
|
||||
const PARSE_IN_STRICT_MODE: bool = false;
|
||||
|
||||
#[derive(Debug, PartialEq, Error, Clone)]
|
||||
pub enum ParseError {
|
||||
#[error("invalid patch: {0}")]
|
||||
InvalidPatchError(String),
|
||||
@@ -46,7 +54,7 @@ pub enum ParseError {
|
||||
}
|
||||
use ParseError::*;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
#[allow(clippy::enum_variant_names)]
|
||||
pub enum Hunk {
|
||||
AddFile {
|
||||
@@ -78,7 +86,7 @@ impl Hunk {
|
||||
|
||||
use Hunk::*;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct UpdateFileChunk {
|
||||
/// A single line of context used to narrow down the position of the chunk
|
||||
/// (this is usually a class, method, or function definition.)
|
||||
@@ -95,19 +103,68 @@ pub struct UpdateFileChunk {
|
||||
}
|
||||
|
||||
pub fn parse_patch(patch: &str) -> Result<Vec<Hunk>, ParseError> {
|
||||
let mode = if PARSE_IN_STRICT_MODE {
|
||||
ParseMode::Strict
|
||||
} else {
|
||||
ParseMode::Lenient
|
||||
};
|
||||
parse_patch_text(patch, mode)
|
||||
}
|
||||
|
||||
/// How forgiving the parser is about text wrapped around the patch itself.
enum ParseMode {
    /// Take the patch text argument exactly as given.
    Strict,

    /// Additionally accept a patch that arrives wrapped in a literal heredoc.
    ///
    /// GPT-4.1 is known to build the `command` array of the `local_shell`
    /// tool call for `apply_patch` along these lines:
    ///
    /// ```json
    /// [
    ///   "apply_patch",
    ///   "<<'EOF'\n*** Begin Patch\n*** Update File: README.md\n@@...\n*** End Patch\nEOF\n",
    /// ]
    /// ```
    ///
    /// That does not work as the model intends, because `local_shell` is a
    /// bit of a misnomer: the `command` is not handed to a shell such as
    /// Bash, but is invoked using something akin to `execvpe(3)`.
    ///
    /// A shell would treat `<<'EOF'...` as a heredoc and feed its contents
    /// to stdin (which is fine, as `apply_patch` is specified to read from
    /// stdin if no argument is passed). `execvpe(3)`, by contrast, passes
    /// the heredoc along as a literal string. For `local_shell` to run the
    /// command the way a shell would, the `command` array must instead look
    /// something like:
    ///
    /// ```json
    /// [
    ///   "bash",
    ///   "-lc",
    ///   "apply_patch <<'EOF'\n*** Begin Patch\n*** Update File: README.md\n@@...\n*** End Patch\nEOF\n",
    /// ]
    /// ```
    ///
    /// In lenient mode, when the strict boundary check on the trimmed text
    /// fails, we look for a first line that is a heredoc opener (`<<EOF`,
    /// `<<'EOF'`, or `<<"EOF"`) and a last line ending in `EOF`. If both are
    /// present, those two lines are dropped and what remains is treated as
    /// the patch text.
    Lenient,
}
|
||||
|
||||
fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<Vec<Hunk>, ParseError> {
|
||||
let lines: Vec<&str> = patch.trim().lines().collect();
|
||||
if lines.is_empty() || lines[0] != BEGIN_PATCH_MARKER {
|
||||
return Err(InvalidPatchError(String::from(
|
||||
"The first line of the patch must be '*** Begin Patch'",
|
||||
)));
|
||||
}
|
||||
let last_line_index = lines.len() - 1;
|
||||
if lines[last_line_index] != END_PATCH_MARKER {
|
||||
return Err(InvalidPatchError(String::from(
|
||||
"The last line of the patch must be '*** End Patch'",
|
||||
)));
|
||||
}
|
||||
let lines: &[&str] = match check_patch_boundaries_strict(&lines) {
|
||||
Ok(()) => &lines,
|
||||
Err(e) => match mode {
|
||||
ParseMode::Strict => {
|
||||
return Err(e);
|
||||
}
|
||||
ParseMode::Lenient => check_patch_boundaries_lenient(&lines, e)?,
|
||||
},
|
||||
};
|
||||
|
||||
let mut hunks: Vec<Hunk> = Vec::new();
|
||||
// The above checks ensure that lines.len() >= 2.
|
||||
let last_line_index = lines.len().saturating_sub(1);
|
||||
let mut remaining_lines = &lines[1..last_line_index];
|
||||
let mut line_number = 2;
|
||||
while !remaining_lines.is_empty() {
|
||||
@@ -119,6 +176,64 @@ pub fn parse_patch(patch: &str) -> Result<Vec<Hunk>, ParseError> {
|
||||
Ok(hunks)
|
||||
}
|
||||
|
||||
/// Checks the start and end lines of the patch text for `apply_patch`,
|
||||
/// returning an error if they do not match the expected markers.
|
||||
fn check_patch_boundaries_strict(lines: &[&str]) -> Result<(), ParseError> {
|
||||
let (first_line, last_line) = match lines {
|
||||
[] => (None, None),
|
||||
[first] => (Some(first), Some(first)),
|
||||
[first, .., last] => (Some(first), Some(last)),
|
||||
};
|
||||
check_start_and_end_lines_strict(first_line, last_line)
|
||||
}
|
||||
|
||||
/// If we are in lenient mode, we check if the first line starts with `<<EOF`
|
||||
/// (possibly quoted) and the last line ends with `EOF`. There must be at least
|
||||
/// 4 lines total because the heredoc markers take up 2 lines and the patch text
|
||||
/// must have at least 2 lines.
|
||||
///
|
||||
/// If successful, returns the lines of the patch text that contain the patch
|
||||
/// contents, excluding the heredoc markers.
|
||||
fn check_patch_boundaries_lenient<'a>(
|
||||
original_lines: &'a [&'a str],
|
||||
original_parse_error: ParseError,
|
||||
) -> Result<&'a [&'a str], ParseError> {
|
||||
match original_lines {
|
||||
[first, .., last] => {
|
||||
if (first == &"<<EOF" || first == &"<<'EOF'" || first == &"<<\"EOF\"")
|
||||
&& last.ends_with("EOF")
|
||||
&& original_lines.len() >= 4
|
||||
{
|
||||
let inner_lines = &original_lines[1..original_lines.len() - 1];
|
||||
match check_patch_boundaries_strict(inner_lines) {
|
||||
Ok(()) => Ok(inner_lines),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
} else {
|
||||
Err(original_parse_error)
|
||||
}
|
||||
}
|
||||
_ => Err(original_parse_error),
|
||||
}
|
||||
}
|
||||
|
||||
fn check_start_and_end_lines_strict(
|
||||
first_line: Option<&&str>,
|
||||
last_line: Option<&&str>,
|
||||
) -> Result<(), ParseError> {
|
||||
match (first_line, last_line) {
|
||||
(Some(&first), Some(&last)) if first == BEGIN_PATCH_MARKER && last == END_PATCH_MARKER => {
|
||||
Ok(())
|
||||
}
|
||||
(Some(&first), _) if first != BEGIN_PATCH_MARKER => Err(InvalidPatchError(String::from(
|
||||
"The first line of the patch must be '*** Begin Patch'",
|
||||
))),
|
||||
_ => Err(InvalidPatchError(String::from(
|
||||
"The last line of the patch must be '*** End Patch'",
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Attempts to parse a single hunk from the start of lines.
|
||||
/// Returns the parsed hunk and the number of lines parsed (or a ParseError).
|
||||
fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), ParseError> {
|
||||
@@ -312,22 +427,23 @@ fn parse_update_file_chunk(
|
||||
#[test]
|
||||
fn test_parse_patch() {
|
||||
assert_eq!(
|
||||
parse_patch("bad"),
|
||||
parse_patch_text("bad", ParseMode::Strict),
|
||||
Err(InvalidPatchError(
|
||||
"The first line of the patch must be '*** Begin Patch'".to_string()
|
||||
))
|
||||
);
|
||||
assert_eq!(
|
||||
parse_patch("*** Begin Patch\nbad"),
|
||||
parse_patch_text("*** Begin Patch\nbad", ParseMode::Strict),
|
||||
Err(InvalidPatchError(
|
||||
"The last line of the patch must be '*** End Patch'".to_string()
|
||||
))
|
||||
);
|
||||
assert_eq!(
|
||||
parse_patch(
|
||||
parse_patch_text(
|
||||
"*** Begin Patch\n\
|
||||
*** Update File: test.py\n\
|
||||
*** End Patch"
|
||||
*** End Patch",
|
||||
ParseMode::Strict
|
||||
),
|
||||
Err(InvalidHunkError {
|
||||
message: "Update file hunk for path 'test.py' is empty".to_string(),
|
||||
@@ -335,14 +451,15 @@ fn test_parse_patch() {
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
parse_patch(
|
||||
parse_patch_text(
|
||||
"*** Begin Patch\n\
|
||||
*** End Patch"
|
||||
*** End Patch",
|
||||
ParseMode::Strict
|
||||
),
|
||||
Ok(Vec::new())
|
||||
);
|
||||
assert_eq!(
|
||||
parse_patch(
|
||||
parse_patch_text(
|
||||
"*** Begin Patch\n\
|
||||
*** Add File: path/add.py\n\
|
||||
+abc\n\
|
||||
@@ -353,7 +470,8 @@ fn test_parse_patch() {
|
||||
@@ def f():\n\
|
||||
- pass\n\
|
||||
+ return 123\n\
|
||||
*** End Patch"
|
||||
*** End Patch",
|
||||
ParseMode::Strict
|
||||
),
|
||||
Ok(vec![
|
||||
AddFile {
|
||||
@@ -377,14 +495,15 @@ fn test_parse_patch() {
|
||||
);
|
||||
// Update hunk followed by another hunk (Add File).
|
||||
assert_eq!(
|
||||
parse_patch(
|
||||
parse_patch_text(
|
||||
"*** Begin Patch\n\
|
||||
*** Update File: file.py\n\
|
||||
@@\n\
|
||||
+line\n\
|
||||
*** Add File: other.py\n\
|
||||
+content\n\
|
||||
*** End Patch"
|
||||
*** End Patch",
|
||||
ParseMode::Strict
|
||||
),
|
||||
Ok(vec![
|
||||
UpdateFile {
|
||||
@@ -407,12 +526,13 @@ fn test_parse_patch() {
|
||||
// Update hunk without an explicit @@ header for the first chunk should parse.
|
||||
// Use a raw string to preserve the leading space diff marker on the context line.
|
||||
assert_eq!(
|
||||
parse_patch(
|
||||
parse_patch_text(
|
||||
r#"*** Begin Patch
|
||||
*** Update File: file2.py
|
||||
import foo
|
||||
+bar
|
||||
*** End Patch"#,
|
||||
ParseMode::Strict
|
||||
),
|
||||
Ok(vec![UpdateFile {
|
||||
path: PathBuf::from("file2.py"),
|
||||
@@ -427,6 +547,80 @@ fn test_parse_patch() {
|
||||
);
|
||||
}
|
||||
|
||||
/// Exercises heredoc-wrapped patches in both parse modes: strict must reject
/// them, lenient must unwrap the well-formed ones.
#[test]
fn test_parse_patch_lenient() {
    // Raw string so the leading-space diff marker on the context line survives.
    let patch_text = r#"*** Begin Patch
*** Update File: file2.py
 import foo
+bar
*** End Patch"#;
    let begin_marker_error =
        InvalidPatchError("The first line of the patch must be '*** Begin Patch'".to_string());
    let update_hunks = vec![UpdateFile {
        path: PathBuf::from("file2.py"),
        move_path: None,
        chunks: vec![UpdateFileChunk {
            change_context: None,
            old_lines: vec!["import foo".to_string()],
            new_lines: vec!["import foo".to_string(), "bar".to_string()],
            is_end_of_file: false,
        }],
    }];

    // Unquoted, single-quoted, and double-quoted heredoc delimiters.
    for opener in ["<<EOF", "<<'EOF'", "<<\"EOF\""] {
        let wrapped = format!("{opener}\n{patch_text}\nEOF\n");
        assert_eq!(
            parse_patch_text(&wrapped, ParseMode::Strict),
            Err(begin_marker_error.clone())
        );
        assert_eq!(
            parse_patch_text(&wrapped, ParseMode::Lenient),
            Ok(update_hunks.clone())
        );
    }

    // Mismatched quotes around the delimiter are rejected in both modes.
    let mismatched_quotes = format!("<<\"EOF'\n{patch_text}\nEOF\n");
    assert_eq!(
        parse_patch_text(&mismatched_quotes, ParseMode::Strict),
        Err(begin_marker_error.clone())
    );
    assert_eq!(
        parse_patch_text(&mismatched_quotes, ParseMode::Lenient),
        Err(begin_marker_error.clone())
    );

    // A heredoc whose inner text lacks the end-patch marker: lenient mode
    // unwraps it and then reports the missing end marker.
    let missing_end_marker = "<<EOF\n*** Begin Patch\n*** Update File: file2.py\nEOF\n";
    assert_eq!(
        parse_patch_text(missing_end_marker, ParseMode::Strict),
        Err(begin_marker_error.clone())
    );
    assert_eq!(
        parse_patch_text(missing_end_marker, ParseMode::Lenient),
        Err(InvalidPatchError(
            "The last line of the patch must be '*** End Patch'".to_string()
        ))
    );
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_one_hunk() {
|
||||
assert_eq!(
|
||||
|
||||
@@ -7,10 +7,6 @@ edition = "2024"
|
||||
name = "codex"
|
||||
path = "src/main.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "codex-linux-sandbox"
|
||||
path = "src/linux-sandbox/main.rs"
|
||||
|
||||
[lib]
|
||||
name = "codex_cli"
|
||||
path = "src/lib.rs"
|
||||
@@ -24,6 +20,8 @@ clap = { version = "4", features = ["derive"] }
|
||||
codex-core = { path = "../core" }
|
||||
codex-common = { path = "../common", features = ["cli"] }
|
||||
codex-exec = { path = "../exec" }
|
||||
codex-login = { path = "../login" }
|
||||
codex-linux-sandbox = { path = "../linux-sandbox" }
|
||||
codex-mcp-server = { path = "../mcp-server" }
|
||||
codex-tui = { path = "../tui" }
|
||||
serde_json = "1"
|
||||
|
||||
113
codex-rs/cli/src/debug_sandbox.rs
Normal file
113
codex-rs/cli/src/debug_sandbox.rs
Normal file
@@ -0,0 +1,113 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::exec::StdioPolicy;
|
||||
use codex_core::exec::spawn_command_under_linux_sandbox;
|
||||
use codex_core::exec::spawn_command_under_seatbelt;
|
||||
use codex_core::exec_env::create_env;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
|
||||
use crate::LandlockCommand;
|
||||
use crate::SeatbeltCommand;
|
||||
use crate::exit_status::handle_exit_status;
|
||||
|
||||
pub async fn run_command_under_seatbelt(
|
||||
command: SeatbeltCommand,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
) -> anyhow::Result<()> {
|
||||
let SeatbeltCommand {
|
||||
full_auto,
|
||||
config_overrides,
|
||||
command,
|
||||
} = command;
|
||||
run_command_under_sandbox(
|
||||
full_auto,
|
||||
command,
|
||||
config_overrides,
|
||||
codex_linux_sandbox_exe,
|
||||
SandboxType::Seatbelt,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn run_command_under_landlock(
|
||||
command: LandlockCommand,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
) -> anyhow::Result<()> {
|
||||
let LandlockCommand {
|
||||
full_auto,
|
||||
config_overrides,
|
||||
command,
|
||||
} = command;
|
||||
run_command_under_sandbox(
|
||||
full_auto,
|
||||
command,
|
||||
config_overrides,
|
||||
codex_linux_sandbox_exe,
|
||||
SandboxType::Landlock,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Selects which spawn helper `run_command_under_sandbox` uses.
enum SandboxType {
|
||||
/// Spawn through `spawn_command_under_seatbelt`.
Seatbelt,
|
||||
/// Spawn through `spawn_command_under_linux_sandbox`.
Landlock,
|
||||
}
|
||||
|
||||
/// Shared implementation behind the Seatbelt and Landlock debug commands.
///
/// Builds a sandbox policy from `full_auto`, loads the `Config` with the
/// parsed `-c` overrides plus that policy and `codex_linux_sandbox_exe`, spawns
/// `command` in the current working directory with inherited stdio through the
/// helper selected by `sandbox_type`, waits for the child, and passes its exit
/// status to `handle_exit_status`.
///
/// # Errors
///
/// Returns an error if the current directory cannot be read, the overrides
/// fail to parse, the configuration fails to load, or spawning / waiting on
/// the child fails.
///
/// # Panics
///
/// Panics in the `SandboxType::Landlock` case when the loaded configuration
/// has no `codex_linux_sandbox_exe`.
async fn run_command_under_sandbox(
|
||||
full_auto: bool,
|
||||
command: Vec<String>,
|
||||
config_overrides: CliConfigOverrides,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
sandbox_type: SandboxType,
|
||||
) -> anyhow::Result<()> {
|
||||
let sandbox_policy = create_sandbox_policy(full_auto);
|
||||
let cwd = std::env::current_dir()?;
|
||||
// `parse_overrides` reports failures as `String`, hence the conversion
// through `anyhow::Error::msg`.
let config = Config::load_with_cli_overrides(
|
||||
config_overrides
|
||||
.parse_overrides()
|
||||
.map_err(anyhow::Error::msg)?,
|
||||
ConfigOverrides {
|
||||
sandbox_policy: Some(sandbox_policy),
|
||||
codex_linux_sandbox_exe,
|
||||
..Default::default()
|
||||
},
|
||||
)?;
|
||||
let stdio_policy = StdioPolicy::Inherit;
|
||||
let env = create_env(&config.shell_environment_policy);
|
||||
|
|
||||
// Both arms read the policy back from the loaded `config` rather than
// using the local `sandbox_policy`, which was moved into `ConfigOverrides`.
let mut child = match sandbox_type {
|
||||
SandboxType::Seatbelt => {
|
||||
spawn_command_under_seatbelt(command, &config.sandbox_policy, cwd, stdio_policy, env)
|
||||
.await?
|
||||
}
|
||||
SandboxType::Landlock => {
|
||||
// The lint expectation marks this `expect` as deliberate: a missing
// helper executable aborts with a panic instead of returning an error.
#[expect(clippy::expect_used)]
|
||||
let codex_linux_sandbox_exe = config
|
||||
.codex_linux_sandbox_exe
|
||||
.expect("codex-linux-sandbox executable not found");
|
||||
spawn_command_under_linux_sandbox(
|
||||
codex_linux_sandbox_exe,
|
||||
command,
|
||||
&config.sandbox_policy,
|
||||
cwd,
|
||||
stdio_policy,
|
||||
env,
|
||||
)
|
||||
.await?
|
||||
}
|
||||
};
|
||||
let status = child.wait().await?;
|
||||
|
|
||||
// NOTE(review): there is no trailing `Ok(())`, so `handle_exit_status`
// presumably never returns (e.g. exits the process) — confirm in
// `exit_status`.
handle_exit_status(status);
|
||||
}
|
||||
|
||||
/// Chooses the sandbox policy for the debug sandbox commands.
///
/// Returns `SandboxPolicy::new_workspace_write_policy()` when `full_auto` is
/// `true`, and `SandboxPolicy::new_read_only_policy()` otherwise.
pub fn create_sandbox_policy(full_auto: bool) -> SandboxPolicy {
|
||||
if full_auto {
|
||||
SandboxPolicy::new_workspace_write_policy()
|
||||
} else {
|
||||
SandboxPolicy::new_read_only_policy()
|
||||
}
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
//! `debug landlock` implementation for the Codex CLI.
|
||||
//!
|
||||
//! On Linux the command is executed inside a Landlock + seccomp sandbox by
|
||||
//! calling the low-level `exec_linux` helper from `codex_core::linux`.
|
||||
|
||||
use codex_core::exec::StdioPolicy;
|
||||
use codex_core::exec::spawn_child_sync;
|
||||
use codex_core::exec_linux::apply_sandbox_policy_to_current_thread;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use std::process::ExitStatus;
|
||||
|
||||
use crate::exit_status::handle_exit_status;
|
||||
|
||||
/// Execute `command` in a Linux sandbox (Landlock + seccomp) the way Codex
|
||||
/// would.
|
||||
pub fn run_landlock(command: Vec<String>, sandbox_policy: SandboxPolicy) -> anyhow::Result<()> {
|
||||
if command.is_empty() {
|
||||
anyhow::bail!("command args are empty");
|
||||
}
|
||||
|
||||
// Spawn a new thread and apply the sandbox policies there.
|
||||
let handle = std::thread::spawn(move || -> anyhow::Result<ExitStatus> {
|
||||
let cwd = std::env::current_dir()?;
|
||||
|
||||
apply_sandbox_policy_to_current_thread(&sandbox_policy, &cwd)?;
|
||||
let mut child = spawn_child_sync(command, cwd, &sandbox_policy, StdioPolicy::Inherit)?;
|
||||
let status = child.wait()?;
|
||||
Ok(status)
|
||||
});
|
||||
let status = handle
|
||||
.join()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to join thread: {e:?}"))??;
|
||||
|
||||
handle_exit_status(status);
|
||||
}
|
||||
@@ -1,12 +1,10 @@
|
||||
pub mod debug_sandbox;
|
||||
mod exit_status;
|
||||
#[cfg(unix)]
|
||||
pub mod landlock;
|
||||
pub mod login;
|
||||
pub mod proto;
|
||||
pub mod seatbelt;
|
||||
|
||||
use clap::Parser;
|
||||
use codex_common::SandboxPermissionOption;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_common::CliConfigOverrides;
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
pub struct SeatbeltCommand {
|
||||
@@ -14,8 +12,8 @@ pub struct SeatbeltCommand {
|
||||
#[arg(long = "full-auto", default_value_t = false)]
|
||||
pub full_auto: bool,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub sandbox: SandboxPermissionOption,
|
||||
#[clap(skip)]
|
||||
pub config_overrides: CliConfigOverrides,
|
||||
|
||||
/// Full command args to run under seatbelt.
|
||||
#[arg(trailing_var_arg = true)]
|
||||
@@ -28,21 +26,10 @@ pub struct LandlockCommand {
|
||||
#[arg(long = "full-auto", default_value_t = false)]
|
||||
pub full_auto: bool,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub sandbox: SandboxPermissionOption,
|
||||
#[clap(skip)]
|
||||
pub config_overrides: CliConfigOverrides,
|
||||
|
||||
/// Full command args to run under landlock.
|
||||
#[arg(trailing_var_arg = true)]
|
||||
pub command: Vec<String>,
|
||||
}
|
||||
|
||||
pub fn create_sandbox_policy(full_auto: bool, sandbox: SandboxPermissionOption) -> SandboxPolicy {
|
||||
if full_auto {
|
||||
SandboxPolicy::new_full_auto_policy()
|
||||
} else {
|
||||
match sandbox.permissions.map(Into::into) {
|
||||
Some(sandbox_policy) => sandbox_policy,
|
||||
None => SandboxPolicy::new_read_only_policy(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
fn main() -> anyhow::Result<()> {
|
||||
eprintln!("codex-linux-sandbox is not supported on this platform.");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn main() -> anyhow::Result<()> {
|
||||
use clap::Parser;
|
||||
use codex_cli::LandlockCommand;
|
||||
use codex_cli::create_sandbox_policy;
|
||||
use codex_cli::landlock;
|
||||
|
||||
let LandlockCommand {
|
||||
full_auto,
|
||||
sandbox,
|
||||
command,
|
||||
} = LandlockCommand::parse();
|
||||
let sandbox_policy = create_sandbox_policy(full_auto, sandbox);
|
||||
landlock::run_landlock(command, sandbox_policy)?;
|
||||
Ok(())
|
||||
}
|
||||
35
codex-rs/cli/src/login.rs
Normal file
35
codex-rs/cli/src/login.rs
Normal file
@@ -0,0 +1,35 @@
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_login::login_with_chatgpt;
|
||||
|
||||
/// Runs the "login with ChatGPT" flow and terminates the process.
///
/// Parses the `-c` overrides, loads the `Config` with them (and default
/// `ConfigOverrides`), then calls `login_with_chatgpt` with the configured
/// `codex_home` and output capturing disabled.
///
/// This function never returns: every path prints a message to stderr and
/// calls `std::process::exit`, with status `0` on a successful login and `1`
/// on an override-parsing, config-loading, or login error.
pub async fn run_login_with_chatgpt(cli_config_overrides: CliConfigOverrides) -> ! {
|
||||
let cli_overrides = match cli_config_overrides.parse_overrides() {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
eprintln!("Error parsing -c overrides: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
|
||||
let config_overrides = ConfigOverrides::default();
|
||||
let config = match Config::load_with_cli_overrides(cli_overrides, config_overrides) {
|
||||
Ok(config) => config,
|
||||
Err(e) => {
|
||||
eprintln!("Error loading configuration: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
|
||||
// Named local so the boolean argument is self-describing at the call site.
let capture_output = false;
|
||||
match login_with_chatgpt(&config.codex_home, capture_output).await {
|
||||
Ok(_) => {
|
||||
// The success message goes to stderr as well, not stdout.
eprintln!("Successfully logged in");
|
||||
std::process::exit(0);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Error logging in: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,11 +1,12 @@
|
||||
use clap::Parser;
|
||||
use codex_cli::LandlockCommand;
|
||||
use codex_cli::SeatbeltCommand;
|
||||
use codex_cli::create_sandbox_policy;
|
||||
use codex_cli::login::run_login_with_chatgpt;
|
||||
use codex_cli::proto;
|
||||
use codex_cli::seatbelt;
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_exec::Cli as ExecCli;
|
||||
use codex_tui::Cli as TuiCli;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::proto::ProtoCli;
|
||||
|
||||
@@ -20,6 +21,9 @@ use crate::proto::ProtoCli;
|
||||
subcommand_negates_reqs = true
|
||||
)]
|
||||
struct MultitoolCli {
|
||||
#[clap(flatten)]
|
||||
pub config_overrides: CliConfigOverrides,
|
||||
|
||||
#[clap(flatten)]
|
||||
interactive: TuiCli,
|
||||
|
||||
@@ -33,6 +37,9 @@ enum Subcommand {
|
||||
#[clap(visible_alias = "e")]
|
||||
Exec(ExecCli),
|
||||
|
||||
/// Login with ChatGPT.
|
||||
Login(LoginCommand),
|
||||
|
||||
/// Experimental: run Codex as an MCP server.
|
||||
Mcp,
|
||||
|
||||
@@ -60,49 +67,72 @@ enum DebugCommand {
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
struct ReplProto {}
|
||||
struct LoginCommand {
|
||||
#[clap(skip)]
|
||||
config_overrides: CliConfigOverrides,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
fn main() -> anyhow::Result<()> {
|
||||
codex_linux_sandbox::run_with_sandbox(|codex_linux_sandbox_exe| async move {
|
||||
cli_main(codex_linux_sandbox_exe).await?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()> {
|
||||
let cli = MultitoolCli::parse();
|
||||
|
||||
match cli.subcommand {
|
||||
None => {
|
||||
codex_tui::run_main(cli.interactive)?;
|
||||
let mut tui_cli = cli.interactive;
|
||||
prepend_config_flags(&mut tui_cli.config_overrides, cli.config_overrides);
|
||||
codex_tui::run_main(tui_cli, codex_linux_sandbox_exe)?;
|
||||
}
|
||||
Some(Subcommand::Exec(exec_cli)) => {
|
||||
codex_exec::run_main(exec_cli).await?;
|
||||
Some(Subcommand::Exec(mut exec_cli)) => {
|
||||
prepend_config_flags(&mut exec_cli.config_overrides, cli.config_overrides);
|
||||
codex_exec::run_main(exec_cli, codex_linux_sandbox_exe).await?;
|
||||
}
|
||||
Some(Subcommand::Mcp) => {
|
||||
codex_mcp_server::run_main().await?;
|
||||
codex_mcp_server::run_main(codex_linux_sandbox_exe).await?;
|
||||
}
|
||||
Some(Subcommand::Proto(proto_cli)) => {
|
||||
Some(Subcommand::Login(mut login_cli)) => {
|
||||
prepend_config_flags(&mut login_cli.config_overrides, cli.config_overrides);
|
||||
run_login_with_chatgpt(login_cli.config_overrides).await;
|
||||
}
|
||||
Some(Subcommand::Proto(mut proto_cli)) => {
|
||||
prepend_config_flags(&mut proto_cli.config_overrides, cli.config_overrides);
|
||||
proto::run_main(proto_cli).await?;
|
||||
}
|
||||
Some(Subcommand::Debug(debug_args)) => match debug_args.cmd {
|
||||
DebugCommand::Seatbelt(SeatbeltCommand {
|
||||
command,
|
||||
sandbox,
|
||||
full_auto,
|
||||
}) => {
|
||||
let sandbox_policy = create_sandbox_policy(full_auto, sandbox);
|
||||
seatbelt::run_seatbelt(command, sandbox_policy).await?;
|
||||
DebugCommand::Seatbelt(mut seatbelt_cli) => {
|
||||
prepend_config_flags(&mut seatbelt_cli.config_overrides, cli.config_overrides);
|
||||
codex_cli::debug_sandbox::run_command_under_seatbelt(
|
||||
seatbelt_cli,
|
||||
codex_linux_sandbox_exe,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
#[cfg(unix)]
|
||||
DebugCommand::Landlock(LandlockCommand {
|
||||
command,
|
||||
sandbox,
|
||||
full_auto,
|
||||
}) => {
|
||||
let sandbox_policy = create_sandbox_policy(full_auto, sandbox);
|
||||
codex_cli::landlock::run_landlock(command, sandbox_policy)?;
|
||||
}
|
||||
#[cfg(not(unix))]
|
||||
DebugCommand::Landlock(_) => {
|
||||
anyhow::bail!("Landlock is only supported on Linux.");
|
||||
DebugCommand::Landlock(mut landlock_cli) => {
|
||||
prepend_config_flags(&mut landlock_cli.config_overrides, cli.config_overrides);
|
||||
codex_cli::debug_sandbox::run_command_under_landlock(
|
||||
landlock_cli,
|
||||
codex_linux_sandbox_exe,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Prepend root-level overrides so they have lower precedence than
|
||||
/// CLI-specific ones specified after the subcommand (if any).
|
||||
///
/// `cli_config_overrides` is consumed; its raw `key=value` strings are moved
/// to the front of `subcommand_config_overrides.raw_overrides`, keeping their
/// relative order.
fn prepend_config_flags(
|
||||
subcommand_config_overrides: &mut CliConfigOverrides,
|
||||
cli_config_overrides: CliConfigOverrides,
|
||||
) {
|
||||
// Splicing over the empty range `0..0` removes nothing and inserts the
// root-level entries before the existing ones.
// NOTE(review): "lower precedence" relies on later entries winning when the
// overrides are applied in order — confirm in the config loader.
subcommand_config_overrides
|
||||
.raw_overrides
|
||||
.splice(0..0, cli_config_overrides.raw_overrides);
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ use std::io::IsTerminal;
|
||||
use std::sync::Arc;
|
||||
|
||||
use clap::Parser;
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_core::Codex;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
@@ -13,9 +14,12 @@ use tracing::error;
|
||||
use tracing::info;
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
pub struct ProtoCli {}
|
||||
pub struct ProtoCli {
|
||||
#[clap(skip)]
|
||||
pub config_overrides: CliConfigOverrides,
|
||||
}
|
||||
|
||||
pub async fn run_main(_opts: ProtoCli) -> anyhow::Result<()> {
|
||||
pub async fn run_main(opts: ProtoCli) -> anyhow::Result<()> {
|
||||
if std::io::stdin().is_terminal() {
|
||||
anyhow::bail!("Protocol mode expects stdin to be a pipe, not a terminal");
|
||||
}
|
||||
@@ -24,7 +28,12 @@ pub async fn run_main(_opts: ProtoCli) -> anyhow::Result<()> {
|
||||
.with_writer(std::io::stderr)
|
||||
.init();
|
||||
|
||||
let config = Config::load_with_overrides(ConfigOverrides::default())?;
|
||||
let ProtoCli { config_overrides } = opts;
|
||||
let overrides_vec = config_overrides
|
||||
.parse_overrides()
|
||||
.map_err(anyhow::Error::msg)?;
|
||||
|
||||
let config = Config::load_with_cli_overrides(overrides_vec, ConfigOverrides::default())?;
|
||||
let ctrl_c = notify_on_sigint();
|
||||
let (codex, _init_id) = Codex::spawn(config, ctrl_c.clone()).await?;
|
||||
let codex = Arc::new(codex);
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
use codex_core::exec::StdioPolicy;
|
||||
use codex_core::exec::spawn_command_under_seatbelt;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
|
||||
use crate::exit_status::handle_exit_status;
|
||||
|
||||
pub async fn run_seatbelt(
|
||||
command: Vec<String>,
|
||||
sandbox_policy: SandboxPolicy,
|
||||
) -> anyhow::Result<()> {
|
||||
let cwd = std::env::current_dir()?;
|
||||
let mut child =
|
||||
spawn_command_under_seatbelt(command, &sandbox_policy, cwd, StdioPolicy::Inherit).await?;
|
||||
let status = child.wait().await?;
|
||||
handle_exit_status(status);
|
||||
}
|
||||
@@ -9,8 +9,11 @@ workspace = true
|
||||
[dependencies]
|
||||
clap = { version = "4", features = ["derive", "wrap_help"], optional = true }
|
||||
codex-core = { path = "../core" }
|
||||
toml = { version = "0.8", optional = true }
|
||||
serde = { version = "1", optional = true }
|
||||
|
||||
[features]
|
||||
# Separate feature so that `clap` is not a mandatory dependency.
|
||||
cli = ["clap"]
|
||||
cli = ["clap", "toml", "serde"]
|
||||
elapsed = []
|
||||
sandbox_summary = []
|
||||
|
||||
@@ -1,27 +1,23 @@
|
||||
//! Standard type to use with the `--approval-mode` CLI option.
|
||||
//! Available when the `cli` feature is enabled for the crate.
|
||||
|
||||
use clap::ArgAction;
|
||||
use clap::Parser;
|
||||
use clap::ValueEnum;
|
||||
|
||||
use codex_core::config::parse_sandbox_permission_with_base_path;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::SandboxPermission;
|
||||
|
||||
#[derive(Clone, Copy, Debug, ValueEnum)]
|
||||
#[value(rename_all = "kebab-case")]
|
||||
pub enum ApprovalModeCliArg {
|
||||
/// Only run "trusted" commands (e.g. ls, cat, sed) without asking for user
|
||||
/// approval. Will escalate to the user if the model proposes a command that
|
||||
/// is not in the "trusted" set.
|
||||
Untrusted,
|
||||
|
||||
/// Run all commands without asking for user approval.
|
||||
/// Only asks for approval if a command fails to execute, in which case it
|
||||
/// will escalate to the user to ask for un-sandboxed execution.
|
||||
OnFailure,
|
||||
|
||||
/// Only run "known safe" commands (e.g. ls, cat, sed) without
|
||||
/// asking for user approval. Will escalate to the user if the model
|
||||
/// proposes a command that is not allow-listed.
|
||||
UnlessAllowListed,
|
||||
|
||||
/// Never ask for user approval
|
||||
/// Execution failures are immediately returned to the model.
|
||||
Never,
|
||||
@@ -30,44 +26,9 @@ pub enum ApprovalModeCliArg {
|
||||
impl From<ApprovalModeCliArg> for AskForApproval {
|
||||
fn from(value: ApprovalModeCliArg) -> Self {
|
||||
match value {
|
||||
ApprovalModeCliArg::Untrusted => AskForApproval::UnlessTrusted,
|
||||
ApprovalModeCliArg::OnFailure => AskForApproval::OnFailure,
|
||||
ApprovalModeCliArg::UnlessAllowListed => AskForApproval::UnlessAllowListed,
|
||||
ApprovalModeCliArg::Never => AskForApproval::Never,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
pub struct SandboxPermissionOption {
|
||||
/// Specify this flag multiple times to specify the full set of permissions
|
||||
/// to grant to Codex.
|
||||
///
|
||||
/// ```shell
|
||||
/// codex -s disk-full-read-access \
|
||||
/// -s disk-write-cwd \
|
||||
/// -s disk-write-platform-user-temp-folder \
|
||||
/// -s disk-write-platform-global-temp-folder
|
||||
/// ```
|
||||
///
|
||||
/// Note disk-write-folder takes a value:
|
||||
///
|
||||
/// ```shell
|
||||
/// -s disk-write-folder=$HOME/.pyenv/shims
|
||||
/// ```
|
||||
///
|
||||
/// These permissions are quite broad and should be used with caution:
|
||||
///
|
||||
/// ```shell
|
||||
/// -s disk-full-write-access
|
||||
/// -s network-full-access
|
||||
/// ```
|
||||
#[arg(long = "sandbox-permission", short = 's', action = ArgAction::Append, value_parser = parse_sandbox_permission)]
|
||||
pub permissions: Option<Vec<SandboxPermission>>,
|
||||
}
|
||||
|
||||
/// Custom value-parser so we can keep the CLI surface small *and*
|
||||
/// still handle the parameterised `disk-write-folder` case.
|
||||
fn parse_sandbox_permission(raw: &str) -> std::io::Result<SandboxPermission> {
|
||||
let base_path = std::env::current_dir()?;
|
||||
parse_sandbox_permission_with_base_path(raw, base_path)
|
||||
}
|
||||
|
||||
170
codex-rs/common/src/config_override.rs
Normal file
170
codex-rs/common/src/config_override.rs
Normal file
@@ -0,0 +1,170 @@
|
||||
//! Support for `-c key=value` overrides shared across Codex CLI tools.
|
||||
//!
|
||||
//! This module provides a [`CliConfigOverrides`] struct that can be embedded
|
||||
//! into a `clap`-derived CLI struct using `#[clap(flatten)]`. Each occurrence
|
||||
//! of `-c key=value` (or `--config key=value`) will be collected as a raw
|
||||
//! string. Helper methods are provided to convert the raw strings into
|
||||
//! key/value pairs as well as to apply them onto a mutable
|
||||
//! `toml::Value` representing the configuration tree.
|
||||
|
||||
use clap::ArgAction;
|
||||
use clap::Parser;
|
||||
use serde::de::Error as SerdeError;
|
||||
use toml::Value;
|
||||
|
||||
/// CLI option that captures arbitrary configuration overrides specified as
|
||||
/// `-c key=value`. It intentionally keeps both halves **unparsed** so that the
|
||||
/// calling code can decide how to interpret the right-hand side.
|
||||
#[derive(Parser, Debug, Default, Clone)]
|
||||
pub struct CliConfigOverrides {
|
||||
/// Override a configuration value that would otherwise be loaded from
|
||||
/// `~/.codex/config.toml`. Use a dotted path (`foo.bar.baz`) to override
|
||||
/// nested values. The `value` portion is parsed as TOML. If it fails to
|
||||
/// parse as TOML, the raw string is used as a literal.
|
||||
///
|
||||
/// Examples:
|
||||
/// - `-c model="o3"`
|
||||
/// - `-c 'sandbox_permissions=["disk-full-read-access"]'`
|
||||
/// - `-c shell_environment_policy.inherit=all`
|
||||
#[arg(
|
||||
short = 'c',
|
||||
long = "config",
|
||||
value_name = "key=value",
|
||||
action = ArgAction::Append,
|
||||
global = true,
|
||||
)]
|
||||
pub raw_overrides: Vec<String>,
|
||||
}
|
||||
|
||||
impl CliConfigOverrides {
|
||||
/// Parse the raw strings captured from the CLI into a list of `(path,
|
||||
/// value)` tuples where `value` is a `toml::Value`.
|
||||
///
/// Keys and values are trimmed of surrounding whitespace. A value that is
/// not valid TOML is kept as a TOML string.
///
/// # Errors
///
/// Returns a message if an entry has no `=` or has an empty key.
pub fn parse_overrides(&self) -> Result<Vec<(String, Value)>, String> {
|
||||
self.raw_overrides
|
||||
.iter()
|
||||
.map(|s| {
|
||||
// Only split on the *first* '=' so values are free to contain
|
||||
// the character.
|
||||
let mut parts = s.splitn(2, '=');
|
||||
let key = match parts.next() {
|
||||
Some(k) => k.trim(),
|
||||
None => return Err("Override missing key".to_string()),
|
||||
};
|
||||
let value_str = parts
|
||||
.next()
|
||||
.ok_or_else(|| format!("Invalid override (missing '='): {s}"))?
|
||||
.trim();
|
||||
|
|
||||
if key.is_empty() {
|
||||
return Err(format!("Empty key in override: {s}"));
|
||||
}
|
||||
|
|
||||
// Attempt to parse as a TOML value. If that fails, treat it as a raw
|
||||
// string. This allows convenient usage such as
|
||||
// `-c model=o3` without the quotes.
|
||||
let value: Value = match parse_toml_value(value_str) {
|
||||
Ok(v) => v,
|
||||
Err(_) => Value::String(value_str.to_string()),
|
||||
};
|
||||
|
|
||||
Ok((key.to_string(), value))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
|
||||
/// Apply all parsed overrides onto `target`. Intermediate objects will be
|
||||
/// created as necessary. Values located at the destination path will be
|
||||
/// replaced.
|
||||
///
/// Overrides are applied in the order they were given, so a later entry
/// for the same path replaces an earlier one.
///
/// # Errors
///
/// Returns the error from `parse_overrides`; in that case `target` is
/// left untouched because parsing happens before any override is applied.
pub fn apply_on_value(&self, target: &mut Value) -> Result<(), String> {
|
||||
let overrides = self.parse_overrides()?;
|
||||
for (path, value) in overrides {
|
||||
apply_single_override(target, &path, value);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply a single override onto `root`, creating intermediate objects as
|
||||
/// necessary.
|
||||
///
/// `path` is split on `.`; every segment but the last names a table to
/// descend into, and the last segment is the key that receives `value`.
/// Any non-table value encountered along the way (including `root` itself)
/// is overwritten with a fresh table.
fn apply_single_override(root: &mut Value, path: &str, value: Value) {
|
||||
use toml::value::Table;
|
||||
|
|
||||
let parts: Vec<&str> = path.split('.').collect();
|
||||
let mut current = root;
|
||||
|
|
||||
for (i, part) in parts.iter().enumerate() {
|
||||
// `split` always yields at least one segment, so `parts.len() - 1`
// cannot underflow.
let is_last = i == parts.len() - 1;
|
||||
|
|
||||
if is_last {
|
||||
match current {
|
||||
Value::Table(tbl) => {
|
||||
tbl.insert((*part).to_string(), value);
|
||||
}
|
||||
// The parent is not a table: replace it with a table holding
// only the new key.
_ => {
|
||||
let mut tbl = Table::new();
|
||||
tbl.insert((*part).to_string(), value);
|
||||
*current = Value::Table(tbl);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
||||
// Traverse or create intermediate table.
|
||||
match current {
|
||||
Value::Table(tbl) => {
|
||||
current = tbl
|
||||
.entry((*part).to_string())
|
||||
.or_insert_with(|| Value::Table(Table::new()));
|
||||
}
|
||||
_ => {
|
||||
*current = Value::Table(Table::new());
|
||||
// Always matches: `current` was just assigned a table above.
if let Value::Table(tbl) = current {
|
||||
current = tbl
|
||||
.entry((*part).to_string())
|
||||
.or_insert_with(|| Value::Table(Table::new()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse `raw` as a single TOML value.
///
/// The text is wrapped as the right-hand side of an assignment to the
/// sentinel key `_x_`, the resulting one-line document is parsed as a table,
/// and the value stored under the sentinel is returned.
///
/// # Errors
///
/// Returns the parser's error when the wrapped document is not valid TOML
/// (e.g. for an unquoted string), or a custom error if the sentinel key is
/// absent from the parsed table.
fn parse_toml_value(raw: &str) -> Result<Value, toml::de::Error> {
|
||||
let wrapped = format!("_x_ = {raw}");
|
||||
let table: toml::Table = toml::from_str(&wrapped)?;
|
||||
table
|
||||
.get("_x_")
|
||||
.cloned()
|
||||
.ok_or_else(|| SerdeError::custom("missing sentinel key"))
|
||||
}
|
||||
|
||||
// Unit tests for `parse_toml_value`; compiled only for test builds with the
// `cli` feature enabled.
#[cfg(all(test, feature = "cli"))]
|
||||
#[allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
||||
// A bare integer literal parses to a TOML integer.
#[test]
|
||||
fn parses_basic_scalar() {
|
||||
let v = parse_toml_value("42").expect("parse");
|
||||
assert_eq!(v.as_integer(), Some(42));
|
||||
}
|
||||
|
|
||||
// An unquoted word is not a valid TOML value; `parse_overrides` relies on
// this error to fall back to a plain string.
#[test]
|
||||
fn fails_on_unquoted_string() {
|
||||
assert!(parse_toml_value("hello").is_err());
|
||||
}
|
||||
|
|
||||
// Arrays are accepted and keep all of their elements.
#[test]
|
||||
fn parses_array() {
|
||||
let v = parse_toml_value("[1, 2, 3]").expect("parse");
|
||||
let arr = v.as_array().expect("array");
|
||||
assert_eq!(arr.len(), 3);
|
||||
}
|
||||
|
|
||||
// Inline tables are accepted and expose their keys.
#[test]
|
||||
fn parses_inline_table() {
|
||||
let v = parse_toml_value("{a = 1, b = 2}").expect("parse");
|
||||
let tbl = v.as_table().expect("table");
|
||||
assert_eq!(tbl.get("a").unwrap().as_integer(), Some(1));
|
||||
assert_eq!(tbl.get("b").unwrap().as_integer(), Some(2));
|
||||
}
|
||||
}
|
||||
@@ -6,5 +6,14 @@ pub mod elapsed;
|
||||
|
||||
#[cfg(feature = "cli")]
|
||||
pub use approval_mode_cli_arg::ApprovalModeCliArg;
|
||||
|
||||
#[cfg(any(feature = "cli", test))]
|
||||
mod config_override;
|
||||
|
||||
#[cfg(feature = "cli")]
|
||||
pub use approval_mode_cli_arg::SandboxPermissionOption;
|
||||
pub use config_override::CliConfigOverrides;
|
||||
|
||||
mod sandbox_summary;
|
||||
|
||||
#[cfg(feature = "sandbox_summary")]
|
||||
pub use sandbox_summary::summarize_sandbox_policy;
|
||||
|
||||
28
codex-rs/common/src/sandbox_summary.rs
Normal file
28
codex-rs/common/src/sandbox_summary.rs
Normal file
@@ -0,0 +1,28 @@
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
|
||||
/// Produces a short, human-readable label for `sandbox_policy`.
///
/// - `DangerFullAccess` yields `"danger-full-access"`.
/// - `ReadOnly` yields `"read-only"`.
/// - `WorkspaceWrite` yields `"workspace-write"`, followed by the writable
///   roots in square brackets (comma-separated) when there are any, and by
///   `" (network access enabled)"` when `network_access` is set.
pub fn summarize_sandbox_policy(sandbox_policy: &SandboxPolicy) -> String {
|
||||
match sandbox_policy {
|
||||
SandboxPolicy::DangerFullAccess => "danger-full-access".to_string(),
|
||||
SandboxPolicy::ReadOnly => "read-only".to_string(),
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots,
|
||||
network_access,
|
||||
} => {
|
||||
let mut summary = "workspace-write".to_string();
|
||||
if !writable_roots.is_empty() {
|
||||
// Paths are rendered lossily, so non-UTF-8 components do not
// cause a failure.
summary.push_str(&format!(
|
||||
" [{}]",
|
||||
writable_roots
|
||||
.iter()
|
||||
.map(|p| p.to_string_lossy())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
));
|
||||
}
|
||||
if *network_access {
|
||||
summary.push_str(" (network access enabled)");
|
||||
}
|
||||
summary
|
||||
}
|
||||
}
|
||||
}
|
||||
454
codex-rs/config.md
Normal file
454
codex-rs/config.md
Normal file
@@ -0,0 +1,454 @@
|
||||
# Config
|
||||
|
||||
Codex supports several mechanisms for setting config values:
|
||||
|
||||
- Config-specific command-line flags, such as `--model o3` (highest precedence).
|
||||
- A generic `-c`/`--config` flag that takes a `key=value` pair, such as `--config model="o3"`.
|
||||
- The key can contain dots to set a value deeper than the root, e.g. `--config model_providers.openai.wire_api="chat"`.
|
||||
- Values can contain objects, such as `--config shell_environment_policy.include_only=["PATH", "HOME", "USER"]`.
|
||||
- For consistency with `config.toml`, values are in TOML format rather than JSON format, so use `{a = 1, b = 2}` rather than `{"a": 1, "b": 2}`.
|
||||
- If `value` cannot be parsed as a valid TOML value, it is treated as a string value. This means that both `-c model="o3"` and `-c model=o3` are equivalent.
|
||||
- The `$CODEX_HOME/config.toml` configuration file, where the `CODEX_HOME` environment variable defaults to `~/.codex`. (Note `CODEX_HOME` will also be where logs and other Codex-related information are stored.)
|
||||
|
||||
Both the `--config` flag and the `config.toml` file support the following options:
|
||||
|
||||
## model
|
||||
|
||||
The model that Codex should use.
|
||||
|
||||
```toml
|
||||
model = "o3" # overrides the default of "codex-mini-latest"
|
||||
```
|
||||
|
||||
## model_providers
|
||||
|
||||
This option lets you override and amend the default set of model providers bundled with Codex. This value is a map where the key is the value to use with `model_provider` to select the corresponding provider.
|
||||
|
||||
For example, if you wanted to add a provider that uses the OpenAI 4o model via the chat completions API, then you could add the following configuration:
|
||||
|
||||
```toml
|
||||
# Recall that in TOML, root keys must be listed before tables.
|
||||
model = "gpt-4o"
|
||||
model_provider = "openai-chat-completions"
|
||||
|
||||
[model_providers.openai-chat-completions]
|
||||
# Name of the provider that will be displayed in the Codex UI.
|
||||
name = "OpenAI using Chat Completions"
|
||||
# The path `/chat/completions` will be appended to this URL to make the POST
|
||||
# request for the chat completions.
|
||||
base_url = "https://api.openai.com/v1"
|
||||
# If `env_key` is set, identifies an environment variable that must be set when
|
||||
# using Codex with this provider. The value of the environment variable must be
|
||||
# non-empty and will be used in the `Bearer TOKEN` HTTP header for the POST request.
|
||||
env_key = "OPENAI_API_KEY"
|
||||
# Valid values for wire_api are "chat" and "responses". Defaults to "chat" if omitted.
|
||||
wire_api = "chat"
|
||||
# If necessary, extra query params that need to be added to the URL.
|
||||
# See the Azure example below.
|
||||
query_params = {}
|
||||
```
|
||||
|
||||
Note this makes it possible to use Codex CLI with non-OpenAI models, so long as they use a wire API that is compatible with the OpenAI chat completions API. For example, you could define the following provider to use Codex CLI with Ollama running locally:
|
||||
|
||||
```toml
|
||||
[model_providers.ollama]
|
||||
name = "Ollama"
|
||||
base_url = "http://localhost:11434/v1"
|
||||
```
|
||||
|
||||
Or a third-party provider (using a distinct environment variable for the API key):
|
||||
|
||||
```toml
|
||||
[model_providers.mistral]
|
||||
name = "Mistral"
|
||||
base_url = "https://api.mistral.ai/v1"
|
||||
env_key = "MISTRAL_API_KEY"
|
||||
```
|
||||
|
||||
Note that Azure requires `api-version` to be passed as a query parameter, so be sure to specify it as part of `query_params` when defining the Azure provider:
|
||||
|
||||
```toml
|
||||
[model_providers.azure]
|
||||
name = "Azure"
|
||||
# Make sure you set the appropriate subdomain for this URL.
|
||||
base_url = "https://YOUR_PROJECT_NAME.openai.azure.com/openai"
|
||||
env_key = "AZURE_OPENAI_API_KEY" # Or "OPENAI_API_KEY", whichever you use.
|
||||
query_params = { api-version = "2025-04-01-preview" }
|
||||
```
|
||||
|
||||
## model_provider
|
||||
|
||||
Identifies which provider to use from the `model_providers` map. Defaults to `"openai"`.
|
||||
|
||||
Note that if you override `model_provider`, then you likely want to override
|
||||
`model`, as well. For example, if you are running ollama with Mistral locally,
|
||||
then you would need to add the following to your config in addition to the new entry in the `model_providers` map:
|
||||
|
||||
```toml
|
||||
model = "mistral"
|
||||
model_provider = "ollama"
|
||||
```
|
||||
|
||||
## approval_policy
|
||||
|
||||
Determines when the user should be prompted to approve whether Codex can execute a command:
|
||||
|
||||
```toml
|
||||
# Codex has hardcoded logic that defines a set of "trusted" commands.
|
||||
# Setting the approval_policy to `untrusted` means that Codex will prompt the
|
||||
# user before running a command not in the "trusted" set.
|
||||
#
|
||||
# See https://github.com/openai/codex/issues/1260 for the plan to enable
|
||||
# end-users to define their own trusted commands.
|
||||
approval_policy = "untrusted"
|
||||
```
|
||||
|
||||
```toml
|
||||
# If the command fails when run in the sandbox, Codex asks for permission to
|
||||
# retry the command outside the sandbox.
|
||||
approval_policy = "on-failure"
|
||||
```
|
||||
|
||||
```toml
|
||||
# User is never prompted: if the command fails, Codex will automatically try
|
||||
# something else. Note the `exec` subcommand always uses this mode.
|
||||
approval_policy = "never"
|
||||
```
|
||||
|
||||
## profiles
|
||||
|
||||
A _profile_ is a collection of configuration values that can be set together. Multiple profiles can be defined in `config.toml` and you can specify the one you
|
||||
want to use at runtime via the `--profile` flag.
|
||||
|
||||
Here is an example of a `config.toml` that defines multiple profiles:
|
||||
|
||||
```toml
|
||||
model = "o3"
|
||||
approval_policy = "untrusted"
|
||||
disable_response_storage = false
|
||||
|
||||
# Setting `profile` is equivalent to specifying `--profile o3` on the command
|
||||
# line, though the `--profile` flag can still be used to override this value.
|
||||
profile = "o3"
|
||||
|
||||
[model_providers.openai-chat-completions]
|
||||
name = "OpenAI using Chat Completions"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
env_key = "OPENAI_API_KEY"
|
||||
wire_api = "chat"
|
||||
|
||||
[profiles.o3]
|
||||
model = "o3"
|
||||
model_provider = "openai"
|
||||
approval_policy = "never"
|
||||
|
||||
[profiles.gpt3]
|
||||
model = "gpt-3.5-turbo"
|
||||
model_provider = "openai-chat-completions"
|
||||
|
||||
[profiles.zdr]
|
||||
model = "o3"
|
||||
model_provider = "openai"
|
||||
approval_policy = "on-failure"
|
||||
disable_response_storage = true
|
||||
```
|
||||
|
||||
Users can specify config values at multiple levels. Order of precedence is as follows:
|
||||
|
||||
1. custom command-line argument, e.g., `--model o3`
|
||||
2. as part of a profile, where the profile is selected via the `--profile` CLI flag (or via `profile` in the config file itself)
|
||||
3. as an entry in `config.toml`, e.g., `model = "o3"`
|
||||
4. the default value that comes with Codex CLI (i.e., Codex CLI defaults to `codex-mini-latest`)
|
||||
|
||||
## model_reasoning_effort
|
||||
|
||||
If the model name starts with `"o"` (as in `"o3"` or `"o4-mini"`) or `"codex"`, reasoning is enabled by default when using the Responses API. As explained in the [OpenAI Platform documentation](https://platform.openai.com/docs/guides/reasoning?api-mode=responses#get-started-with-reasoning), this can be set to:
|
||||
|
||||
- `"low"`
|
||||
- `"medium"` (default)
|
||||
- `"high"`
|
||||
|
||||
To disable reasoning, set `model_reasoning_effort` to `"none"` in your config:
|
||||
|
||||
```toml
|
||||
model_reasoning_effort = "none" # disable reasoning
|
||||
```
|
||||
|
||||
## model_reasoning_summary
|
||||
|
||||
If the model name starts with `"o"` (as in `"o3"` or `"o4-mini"`) or `"codex"`, reasoning is enabled by default when using the Responses API. As explained in the [OpenAI Platform documentation](https://platform.openai.com/docs/guides/reasoning?api-mode=responses#reasoning-summaries), this can be set to:
|
||||
|
||||
- `"auto"` (default)
|
||||
- `"concise"`
|
||||
- `"detailed"`
|
||||
|
||||
To disable reasoning summaries, set `model_reasoning_summary` to `"none"` in your config:
|
||||
|
||||
```toml
|
||||
model_reasoning_summary = "none" # disable reasoning summaries
|
||||
```
|
||||
|
||||
## sandbox
|
||||
|
||||
The `sandbox` configuration determines the _sandbox policy_ that Codex uses to execute untrusted commands. The `mode` determines the "base policy." Currently, only `workspace-write` supports additional configuration options, but this may change in the future.
|
||||
|
||||
The default policy is `read-only`, which means commands can read any file on disk, but attempts to write a file or access the network will be blocked.
|
||||
|
||||
```toml
|
||||
[sandbox]
|
||||
mode = "read-only"
|
||||
```
|
||||
|
||||
A more relaxed policy is `workspace-write`. When specified, the current working directory for the Codex task will be writable (as well as `$TMPDIR` on macOS). Note that the CLI defaults to using `cwd` where it was spawned, though this can be overridden using `--cwd/-C`.
|
||||
|
||||
```toml
|
||||
[sandbox]
|
||||
mode = "workspace-write"
|
||||
|
||||
# By default, only the cwd for the Codex session will be writable (and $TMPDIR on macOS),
|
||||
# but you can specify additional writable folders in this array.
|
||||
writable_roots = [
|
||||
"/tmp",
|
||||
]
|
||||
network_access = false # Like read-only, this also defaults to false and can be omitted.
|
||||
```
|
||||
|
||||
To disable sandboxing altogether, specify `danger-full-access` like so:
|
||||
|
||||
```toml
|
||||
[sandbox]
|
||||
mode = "danger-full-access"
|
||||
```
|
||||
|
||||
This is reasonable to use if Codex is running in an environment that provides its own sandboxing (such as a Docker container) such that further sandboxing is unnecessary.
|
||||
|
||||
Though using this option may also be necessary if you try to use Codex in environments where its native sandboxing mechanisms are unsupported, such as older Linux kernels or on Windows.
|
||||
|
||||
## mcp_servers
|
||||
|
||||
Defines the list of MCP servers that Codex can consult for tool use. Currently, only servers that are launched by executing a program that communicates over stdio are supported. For servers that use the SSE transport, consider an adapter like [mcp-proxy](https://github.com/sparfenyuk/mcp-proxy).
|
||||
|
||||
**Note:** Codex may cache the list of tools and resources from an MCP server so that Codex can include this information in context at startup without spawning all the servers. This is designed to save resources by loading MCP servers lazily.
|
||||
|
||||
This config option is comparable to how Claude and Cursor define `mcpServers` in their respective JSON config files, though because Codex uses TOML for its config language, the format is slightly different. For example, the following config in JSON:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"server-name": {
|
||||
"command": "npx",
|
||||
"args": ["-y", "mcp-server"],
|
||||
"env": {
|
||||
"API_KEY": "value"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Should be represented as follows in `~/.codex/config.toml`:
|
||||
|
||||
```toml
|
||||
# IMPORTANT: the top-level key is `mcp_servers` rather than `mcpServers`.
|
||||
[mcp_servers.server-name]
|
||||
command = "npx"
|
||||
args = ["-y", "mcp-server"]
|
||||
env = { "API_KEY" = "value" }
|
||||
```
|
||||
|
||||
## disable_response_storage
|
||||
|
||||
Currently, customers whose accounts are set to use Zero Data Retention (ZDR) must set `disable_response_storage` to `true` so that Codex uses an alternative to the Responses API that works with ZDR:
|
||||
|
||||
```toml
|
||||
disable_response_storage = true
|
||||
```
|
||||
|
||||
## shell_environment_policy
|
||||
|
||||
Codex spawns subprocesses (e.g. when executing a `local_shell` tool-call suggested by the assistant). By default it passes **only a minimal core subset** of your environment to those subprocesses to avoid leaking credentials. You can tune this behavior via the **`shell_environment_policy`** block in
|
||||
`config.toml`:
|
||||
|
||||
```toml
|
||||
[shell_environment_policy]
|
||||
# inherit can be "core" (default), "all", or "none"
|
||||
inherit = "core"
|
||||
# set to true to *skip* the filter for `"*KEY*"` and `"*TOKEN*"`
|
||||
ignore_default_excludes = false
|
||||
# exclude patterns (case-insensitive globs)
|
||||
exclude = ["AWS_*", "AZURE_*"]
|
||||
# force-set / override values
|
||||
set = { CI = "1" }
|
||||
# if provided, *only* vars matching these patterns are kept
|
||||
include_only = ["PATH", "HOME"]
|
||||
```
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
| ------------------------- | -------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `inherit` | string | `core` | Starting template for the environment:<br>`core` (`HOME`, `PATH`, `USER`, …), `all` (clone full parent env), or `none` (start empty). |
|
||||
| `ignore_default_excludes` | boolean | `false` | When `false`, Codex removes any var whose **name** contains `KEY`, `SECRET`, or `TOKEN` (case-insensitive) before other rules run. |
|
||||
| `exclude` | array<string> | `[]` | Case-insensitive glob patterns to drop after the default filter.<br>Examples: `"AWS_*"`, `"AZURE_*"`. |
|
||||
| `set` | table<string,string> | `{}` | Explicit key/value overrides or additions – always win over inherited values. |
|
||||
| `include_only` | array<string> | `[]` | If non-empty, a whitelist of patterns; only variables that match _one_ pattern survive the final step. (Generally used with `inherit = "all"`.) |
|
||||
|
||||
The patterns are **glob style**, not full regular expressions: `*` matches any
|
||||
number of characters, `?` matches exactly one, and character classes like
|
||||
`[A-Z]`/`[^0-9]` are supported. Matching is always **case-insensitive**. This
|
||||
syntax is documented in code as `EnvironmentVariablePattern` (see
|
||||
`core/src/config_types.rs`).
|
||||
|
||||
If you just need a clean slate with a few custom entries you can write:
|
||||
|
||||
```toml
|
||||
[shell_environment_policy]
|
||||
inherit = "none"
|
||||
set = { PATH = "/usr/bin", MY_FLAG = "1" }
|
||||
```
|
||||
|
||||
Currently, `CODEX_SANDBOX_NETWORK_DISABLED=1` is also added to the environment, assuming network is disabled. This is not configurable.
|
||||
|
||||
## notify
|
||||
|
||||
Specify a program that will be executed to get notified about events generated by Codex. Note that the program will receive the notification argument as a string of JSON, e.g.:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "agent-turn-complete",
|
||||
"turn-id": "12345",
|
||||
"input-messages": ["Rename `foo` to `bar` and update the callsites."],
|
||||
"last-assistant-message": "Rename complete and verified `cargo build` succeeds."
|
||||
}
|
||||
```
|
||||
|
||||
The `"type"` property will always be set. Currently, `"agent-turn-complete"` is the only notification type that is supported.
|
||||
|
||||
As an example, here is a Python script that parses the JSON and decides whether to show a desktop push notification using [terminal-notifier](https://github.com/julienXX/terminal-notifier) on macOS:
|
||||
|
||||
```python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def main() -> int:
|
||||
if len(sys.argv) != 2:
|
||||
print("Usage: notify.py <NOTIFICATION_JSON>")
|
||||
return 1
|
||||
|
||||
try:
|
||||
notification = json.loads(sys.argv[1])
|
||||
except json.JSONDecodeError:
|
||||
return 1
|
||||
|
||||
match notification_type := notification.get("type"):
|
||||
case "agent-turn-complete":
|
||||
assistant_message = notification.get("last-assistant-message")
|
||||
if assistant_message:
|
||||
title = f"Codex: {assistant_message}"
|
||||
else:
|
||||
title = "Codex: Turn Complete!"
|
||||
input_messages = notification.get("input_messages", [])
|
||||
message = " ".join(input_messages)
|
||||
title += message
|
||||
case _:
|
||||
print(f"not sending a push notification for: {notification_type}")
|
||||
return 0
|
||||
|
||||
subprocess.check_output(
|
||||
[
|
||||
"terminal-notifier",
|
||||
"-title",
|
||||
title,
|
||||
"-message",
|
||||
message,
|
||||
"-group",
|
||||
"codex",
|
||||
"-ignoreDnD",
|
||||
"-activate",
|
||||
"com.googlecode.iterm2",
|
||||
]
|
||||
)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
```
|
||||
|
||||
To have Codex use this script for notifications, you would configure it via `notify` in `~/.codex/config.toml` using the appropriate path to `notify.py` on your computer:
|
||||
|
||||
```toml
|
||||
notify = ["python3", "/Users/mbolin/.codex/notify.py"]
|
||||
```
|
||||
|
||||
## history
|
||||
|
||||
By default, Codex CLI records messages sent to the model in `$CODEX_HOME/history.jsonl`. Note that on UNIX, the file permissions are set to `0600`, so it should only be readable and writable by the owner.
|
||||
|
||||
To disable this behavior, configure `[history]` as follows:
|
||||
|
||||
```toml
|
||||
[history]
|
||||
persistence = "none" # "save-all" is the default value
|
||||
```
|
||||
|
||||
## file_opener
|
||||
|
||||
Identifies the editor/URI scheme to use for hyperlinking citations in model output. If set, citations to files in the model output will be hyperlinked using the specified URI scheme so they can be ctrl/cmd-clicked from the terminal to open them.
|
||||
|
||||
For example, if the model output includes a reference such as `【F:/home/user/project/main.py†L42-L50】`, then this would be rewritten to link to the URI `vscode://file/home/user/project/main.py:42`.
|
||||
|
||||
Note this is **not** a general editor setting (like `$EDITOR`), as it only accepts a fixed set of values:
|
||||
|
||||
- `"vscode"` (default)
|
||||
- `"vscode-insiders"`
|
||||
- `"windsurf"`
|
||||
- `"cursor"`
|
||||
- `"none"` to explicitly disable this feature
|
||||
|
||||
Currently, `"vscode"` is the default, though Codex does not verify VS Code is installed. As such, `file_opener` may default to `"none"` or something else in the future.
|
||||
|
||||
## hide_agent_reasoning
|
||||
|
||||
Codex intermittently emits "reasoning" events that show the model’s internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.
|
||||
|
||||
Setting `hide_agent_reasoning` to `true` suppresses these events in **both** the TUI as well as the headless `exec` sub-command:
|
||||
|
||||
```toml
|
||||
hide_agent_reasoning = true # defaults to false
|
||||
```
|
||||
|
||||
## model_context_window
|
||||
|
||||
The size of the context window for the model, in tokens.
|
||||
|
||||
In general, Codex knows the context window for the most common OpenAI models, but if you are using a new model with an old version of the Codex CLI, then you can use `model_context_window` to tell Codex what value to use to determine how much context is left during a conversation.
|
||||
|
||||
## model_max_output_tokens
|
||||
|
||||
This is analogous to `model_context_window`, but for the maximum number of output tokens for the model.
|
||||
|
||||
## project_doc_max_bytes
|
||||
|
||||
Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
|
||||
|
||||
## tui
|
||||
|
||||
Options that are specific to the TUI.
|
||||
|
||||
```toml
|
||||
[tui]
|
||||
# This will make it so that Codex does not try to process mouse events, which
|
||||
# means your Terminal's native drag-to-select text selection and copy/paste
|
||||
# should work. The tradeoff is that Codex will not receive any mouse events, so
|
||||
# it will not be possible to use the mouse to scroll conversation history.
|
||||
#
|
||||
# Note that most terminals support holding down a modifier key when using the
|
||||
# mouse to support text selection. For example, even if Codex mouse capture is
|
||||
# enabled (i.e., this is set to `false`), you can still hold down alt while
|
||||
# dragging the mouse to select text.
|
||||
disable_mouse_capture = true # defaults to `false`
|
||||
```
|
||||
@@ -16,6 +16,7 @@ async-channel = "2.3.1"
|
||||
base64 = "0.21"
|
||||
bytes = "1.10.1"
|
||||
codex-apply-patch = { path = "../apply-patch" }
|
||||
codex-login = { path = "../login" }
|
||||
codex-mcp-client = { path = "../mcp-client" }
|
||||
dirs = "6"
|
||||
env-flags = "0.1.1"
|
||||
@@ -31,6 +32,8 @@ rand = "0.9"
|
||||
reqwest = { version = "0.12", features = ["json", "stream"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
strum = "0.27.1"
|
||||
strum_macros = "0.27.1"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3", features = ["formatting", "local-offset", "macros"] }
|
||||
tokio = { version = "1", features = [
|
||||
@@ -46,9 +49,9 @@ tracing = { version = "0.1.41", features = ["log"] }
|
||||
tree-sitter = "0.25.3"
|
||||
tree-sitter-bash = "0.23.3"
|
||||
uuid = { version = "1", features = ["serde", "v4"] }
|
||||
wildmatch = "2.4.0"
|
||||
|
||||
[target.'cfg(target_os = "linux")'.dependencies]
|
||||
libc = "0.2.172"
|
||||
landlock = "0.4.1"
|
||||
seccompiler = "0.5.0"
|
||||
|
||||
@@ -56,8 +59,13 @@ seccompiler = "0.5.0"
|
||||
[target.x86_64-unknown-linux-musl.dependencies]
|
||||
openssl-sys = { version = "*", features = ["vendored"] }
|
||||
|
||||
# Build OpenSSL from source for musl builds.
|
||||
[target.aarch64-unknown-linux-musl.dependencies]
|
||||
openssl-sys = { version = "*", features = ["vendored"] }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_cmd = "2"
|
||||
maplit = "1.0.2"
|
||||
predicates = "3"
|
||||
pretty_assertions = "1.4.1"
|
||||
tempfile = "3"
|
||||
|
||||
45
codex-rs/core/config_template.toml
Normal file
45
codex-rs/core/config_template.toml
Normal file
@@ -0,0 +1,45 @@
|
||||
# Codex configuration template
|
||||
# See https://github.com/openai/codex/blob/main/codex-rs/config.md for details.
|
||||
# All values below represent defaults. Uncomment to override them.
|
||||
|
||||
# model = "codex-mini-latest"
|
||||
# model_provider = "openai"
|
||||
# approval_policy = "unless-allow-listed"
|
||||
# disable_response_storage = false
|
||||
# project_doc_max_bytes = 32768
|
||||
# file_opener = "vscode"
|
||||
# hide_agent_reasoning = false
|
||||
# model_reasoning_effort = "medium"
|
||||
# model_reasoning_summary = "auto"
|
||||
|
||||
[shell_environment_policy]
|
||||
# inherit = "core"
|
||||
# ignore_default_excludes = false
|
||||
# exclude = []
|
||||
# set = {}
|
||||
# include_only = []
|
||||
|
||||
[sandbox]
|
||||
# mode = "read-only"
|
||||
# writable_roots = []
|
||||
# network_access = false
|
||||
|
||||
[history]
|
||||
# persistence = "save-all"
|
||||
|
||||
[tui]
|
||||
# disable_mouse_capture = false
|
||||
|
||||
# Example provider override
|
||||
#[model_providers.openai]
|
||||
# name = "OpenAI"
|
||||
# base_url = "https://api.openai.com/v1"
|
||||
# env_key = "OPENAI_API_KEY"
|
||||
# wire_api = "chat"
|
||||
|
||||
# Example profile
|
||||
#[profiles.example]
|
||||
# model = "o3"
|
||||
# model_provider = "openai"
|
||||
# approval_policy = "never"
|
||||
|
||||
@@ -25,10 +25,10 @@ use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
|
||||
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
use crate::models::ContentItem;
|
||||
use crate::models::ResponseItem;
|
||||
use crate::openai_tools::create_tools_json_for_chat_completions_api;
|
||||
use crate::util::backoff;
|
||||
|
||||
/// Implementation for the classic Chat Completions API. This is intentionally
|
||||
/// minimal: we only stream back plain assistant text.
|
||||
/// Implementation for the classic Chat Completions API.
|
||||
pub(crate) async fn stream_chat_completions(
|
||||
prompt: &Prompt,
|
||||
model: &str,
|
||||
@@ -38,35 +38,88 @@ pub(crate) async fn stream_chat_completions(
|
||||
// Build messages array
|
||||
let mut messages = Vec::<serde_json::Value>::new();
|
||||
|
||||
let full_instructions = prompt.get_full_instructions();
|
||||
let full_instructions = prompt.get_full_instructions(model);
|
||||
messages.push(json!({"role": "system", "content": full_instructions}));
|
||||
|
||||
for item in &prompt.input {
|
||||
if let ResponseItem::Message { role, content } = item {
|
||||
let mut text = String::new();
|
||||
for c in content {
|
||||
match c {
|
||||
ContentItem::InputText { text: t } | ContentItem::OutputText { text: t } => {
|
||||
text.push_str(t);
|
||||
match item {
|
||||
ResponseItem::Message { role, content } => {
|
||||
let mut text = String::new();
|
||||
for c in content {
|
||||
match c {
|
||||
ContentItem::InputText { text: t }
|
||||
| ContentItem::OutputText { text: t } => {
|
||||
text.push_str(t);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
messages.push(json!({"role": role, "content": text}));
|
||||
}
|
||||
ResponseItem::FunctionCall {
|
||||
name,
|
||||
arguments,
|
||||
call_id,
|
||||
} => {
|
||||
messages.push(json!({
|
||||
"role": "assistant",
|
||||
"content": null,
|
||||
"tool_calls": [{
|
||||
"id": call_id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": name,
|
||||
"arguments": arguments,
|
||||
}
|
||||
}]
|
||||
}));
|
||||
}
|
||||
ResponseItem::LocalShellCall {
|
||||
id,
|
||||
call_id: _,
|
||||
status,
|
||||
action,
|
||||
} => {
|
||||
// Confirm with API team.
|
||||
messages.push(json!({
|
||||
"role": "assistant",
|
||||
"content": null,
|
||||
"tool_calls": [{
|
||||
"id": id.clone().unwrap_or_else(|| "".to_string()),
|
||||
"type": "local_shell_call",
|
||||
"status": status,
|
||||
"action": action,
|
||||
}]
|
||||
}));
|
||||
}
|
||||
ResponseItem::FunctionCallOutput { call_id, output } => {
|
||||
messages.push(json!({
|
||||
"role": "tool",
|
||||
"tool_call_id": call_id,
|
||||
"content": output.content,
|
||||
}));
|
||||
}
|
||||
ResponseItem::Reasoning { .. } | ResponseItem::Other => {
|
||||
// Omit these items from the conversation history.
|
||||
continue;
|
||||
}
|
||||
messages.push(json!({"role": role, "content": text}));
|
||||
}
|
||||
}
|
||||
|
||||
let tools_json = create_tools_json_for_chat_completions_api(prompt, model)?;
|
||||
let payload = json!({
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"stream": true
|
||||
"stream": true,
|
||||
"tools": tools_json,
|
||||
});
|
||||
|
||||
let base_url = provider.base_url.trim_end_matches('/');
|
||||
let url = format!("{}/chat/completions", base_url);
|
||||
let url = provider.get_full_url();
|
||||
|
||||
debug!(url, "POST (chat)");
|
||||
trace!("request payload: {}", payload);
|
||||
debug!(
|
||||
"POST to {url}: {}",
|
||||
serde_json::to_string_pretty(&payload).unwrap_or_default()
|
||||
);
|
||||
|
||||
let api_key = provider.api_key()?;
|
||||
let mut attempt = 0;
|
||||
@@ -134,6 +187,21 @@ where
|
||||
|
||||
let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
|
||||
// State to accumulate a function call across streaming chunks.
|
||||
// OpenAI may split the `arguments` string over multiple `delta` events
|
||||
// until the chunk whose `finish_reason` is `tool_calls` is emitted. We
|
||||
// keep collecting the pieces here and forward a single
|
||||
// `ResponseItem::FunctionCall` once the call is complete.
|
||||
#[derive(Default)]
|
||||
struct FunctionCallState {
|
||||
name: Option<String>,
|
||||
arguments: String,
|
||||
call_id: Option<String>,
|
||||
active: bool,
|
||||
}
|
||||
|
||||
let mut fn_call_state = FunctionCallState::default();
|
||||
|
||||
loop {
|
||||
let sse = match timeout(idle_timeout, stream.next()).await {
|
||||
Ok(Some(Ok(ev))) => ev,
|
||||
@@ -146,6 +214,7 @@ where
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::Completed {
|
||||
response_id: String::new(),
|
||||
token_usage: None,
|
||||
}))
|
||||
.await;
|
||||
return;
|
||||
@@ -163,6 +232,7 @@ where
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::Completed {
|
||||
response_id: String::new(),
|
||||
token_usage: None,
|
||||
}))
|
||||
.await;
|
||||
return;
|
||||
@@ -173,23 +243,90 @@ where
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
trace!("chat_completions received SSE chunk: {chunk:?}");
|
||||
|
||||
let content_opt = chunk
|
||||
.get("choices")
|
||||
.and_then(|c| c.get(0))
|
||||
.and_then(|c| c.get("delta"))
|
||||
.and_then(|d| d.get("content"))
|
||||
.and_then(|c| c.as_str());
|
||||
let choice_opt = chunk.get("choices").and_then(|c| c.get(0));
|
||||
|
||||
if let Some(content) = content_opt {
|
||||
let item = ResponseItem::Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: content.to_string(),
|
||||
}],
|
||||
};
|
||||
if let Some(choice) = choice_opt {
|
||||
// Handle assistant content tokens.
|
||||
if let Some(content) = choice
|
||||
.get("delta")
|
||||
.and_then(|d| d.get("content"))
|
||||
.and_then(|c| c.as_str())
|
||||
{
|
||||
let item = ResponseItem::Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: content.to_string(),
|
||||
}],
|
||||
};
|
||||
|
||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||
}
|
||||
|
||||
// Handle streaming function / tool calls.
|
||||
if let Some(tool_calls) = choice
|
||||
.get("delta")
|
||||
.and_then(|d| d.get("tool_calls"))
|
||||
.and_then(|tc| tc.as_array())
|
||||
{
|
||||
if let Some(tool_call) = tool_calls.first() {
|
||||
// Mark that we have an active function call in progress.
|
||||
fn_call_state.active = true;
|
||||
|
||||
// Extract call_id if present.
|
||||
if let Some(id) = tool_call.get("id").and_then(|v| v.as_str()) {
|
||||
fn_call_state.call_id.get_or_insert_with(|| id.to_string());
|
||||
}
|
||||
|
||||
// Extract function details if present.
|
||||
if let Some(function) = tool_call.get("function") {
|
||||
if let Some(name) = function.get("name").and_then(|n| n.as_str()) {
|
||||
fn_call_state.name.get_or_insert_with(|| name.to_string());
|
||||
}
|
||||
|
||||
if let Some(args_fragment) =
|
||||
function.get("arguments").and_then(|a| a.as_str())
|
||||
{
|
||||
fn_call_state.arguments.push_str(args_fragment);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Emit end-of-turn when finish_reason signals completion.
|
||||
if let Some(finish_reason) = choice.get("finish_reason").and_then(|v| v.as_str()) {
|
||||
match finish_reason {
|
||||
"tool_calls" if fn_call_state.active => {
|
||||
// Build the FunctionCall response item.
|
||||
let item = ResponseItem::FunctionCall {
|
||||
name: fn_call_state.name.clone().unwrap_or_else(|| "".to_string()),
|
||||
arguments: fn_call_state.arguments.clone(),
|
||||
call_id: fn_call_state.call_id.clone().unwrap_or_else(String::new),
|
||||
};
|
||||
|
||||
// Emit it downstream.
|
||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||
}
|
||||
"stop" => {
|
||||
// Regular turn without tool-call.
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Emit Completed regardless of reason so the agent can advance.
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::Completed {
|
||||
response_id: String::new(),
|
||||
token_usage: None,
|
||||
}))
|
||||
.await;
|
||||
|
||||
// Prepare for potential next turn (should not happen in same stream).
|
||||
// fn_call_state = FunctionCallState::default();
|
||||
|
||||
return; // End processing for this SSE stream.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -236,9 +373,14 @@ where
|
||||
Poll::Ready(None) => return Poll::Ready(None),
|
||||
Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(e))),
|
||||
Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item)))) => {
|
||||
// Accumulate *assistant* text but do not emit yet.
|
||||
if let crate::models::ResponseItem::Message { role, content } = &item {
|
||||
if role == "assistant" {
|
||||
// If this is an incremental assistant message chunk, accumulate but
|
||||
// do NOT emit yet. Forward any other item (e.g. FunctionCall) right
|
||||
// away so downstream consumers see it.
|
||||
|
||||
let is_assistant_delta = matches!(&item, crate::models::ResponseItem::Message { role, .. } if role == "assistant");
|
||||
|
||||
if is_assistant_delta {
|
||||
if let crate::models::ResponseItem::Message { content, .. } = &item {
|
||||
if let Some(text) = content.iter().find_map(|c| match c {
|
||||
crate::models::ContentItem::OutputText { text } => Some(text),
|
||||
_ => None,
|
||||
@@ -246,12 +388,18 @@ where
|
||||
this.cumulative.push_str(text);
|
||||
}
|
||||
}
|
||||
|
||||
// Swallow partial assistant chunk; keep polling.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Swallow partial event; keep polling.
|
||||
continue;
|
||||
// Not an assistant message – forward immediately.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id }))) => {
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
}))) => {
|
||||
if !this.cumulative.is_empty() {
|
||||
let aggregated_item = crate::models::ResponseItem::Message {
|
||||
role: "assistant".to_string(),
|
||||
@@ -261,7 +409,10 @@ where
|
||||
};
|
||||
|
||||
// Buffer Completed so it is returned *after* the aggregated message.
|
||||
this.pending_completed = Some(ResponseEvent::Completed { response_id });
|
||||
this.pending_completed = Some(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
});
|
||||
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(
|
||||
aggregated_item,
|
||||
@@ -269,8 +420,16 @@ where
|
||||
}
|
||||
|
||||
// Nothing aggregated – forward Completed directly.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id })));
|
||||
} // No other `Ok` variants exist at the moment, continue polling.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
})));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Created))) => {
|
||||
// These events are exclusive to the Responses API and
|
||||
// will never appear in a Chat Completions stream.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -284,7 +443,7 @@ pub(crate) trait AggregateStreamExt: Stream<Item = Result<ResponseEvent>> + Size
|
||||
///
|
||||
/// ```ignore
|
||||
/// OutputItemDone(<full message>)
|
||||
/// Completed { .. }
|
||||
/// Completed
|
||||
/// ```
|
||||
///
|
||||
/// No other `OutputItemDone` events will be seen by the caller.
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::io::BufRead;
|
||||
use std::path::Path;
|
||||
use std::sync::LazyLock;
|
||||
use std::time::Duration;
|
||||
|
||||
use bytes::Bytes;
|
||||
@@ -11,7 +9,6 @@ use reqwest::StatusCode;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::time::timeout;
|
||||
use tokio_util::io::ReaderStream;
|
||||
@@ -21,12 +18,13 @@ use tracing::warn;
|
||||
|
||||
use crate::chat_completions::AggregateStreamExt;
|
||||
use crate::chat_completions::stream_chat_completions;
|
||||
use crate::client_common::Payload;
|
||||
use crate::client_common::Prompt;
|
||||
use crate::client_common::Reasoning;
|
||||
use crate::client_common::ResponseEvent;
|
||||
use crate::client_common::ResponseStream;
|
||||
use crate::client_common::Summary;
|
||||
use crate::client_common::ResponsesApiRequest;
|
||||
use crate::client_common::create_reasoning_param_for_request;
|
||||
use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::EnvVarError;
|
||||
use crate::error::Result;
|
||||
@@ -36,84 +34,32 @@ use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
use crate::model_provider_info::ModelProviderInfo;
|
||||
use crate::model_provider_info::WireApi;
|
||||
use crate::models::ResponseItem;
|
||||
use crate::openai_tools::create_tools_json_for_responses_api;
|
||||
use crate::protocol::TokenUsage;
|
||||
use crate::util::backoff;
|
||||
|
||||
/// When serialized as JSON, this produces a valid "Tool" in the OpenAI
|
||||
/// Responses API.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(tag = "type")]
|
||||
enum OpenAiTool {
|
||||
#[serde(rename = "function")]
|
||||
Function(ResponsesApiTool),
|
||||
#[serde(rename = "local_shell")]
|
||||
LocalShell {},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
struct ResponsesApiTool {
|
||||
name: &'static str,
|
||||
description: &'static str,
|
||||
strict: bool,
|
||||
parameters: JsonSchema,
|
||||
}
|
||||
|
||||
/// Generic JSON‑Schema subset needed for our tool definitions
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(tag = "type", rename_all = "lowercase")]
|
||||
enum JsonSchema {
|
||||
String,
|
||||
Number,
|
||||
Array {
|
||||
items: Box<JsonSchema>,
|
||||
},
|
||||
Object {
|
||||
properties: BTreeMap<String, JsonSchema>,
|
||||
required: &'static [&'static str],
|
||||
#[serde(rename = "additionalProperties")]
|
||||
additional_properties: bool,
|
||||
},
|
||||
}
|
||||
|
||||
/// Tool usage specification
|
||||
static DEFAULT_TOOLS: LazyLock<Vec<OpenAiTool>> = LazyLock::new(|| {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"command".to_string(),
|
||||
JsonSchema::Array {
|
||||
items: Box::new(JsonSchema::String),
|
||||
},
|
||||
);
|
||||
properties.insert("workdir".to_string(), JsonSchema::String);
|
||||
properties.insert("timeout".to_string(), JsonSchema::Number);
|
||||
|
||||
vec![OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "shell",
|
||||
description: "Runs a shell command, and returns its output.",
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: &["command"],
|
||||
additional_properties: false,
|
||||
},
|
||||
})]
|
||||
});
|
||||
|
||||
static DEFAULT_CODEX_MODEL_TOOLS: LazyLock<Vec<OpenAiTool>> =
|
||||
LazyLock::new(|| vec![OpenAiTool::LocalShell {}]);
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ModelClient {
|
||||
model: String,
|
||||
client: reqwest::Client,
|
||||
provider: ModelProviderInfo,
|
||||
effort: ReasoningEffortConfig,
|
||||
summary: ReasoningSummaryConfig,
|
||||
}
|
||||
|
||||
impl ModelClient {
|
||||
pub fn new(model: impl ToString, provider: ModelProviderInfo) -> Self {
|
||||
pub fn new(
|
||||
model: impl ToString,
|
||||
provider: ModelProviderInfo,
|
||||
effort: ReasoningEffortConfig,
|
||||
summary: ReasoningSummaryConfig,
|
||||
) -> Self {
|
||||
Self {
|
||||
model: model.to_string(),
|
||||
client: reqwest::Client::new(),
|
||||
provider,
|
||||
effort,
|
||||
summary,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -161,48 +107,24 @@ impl ModelClient {
|
||||
return stream_from_fixture(path).await;
|
||||
}
|
||||
|
||||
// Assemble tool list: built-in tools + any extra tools from the prompt.
|
||||
let default_tools = if self.model.starts_with("codex") {
|
||||
&DEFAULT_CODEX_MODEL_TOOLS
|
||||
} else {
|
||||
&DEFAULT_TOOLS
|
||||
};
|
||||
let mut tools_json = Vec::with_capacity(default_tools.len() + prompt.extra_tools.len());
|
||||
for t in default_tools.iter() {
|
||||
tools_json.push(serde_json::to_value(t)?);
|
||||
}
|
||||
tools_json.extend(
|
||||
prompt
|
||||
.extra_tools
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
|
||||
);
|
||||
|
||||
debug!("tools_json: {}", serde_json::to_string_pretty(&tools_json)?);
|
||||
|
||||
let full_instructions = prompt.get_full_instructions();
|
||||
let payload = Payload {
|
||||
let full_instructions = prompt.get_full_instructions(&self.model);
|
||||
let tools_json = create_tools_json_for_responses_api(prompt, &self.model)?;
|
||||
let reasoning = create_reasoning_param_for_request(&self.model, self.effort, self.summary);
|
||||
let payload = ResponsesApiRequest {
|
||||
model: &self.model,
|
||||
instructions: &full_instructions,
|
||||
input: &prompt.input,
|
||||
tools: &tools_json,
|
||||
tool_choice: "auto",
|
||||
parallel_tool_calls: false,
|
||||
reasoning: Some(Reasoning {
|
||||
effort: "high",
|
||||
summary: Some(Summary::Auto),
|
||||
}),
|
||||
reasoning,
|
||||
previous_response_id: prompt.prev_id.clone(),
|
||||
store: prompt.store,
|
||||
stream: true,
|
||||
};
|
||||
|
||||
let base_url = self.provider.base_url.clone();
|
||||
let base_url = base_url.trim_end_matches('/');
|
||||
let url = format!("{}/responses", base_url);
|
||||
debug!(url, "POST");
|
||||
trace!("request payload: {}", serde_json::to_string(&payload)?);
|
||||
let url = self.provider.get_full_url();
|
||||
trace!("POST to {url}: {}", serde_json::to_string(&payload)?);
|
||||
|
||||
let mut attempt = 0;
|
||||
loop {
|
||||
@@ -244,7 +166,7 @@ impl ModelClient {
|
||||
// negligible.
|
||||
if !(status == StatusCode::TOO_MANY_REQUESTS || status.is_server_error()) {
|
||||
// Surface the error body to callers. Use `unwrap_or_default` per Clippy.
|
||||
let body = (res.text().await).unwrap_or_default();
|
||||
let body = res.text().await.unwrap_or_default();
|
||||
return Err(CodexErr::UnexpectedStatus(status, body));
|
||||
}
|
||||
|
||||
@@ -276,20 +198,6 @@ impl ModelClient {
|
||||
}
|
||||
}
|
||||
|
||||
fn mcp_tool_to_openai_tool(
|
||||
fully_qualified_name: String,
|
||||
tool: mcp_types::Tool,
|
||||
) -> serde_json::Value {
|
||||
// TODO(mbolin): Change the contract of this function to return
|
||||
// ResponsesApiTool.
|
||||
json!({
|
||||
"name": fully_qualified_name,
|
||||
"description": tool.description,
|
||||
"parameters": tool.input_schema,
|
||||
"type": "function",
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
struct SseEvent {
|
||||
#[serde(rename = "type")]
|
||||
@@ -298,9 +206,44 @@ struct SseEvent {
|
||||
item: Option<Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCreated {}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCompleted {
|
||||
id: String,
|
||||
usage: Option<ResponseCompletedUsage>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCompletedUsage {
|
||||
input_tokens: u64,
|
||||
input_tokens_details: Option<ResponseCompletedInputTokensDetails>,
|
||||
output_tokens: u64,
|
||||
output_tokens_details: Option<ResponseCompletedOutputTokensDetails>,
|
||||
total_tokens: u64,
|
||||
}
|
||||
|
||||
impl From<ResponseCompletedUsage> for TokenUsage {
|
||||
fn from(val: ResponseCompletedUsage) -> Self {
|
||||
TokenUsage {
|
||||
input_tokens: val.input_tokens,
|
||||
cached_input_tokens: val.input_tokens_details.map(|d| d.cached_tokens),
|
||||
output_tokens: val.output_tokens,
|
||||
reasoning_output_tokens: val.output_tokens_details.map(|d| d.reasoning_tokens),
|
||||
total_tokens: val.total_tokens,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCompletedInputTokensDetails {
|
||||
cached_tokens: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCompletedOutputTokensDetails {
|
||||
reasoning_tokens: u64,
|
||||
}
|
||||
|
||||
async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
|
||||
@@ -312,7 +255,7 @@ where
|
||||
// If the stream stays completely silent for an extended period treat it as disconnected.
|
||||
let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
// The response id returned from the "complete" message.
|
||||
let mut response_id = None;
|
||||
let mut response_completed: Option<ResponseCompleted> = None;
|
||||
|
||||
loop {
|
||||
let sse = match timeout(idle_timeout, stream.next()).await {
|
||||
@@ -324,9 +267,15 @@ where
|
||||
return;
|
||||
}
|
||||
Ok(None) => {
|
||||
match response_id {
|
||||
Some(response_id) => {
|
||||
let event = ResponseEvent::Completed { response_id };
|
||||
match response_completed {
|
||||
Some(ResponseCompleted {
|
||||
id: response_id,
|
||||
usage,
|
||||
}) => {
|
||||
let event = ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage: usage.map(Into::into),
|
||||
};
|
||||
let _ = tx_event.send(Ok(event)).await;
|
||||
}
|
||||
None => {
|
||||
@@ -387,12 +336,17 @@ where
|
||||
return;
|
||||
}
|
||||
}
|
||||
"response.created" => {
|
||||
if event.response.is_some() {
|
||||
let _ = tx_event.send(Ok(ResponseEvent::Created {})).await;
|
||||
}
|
||||
}
|
||||
// Final response completed – includes array of output items & id
|
||||
"response.completed" => {
|
||||
if let Some(resp_val) = event.response {
|
||||
match serde_json::from_value::<ResponseCompleted>(resp_val) {
|
||||
Ok(r) => {
|
||||
response_id = Some(r.id);
|
||||
response_completed = Some(r);
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("failed to parse ResponseCompleted: {e}");
|
||||
@@ -401,6 +355,18 @@ where
|
||||
};
|
||||
};
|
||||
}
|
||||
"response.content_part.done"
|
||||
| "response.function_call_arguments.delta"
|
||||
| "response.in_progress"
|
||||
| "response.output_item.added"
|
||||
| "response.output_text.delta"
|
||||
| "response.output_text.done"
|
||||
| "response.reasoning_summary_part.added"
|
||||
| "response.reasoning_summary_text.delta"
|
||||
| "response.reasoning_summary_text.done" => {
|
||||
// Currently, we ignore these events, but we handle them
|
||||
// separately to skip the logging message in the `other` case.
|
||||
}
|
||||
other => debug!(other, "sse event"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use crate::error::Result;
|
||||
use crate::models::ResponseItem;
|
||||
use crate::protocol::TokenUsage;
|
||||
use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
|
||||
use futures::Stream;
|
||||
use serde::Serialize;
|
||||
use std::borrow::Cow;
|
||||
@@ -22,7 +26,7 @@ pub struct Prompt {
|
||||
pub prev_id: Option<String>,
|
||||
/// Optional instructions from the user to amend to the built-in agent
|
||||
/// instructions.
|
||||
pub instructions: Option<String>,
|
||||
pub user_instructions: Option<String>,
|
||||
/// Whether to store response on server side (disable_response_storage = !store).
|
||||
pub store: bool,
|
||||
|
||||
@@ -33,44 +37,83 @@ pub struct Prompt {
|
||||
}
|
||||
|
||||
impl Prompt {
|
||||
pub(crate) fn get_full_instructions(&self) -> Cow<str> {
|
||||
match &self.instructions {
|
||||
Some(instructions) => {
|
||||
let instructions = format!("{BASE_INSTRUCTIONS}\n{instructions}");
|
||||
Cow::Owned(instructions)
|
||||
}
|
||||
None => Cow::Borrowed(BASE_INSTRUCTIONS),
|
||||
pub(crate) fn get_full_instructions(&self, model: &str) -> Cow<str> {
|
||||
let mut sections: Vec<&str> = vec![BASE_INSTRUCTIONS];
|
||||
if let Some(ref user) = self.user_instructions {
|
||||
sections.push(user);
|
||||
}
|
||||
if model.starts_with("gpt-4.1") {
|
||||
sections.push(APPLY_PATCH_TOOL_INSTRUCTIONS);
|
||||
}
|
||||
Cow::Owned(sections.join("\n"))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ResponseEvent {
|
||||
Created,
|
||||
OutputItemDone(ResponseItem),
|
||||
Completed { response_id: String },
|
||||
Completed {
|
||||
response_id: String,
|
||||
token_usage: Option<TokenUsage>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct Reasoning {
|
||||
pub(crate) effort: &'static str,
|
||||
pub(crate) effort: OpenAiReasoningEffort,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) summary: Option<Summary>,
|
||||
pub(crate) summary: Option<OpenAiReasoningSummary>,
|
||||
}
|
||||
|
||||
/// See https://platform.openai.com/docs/guides/reasoning?api-mode=responses#get-started-with-reasoning
|
||||
#[derive(Debug, Serialize, Default, Clone, Copy)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub(crate) enum OpenAiReasoningEffort {
|
||||
Low,
|
||||
#[default]
|
||||
Medium,
|
||||
High,
|
||||
}
|
||||
|
||||
impl From<ReasoningEffortConfig> for Option<OpenAiReasoningEffort> {
|
||||
fn from(effort: ReasoningEffortConfig) -> Self {
|
||||
match effort {
|
||||
ReasoningEffortConfig::Low => Some(OpenAiReasoningEffort::Low),
|
||||
ReasoningEffortConfig::Medium => Some(OpenAiReasoningEffort::Medium),
|
||||
ReasoningEffortConfig::High => Some(OpenAiReasoningEffort::High),
|
||||
ReasoningEffortConfig::None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A summary of the reasoning performed by the model. This can be useful for
|
||||
/// debugging and understanding the model's reasoning process.
|
||||
#[derive(Debug, Serialize)]
|
||||
/// See https://platform.openai.com/docs/guides/reasoning?api-mode=responses#reasoning-summaries
|
||||
#[derive(Debug, Serialize, Default, Clone, Copy)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub(crate) enum Summary {
|
||||
pub(crate) enum OpenAiReasoningSummary {
|
||||
#[default]
|
||||
Auto,
|
||||
#[allow(dead_code)] // Will go away once this is configurable.
|
||||
Concise,
|
||||
#[allow(dead_code)] // Will go away once this is configurable.
|
||||
Detailed,
|
||||
}
|
||||
|
||||
impl From<ReasoningSummaryConfig> for Option<OpenAiReasoningSummary> {
|
||||
fn from(summary: ReasoningSummaryConfig) -> Self {
|
||||
match summary {
|
||||
ReasoningSummaryConfig::Auto => Some(OpenAiReasoningSummary::Auto),
|
||||
ReasoningSummaryConfig::Concise => Some(OpenAiReasoningSummary::Concise),
|
||||
ReasoningSummaryConfig::Detailed => Some(OpenAiReasoningSummary::Detailed),
|
||||
ReasoningSummaryConfig::None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Request object that is serialized as JSON and POST'ed when using the
|
||||
/// Responses API.
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct Payload<'a> {
|
||||
pub(crate) struct ResponsesApiRequest<'a> {
|
||||
pub(crate) model: &'a str,
|
||||
pub(crate) instructions: &'a str,
|
||||
// TODO(mbolin): ResponseItem::Other should not be serialized. Currently,
|
||||
@@ -88,6 +131,40 @@ pub(crate) struct Payload<'a> {
|
||||
pub(crate) stream: bool,
|
||||
}
|
||||
|
||||
pub(crate) fn create_reasoning_param_for_request(
|
||||
model: &str,
|
||||
effort: ReasoningEffortConfig,
|
||||
summary: ReasoningSummaryConfig,
|
||||
) -> Option<Reasoning> {
|
||||
let effort: Option<OpenAiReasoningEffort> = effort.into();
|
||||
let effort = effort?;
|
||||
|
||||
if model_supports_reasoning_summaries(model) {
|
||||
Some(Reasoning {
|
||||
effort,
|
||||
summary: summary.into(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `model` starts with `"o"` or `"codex"`.
pub fn model_supports_reasoning_summaries(model: &str) -> bool {
    // This rule is hardcoded for now. Reasoning is expected to apply only to
    // OpenAI models, and users should not have to edit their config to turn
    // it off for models that lack support, such as `gpt-4.1`.
    //
    // Someone running Codex against a non-OpenAI model whose name happens to
    // start with "o" can set `model_reasoning_effort = "none"` in config.toml
    // to disable reasoning.
    //
    // Eventually this should be configurable in config.toml as well, but the
    // defaults need to "just work." Perhaps a "reasoning models pattern"
    // could become part of ModelProviderInfo?
    const REASONING_MODEL_PREFIXES: [&str; 2] = ["o", "codex"];

    REASONING_MODEL_PREFIXES
        .iter()
        .any(|&prefix| model.starts_with(prefix))
}
|
||||
|
||||
pub(crate) struct ResponseStream {
|
||||
pub(crate) rx_event: mpsc::Receiver<Result<ResponseEvent>>,
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
// Poisoned mutex should fail the program
|
||||
#![allow(clippy::unwrap_used)]
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
@@ -20,6 +21,7 @@ use codex_apply_patch::MaybeApplyPatchVerified;
|
||||
use codex_apply_patch::maybe_parse_apply_patch_verified;
|
||||
use codex_apply_patch::print_summary;
|
||||
use futures::prelude::*;
|
||||
use mcp_types::CallToolResult;
|
||||
use serde::Serialize;
|
||||
use serde_json;
|
||||
use tokio::sync::Notify;
|
||||
@@ -37,6 +39,7 @@ use crate::client::ModelClient;
|
||||
use crate::client_common::Prompt;
|
||||
use crate::client_common::ResponseEvent;
|
||||
use crate::config::Config;
|
||||
use crate::config_types::ShellEnvironmentPolicy;
|
||||
use crate::conversation_history::ConversationHistory;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result as CodexResult;
|
||||
@@ -45,6 +48,7 @@ use crate::exec::ExecParams;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::exec::process_exec_tool_call;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::flags::OPENAI_STREAM_MAX_RETRIES;
|
||||
use crate::mcp_connection_manager::McpConnectionManager;
|
||||
use crate::mcp_connection_manager::try_parse_fully_qualified_tool_name;
|
||||
@@ -56,7 +60,7 @@ use crate::models::ReasoningItemReasoningSummary;
|
||||
use crate::models::ResponseInputItem;
|
||||
use crate::models::ResponseItem;
|
||||
use crate::models::ShellToolCallParams;
|
||||
use crate::project_doc::create_full_instructions;
|
||||
use crate::project_doc::get_user_instructions;
|
||||
use crate::protocol::AgentMessageEvent;
|
||||
use crate::protocol::AgentReasoningEvent;
|
||||
use crate::protocol::ApplyPatchApprovalRequestEvent;
|
||||
@@ -77,6 +81,7 @@ use crate::protocol::ReviewDecision;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::protocol::SessionConfiguredEvent;
|
||||
use crate::protocol::Submission;
|
||||
use crate::protocol::TaskCompleteEvent;
|
||||
use crate::rollout::RolloutRecorder;
|
||||
use crate::safety::SafetyCheck;
|
||||
use crate::safety::assess_command_safety;
|
||||
@@ -100,10 +105,12 @@ impl Codex {
|
||||
let (tx_sub, rx_sub) = async_channel::bounded(64);
|
||||
let (tx_event, rx_event) = async_channel::bounded(64);
|
||||
|
||||
let instructions = create_full_instructions(&config).await;
|
||||
let instructions = get_user_instructions(&config).await;
|
||||
let configure_session = Op::ConfigureSession {
|
||||
provider: config.model_provider.clone(),
|
||||
model: config.model.clone(),
|
||||
model_reasoning_effort: config.model_reasoning_effort,
|
||||
model_reasoning_summary: config.model_reasoning_summary,
|
||||
instructions,
|
||||
approval_policy: config.approval_policy,
|
||||
sandbox_policy: config.sandbox_policy.clone(),
|
||||
@@ -170,6 +177,7 @@ pub(crate) struct Session {
|
||||
instructions: Option<String>,
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: SandboxPolicy,
|
||||
shell_environment_policy: ShellEnvironmentPolicy,
|
||||
writable_roots: Mutex<Vec<PathBuf>>,
|
||||
|
||||
/// Manager for external MCP servers/tools.
|
||||
@@ -181,8 +189,9 @@ pub(crate) struct Session {
|
||||
|
||||
/// Optional rollout recorder for persisting the conversation transcript so
|
||||
/// sessions can be replayed or inspected later.
|
||||
rollout: Mutex<Option<crate::rollout::RolloutRecorder>>,
|
||||
rollout: Mutex<Option<RolloutRecorder>>,
|
||||
state: Mutex<State>,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl Session {
|
||||
@@ -198,6 +207,9 @@ impl Session {
|
||||
struct State {
|
||||
approved_commands: HashSet<Vec<String>>,
|
||||
current_task: Option<AgentTask>,
|
||||
/// Call IDs that have been sent from the Responses API but have not been sent back yet.
|
||||
/// You CANNOT send a Responses API follow-up message unless you have sent back the output for all pending calls or else it will 400.
|
||||
pending_call_ids: HashSet<String>,
|
||||
previous_response_id: Option<String>,
|
||||
pending_approvals: HashMap<String, oneshot::Sender<ReviewDecision>>,
|
||||
pending_input: Vec<ResponseInputItem>,
|
||||
@@ -290,10 +302,21 @@ impl Session {
|
||||
state.approved_commands.insert(cmd);
|
||||
}
|
||||
|
||||
/// Records items to both the rollout and the chat completions/ZDR
|
||||
/// transcript, if enabled.
|
||||
async fn record_conversation_items(&self, items: &[ResponseItem]) {
|
||||
debug!("Recording items for conversation: {items:?}");
|
||||
self.record_rollout_items(items).await;
|
||||
|
||||
if let Some(transcript) = self.state.lock().unwrap().zdr_transcript.as_mut() {
|
||||
transcript.record_items(items);
|
||||
}
|
||||
}
|
||||
|
||||
/// Append the given items to the session's rollout transcript (if enabled)
|
||||
/// and persist them to disk.
|
||||
async fn record_rollout_items(&self, items: &[ResponseItem]) {
|
||||
// Clone the recorder outside of the mutex so we don’t hold the lock
|
||||
// Clone the recorder outside of the mutex so we don't hold the lock
|
||||
// across an await point (MutexGuard is not Send).
|
||||
let recorder = {
|
||||
let guard = self.rollout.lock().unwrap();
|
||||
@@ -383,7 +406,7 @@ impl Session {
|
||||
tool: &str,
|
||||
arguments: Option<serde_json::Value>,
|
||||
timeout: Option<Duration>,
|
||||
) -> anyhow::Result<mcp_types::CallToolResult> {
|
||||
) -> anyhow::Result<CallToolResult> {
|
||||
self.mcp_connection_manager
|
||||
.call_tool(server, tool, arguments, timeout)
|
||||
.await
|
||||
@@ -392,6 +415,8 @@ impl Session {
|
||||
pub fn abort(&self) {
|
||||
info!("Aborting existing session");
|
||||
let mut state = self.state.lock().unwrap();
|
||||
// Don't clear pending_call_ids because we need to keep track of them to ensure we don't 400 on the next turn.
|
||||
// We will generate a synthetic aborted response for each pending call id.
|
||||
state.pending_approvals.clear();
|
||||
state.pending_input.clear();
|
||||
if let Some(task) = state.current_task.take() {
|
||||
@@ -412,7 +437,7 @@ impl Session {
|
||||
}
|
||||
|
||||
let Ok(json) = serde_json::to_string(¬ification) else {
|
||||
tracing::error!("failed to serialise notification payload");
|
||||
error!("failed to serialise notification payload");
|
||||
return;
|
||||
};
|
||||
|
||||
@@ -424,7 +449,7 @@ impl Session {
|
||||
|
||||
// Fire-and-forget – we do not wait for completion.
|
||||
if let Err(e) = command.spawn() {
|
||||
tracing::warn!("failed to spawn notifier '{}': {e}", notify_command[0]);
|
||||
warn!("failed to spawn notifier '{}': {e}", notify_command[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -537,6 +562,8 @@ async fn submission_loop(
|
||||
Op::ConfigureSession {
|
||||
provider,
|
||||
model,
|
||||
model_reasoning_effort,
|
||||
model_reasoning_summary,
|
||||
instructions,
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
@@ -558,7 +585,12 @@ async fn submission_loop(
|
||||
return;
|
||||
}
|
||||
|
||||
let client = ModelClient::new(model.clone(), provider.clone());
|
||||
let client = ModelClient::new(
|
||||
model.clone(),
|
||||
provider.clone(),
|
||||
model_reasoning_effort,
|
||||
model_reasoning_summary,
|
||||
);
|
||||
|
||||
// abort any current running session and clone its state
|
||||
let retain_zdr_transcript =
|
||||
@@ -621,7 +653,7 @@ async fn submission_loop(
|
||||
match RolloutRecorder::new(&config, session_id, instructions.clone()).await {
|
||||
Ok(r) => Some(r),
|
||||
Err(e) => {
|
||||
tracing::warn!("failed to initialise rollout recorder: {e}");
|
||||
warn!("failed to initialise rollout recorder: {e}");
|
||||
None
|
||||
}
|
||||
};
|
||||
@@ -633,12 +665,14 @@ async fn submission_loop(
|
||||
instructions,
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
shell_environment_policy: config.shell_environment_policy.clone(),
|
||||
cwd,
|
||||
writable_roots,
|
||||
mcp_connection_manager,
|
||||
notify,
|
||||
state: Mutex::new(state),
|
||||
rollout: Mutex::new(rollout_recorder),
|
||||
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
||||
}));
|
||||
|
||||
// Gather history metadata for SessionConfiguredEvent.
|
||||
@@ -714,7 +748,7 @@ async fn submission_loop(
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = crate::message_history::append_entry(&text, &id, &config).await
|
||||
{
|
||||
tracing::warn!("failed to append to message history: {e}");
|
||||
warn!("failed to append to message history: {e}");
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -744,7 +778,7 @@ async fn submission_loop(
|
||||
};
|
||||
|
||||
if let Err(e) = tx_event.send(event).await {
|
||||
tracing::warn!("failed to send GetHistoryEntryResponse event: {e}");
|
||||
warn!("failed to send GetHistoryEntryResponse event: {e}");
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -753,6 +787,19 @@ async fn submission_loop(
|
||||
debug!("Agent loop exited");
|
||||
}
|
||||
|
||||
/// Takes a user message as input and runs a loop where, at each turn, the model
|
||||
/// replies with either:
|
||||
///
|
||||
/// - requested function calls
|
||||
/// - an assistant message
|
||||
///
|
||||
/// While it is possible for the model to return multiple of these items in a
|
||||
/// single turn, in practice, we generally see one item per turn:
|
||||
///
|
||||
/// - If the model requests a function call, we execute it and send the output
|
||||
/// back to the model in the next turn.
|
||||
/// - If the model sends only an assistant message, we record it in the
|
||||
/// conversation history and consider the task complete.
|
||||
async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
||||
if input.is_empty() {
|
||||
return;
|
||||
@@ -765,9 +812,14 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut pending_response_input: Vec<ResponseInputItem> = vec![ResponseInputItem::from(input)];
|
||||
let initial_input_for_turn = ResponseInputItem::from(input);
|
||||
sess.record_conversation_items(&[initial_input_for_turn.clone().into()])
|
||||
.await;
|
||||
|
||||
let mut input_for_next_turn: Vec<ResponseInputItem> = vec![initial_input_for_turn];
|
||||
let last_agent_message: Option<String>;
|
||||
loop {
|
||||
let mut net_new_turn_input = pending_response_input
|
||||
let mut net_new_turn_input = input_for_next_turn
|
||||
.drain(..)
|
||||
.map(ResponseItem::from)
|
||||
.collect::<Vec<_>>();
|
||||
@@ -775,11 +827,12 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
||||
// Note that pending_input would be something like a message the user
|
||||
// submitted through the UI while the model was running. Though the UI
|
||||
// may support this, the model might not.
|
||||
let pending_input = sess.get_pending_input().into_iter().map(ResponseItem::from);
|
||||
net_new_turn_input.extend(pending_input);
|
||||
|
||||
// Persist only the net-new items of this turn to the rollout.
|
||||
sess.record_rollout_items(&net_new_turn_input).await;
|
||||
let pending_input = sess
|
||||
.get_pending_input()
|
||||
.into_iter()
|
||||
.map(ResponseItem::from)
|
||||
.collect::<Vec<ResponseItem>>();
|
||||
sess.record_conversation_items(&pending_input).await;
|
||||
|
||||
// Construct the input that we will send to the model. When using the
|
||||
// Chat completions API (or ZDR clients), the model needs the full
|
||||
@@ -788,20 +841,24 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
||||
// represents an append-only log without duplicates.
|
||||
let turn_input: Vec<ResponseItem> =
|
||||
if let Some(transcript) = sess.state.lock().unwrap().zdr_transcript.as_mut() {
|
||||
// If we are using Chat/ZDR, we need to send the transcript with every turn.
|
||||
|
||||
// 1. Build up the conversation history for the next turn.
|
||||
let full_transcript = [transcript.contents(), net_new_turn_input.clone()].concat();
|
||||
|
||||
// 2. Update the in-memory transcript so that future turns
|
||||
// include these items as part of the history.
|
||||
transcript.record_items(net_new_turn_input);
|
||||
|
||||
// Note that `transcript.record_items()` does some filtering
|
||||
// such that `full_transcript` may include items that were
|
||||
// excluded from `transcript`.
|
||||
full_transcript
|
||||
// If we are using Chat/ZDR, we need to send the transcript with
|
||||
// every turn. By induction, `transcript` already contains:
|
||||
// - The `input` that kicked off this task.
|
||||
// - Each `ResponseItem` that was recorded in the previous turn.
|
||||
// - Each response to a `ResponseItem` (in practice, the only
|
||||
// response type we seem to have is `FunctionCallOutput`).
|
||||
//
|
||||
// The only thing the `transcript` does not contain is the
|
||||
// `pending_input` that was injected while the model was
|
||||
// running. We need to add that to the conversation history
|
||||
// so that the model can see it in the next turn.
|
||||
[transcript.contents(), pending_input].concat()
|
||||
} else {
|
||||
// In practice, net_new_turn_input should contain only:
|
||||
// - User messages
|
||||
// - Outputs for function calls requested by the model
|
||||
net_new_turn_input.extend(pending_input);
|
||||
|
||||
// Responses API path – we can just send the new items and
|
||||
// record the same.
|
||||
net_new_turn_input
|
||||
@@ -822,38 +879,95 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
||||
.collect();
|
||||
match run_turn(&sess, sub_id.clone(), turn_input).await {
|
||||
Ok(turn_output) => {
|
||||
let (items, responses): (Vec<_>, Vec<_>) = turn_output
|
||||
.into_iter()
|
||||
.map(|p| (p.item, p.response))
|
||||
.unzip();
|
||||
let responses = responses
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.collect::<Vec<ResponseInputItem>>();
|
||||
let last_assistant_message = get_last_assistant_message_from_turn(&items);
|
||||
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
|
||||
let mut responses = Vec::<ResponseInputItem>::new();
|
||||
for processed_response_item in turn_output {
|
||||
let ProcessedResponseItem { item, response } = processed_response_item;
|
||||
match (&item, &response) {
|
||||
(ResponseItem::Message { role, .. }, None) if role == "assistant" => {
|
||||
// If the model returned a message, we need to record it.
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
}
|
||||
(
|
||||
ResponseItem::LocalShellCall { .. },
|
||||
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
(
|
||||
ResponseItem::FunctionCall { .. },
|
||||
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
(
|
||||
ResponseItem::FunctionCall { .. },
|
||||
Some(ResponseInputItem::McpToolCallOutput { call_id, result }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
let (content, success): (String, Option<bool>) = match result {
|
||||
Ok(CallToolResult { content, is_error }) => {
|
||||
match serde_json::to_string(content) {
|
||||
Ok(content) => (content, *is_error),
|
||||
Err(e) => {
|
||||
warn!("Failed to serialize MCP tool call output: {e}");
|
||||
(e.to_string(), Some(true))
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => (e.clone(), Some(true)),
|
||||
};
|
||||
items_to_record_in_conversation_history.push(
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: FunctionCallOutputPayload { content, success },
|
||||
},
|
||||
);
|
||||
}
|
||||
(ResponseItem::Reasoning { .. }, None) => {
|
||||
// Omit from conversation history.
|
||||
}
|
||||
_ => {
|
||||
warn!("Unexpected response item: {item:?} with response: {response:?}");
|
||||
}
|
||||
};
|
||||
if let Some(response) = response {
|
||||
responses.push(response);
|
||||
}
|
||||
}
|
||||
|
||||
// Only attempt to take the lock if there is something to record.
|
||||
if !items.is_empty() {
|
||||
// First persist model-generated output to the rollout file – this only borrows.
|
||||
sess.record_rollout_items(&items).await;
|
||||
|
||||
// For ZDR we also need to keep a transcript clone.
|
||||
if let Some(transcript) = sess.state.lock().unwrap().zdr_transcript.as_mut() {
|
||||
transcript.record_items(items);
|
||||
}
|
||||
if !items_to_record_in_conversation_history.is_empty() {
|
||||
sess.record_conversation_items(&items_to_record_in_conversation_history)
|
||||
.await;
|
||||
}
|
||||
|
||||
if responses.is_empty() {
|
||||
debug!("Turn completed");
|
||||
last_agent_message = get_last_assistant_message_from_turn(
|
||||
&items_to_record_in_conversation_history,
|
||||
);
|
||||
sess.maybe_notify(UserNotification::AgentTurnComplete {
|
||||
turn_id: sub_id.clone(),
|
||||
input_messages: turn_input_messages,
|
||||
last_assistant_message,
|
||||
last_assistant_message: last_agent_message.clone(),
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
pending_response_input = responses;
|
||||
input_for_next_turn = responses;
|
||||
}
|
||||
Err(e) => {
|
||||
info!("Turn error: {e:#}");
|
||||
@@ -871,7 +985,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
||||
sess.remove_task(&sub_id);
|
||||
let event = Event {
|
||||
id: sub_id,
|
||||
msg: EventMsg::TaskComplete,
|
||||
msg: EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }),
|
||||
};
|
||||
sess.tx_event.send(event).await.ok();
|
||||
}
|
||||
@@ -882,9 +996,8 @@ async fn run_turn(
|
||||
input: Vec<ResponseItem>,
|
||||
) -> CodexResult<Vec<ProcessedResponseItem>> {
|
||||
// Decide whether to use server-side storage (previous_response_id) or disable it
|
||||
let (prev_id, store, is_first_turn) = {
|
||||
let (prev_id, store) = {
|
||||
let state = sess.state.lock().unwrap();
|
||||
let is_first_turn = state.previous_response_id.is_none();
|
||||
let store = state.zdr_transcript.is_none();
|
||||
let prev_id = if store {
|
||||
state.previous_response_id.clone()
|
||||
@@ -893,20 +1006,14 @@ async fn run_turn(
|
||||
// back, but trying to use it results in a 400.
|
||||
None
|
||||
};
|
||||
(prev_id, store, is_first_turn)
|
||||
};
|
||||
|
||||
let instructions = if is_first_turn {
|
||||
sess.instructions.clone()
|
||||
} else {
|
||||
None
|
||||
(prev_id, store)
|
||||
};
|
||||
|
||||
let extra_tools = sess.mcp_connection_manager.list_all_tools();
|
||||
let prompt = Prompt {
|
||||
input,
|
||||
prev_id,
|
||||
instructions,
|
||||
user_instructions: sess.instructions.clone(),
|
||||
store,
|
||||
extra_tools,
|
||||
};
|
||||
@@ -951,6 +1058,7 @@ async fn run_turn(
|
||||
/// events map to a `ResponseItem`. A `ResponseItem` may need to be
|
||||
/// "handled" such that it produces a `ResponseInputItem` that needs to be
|
||||
/// sent back to the model on the next turn.
|
||||
#[derive(Debug)]
|
||||
struct ProcessedResponseItem {
|
||||
item: ResponseItem,
|
||||
response: Option<ResponseInputItem>,
|
||||
@@ -961,7 +1069,57 @@ async fn try_run_turn(
|
||||
sub_id: &str,
|
||||
prompt: &Prompt,
|
||||
) -> CodexResult<Vec<ProcessedResponseItem>> {
|
||||
let mut stream = sess.client.clone().stream(prompt).await?;
|
||||
// call_ids that are part of this response.
|
||||
let completed_call_ids = prompt
|
||||
.input
|
||||
.iter()
|
||||
.filter_map(|ri| match ri {
|
||||
ResponseItem::FunctionCallOutput { call_id, .. } => Some(call_id),
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(call_id),
|
||||
..
|
||||
} => Some(call_id),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// call_ids that were pending but are not part of this response.
|
||||
// This usually happens because the user interrupted the model before we responded to one of its tool calls
|
||||
// and then the user sent a follow-up message.
|
||||
let missing_calls = {
|
||||
sess.state
|
||||
.lock()
|
||||
.unwrap()
|
||||
.pending_call_ids
|
||||
.iter()
|
||||
.filter_map(|call_id| {
|
||||
if completed_call_ids.contains(&call_id) {
|
||||
None
|
||||
} else {
|
||||
Some(call_id.clone())
|
||||
}
|
||||
})
|
||||
.map(|call_id| ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: Some(false),
|
||||
},
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
let prompt: Cow<Prompt> = if missing_calls.is_empty() {
|
||||
Cow::Borrowed(prompt)
|
||||
} else {
|
||||
// Add the synthetic aborted missing calls to the beginning of the input to ensure all call ids have responses.
|
||||
let input = [missing_calls, prompt.input.clone()].concat();
|
||||
Cow::Owned(Prompt {
|
||||
input,
|
||||
..prompt.clone()
|
||||
})
|
||||
};
|
||||
|
||||
let mut stream = sess.client.clone().stream(&prompt).await?;
|
||||
|
||||
// Buffer all the incoming messages from the stream first, then execute them.
|
||||
// If we execute a function call in the middle of handling the stream, it can time out.
|
||||
@@ -973,11 +1131,43 @@ async fn try_run_turn(
|
||||
let mut output = Vec::new();
|
||||
for event in input {
|
||||
match event {
|
||||
ResponseEvent::Created => {
|
||||
let mut state = sess.state.lock().unwrap();
|
||||
// We successfully created a new response and ensured that all pending calls were included so we can clear the pending call ids.
|
||||
state.pending_call_ids.clear();
|
||||
}
|
||||
ResponseEvent::OutputItemDone(item) => {
|
||||
let call_id = match &item {
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(call_id),
|
||||
..
|
||||
} => Some(call_id),
|
||||
ResponseItem::FunctionCall { call_id, .. } => Some(call_id),
|
||||
_ => None,
|
||||
};
|
||||
if let Some(call_id) = call_id {
|
||||
// We just got a new call id so we need to make sure to respond to it in the next turn.
|
||||
let mut state = sess.state.lock().unwrap();
|
||||
state.pending_call_ids.insert(call_id.clone());
|
||||
}
|
||||
let response = handle_response_item(sess, sub_id, item.clone()).await?;
|
||||
|
||||
output.push(ProcessedResponseItem { item, response });
|
||||
}
|
||||
ResponseEvent::Completed { response_id } => {
|
||||
ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
} => {
|
||||
if let Some(token_usage) = token_usage {
|
||||
sess.tx_event
|
||||
.send(Event {
|
||||
id: sub_id.to_string(),
|
||||
msg: EventMsg::TokenCount(token_usage),
|
||||
})
|
||||
.await
|
||||
.ok();
|
||||
}
|
||||
|
||||
let mut state = sess.state.lock().unwrap();
|
||||
state.previous_response_id = Some(response_id);
|
||||
break;
|
||||
@@ -1024,7 +1214,7 @@ async fn handle_response_item(
|
||||
arguments,
|
||||
call_id,
|
||||
} => {
|
||||
tracing::info!("FunctionCall: {arguments}");
|
||||
info!("FunctionCall: {arguments}");
|
||||
Some(handle_function_call(sess, sub_id.to_string(), name, arguments, call_id).await)
|
||||
}
|
||||
ResponseItem::LocalShellCall {
|
||||
@@ -1106,7 +1296,7 @@ async fn handle_function_call(
|
||||
// Unknown function: reply with structured failure so the model can adapt.
|
||||
ResponseInputItem::FunctionCallOutput {
|
||||
call_id,
|
||||
output: crate::models::FunctionCallOutputPayload {
|
||||
output: FunctionCallOutputPayload {
|
||||
content: format!("unsupported call: {}", name),
|
||||
success: None,
|
||||
},
|
||||
@@ -1122,6 +1312,7 @@ fn to_exec_params(params: ShellToolCallParams, sess: &Session) -> ExecParams {
|
||||
command: params.command,
|
||||
cwd: sess.resolve_path(params.workdir.clone()),
|
||||
timeout_ms: params.timeout_ms,
|
||||
env: create_env(&sess.shell_environment_policy),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1137,7 +1328,7 @@ fn parse_container_exec_arguments(
|
||||
// allow model to re-sample
|
||||
let output = ResponseInputItem::FunctionCallOutput {
|
||||
call_id: call_id.to_string(),
|
||||
output: crate::models::FunctionCallOutputPayload {
|
||||
output: FunctionCallOutputPayload {
|
||||
content: format!("failed to parse function arguments: {e}"),
|
||||
success: None,
|
||||
},
|
||||
@@ -1205,7 +1396,7 @@ async fn handle_container_exec_with_params(
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return ResponseInputItem::FunctionCallOutput {
|
||||
call_id,
|
||||
output: crate::models::FunctionCallOutputPayload {
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "exec command rejected by user".to_string(),
|
||||
success: None,
|
||||
},
|
||||
@@ -1221,7 +1412,7 @@ async fn handle_container_exec_with_params(
|
||||
SafetyCheck::Reject { reason } => {
|
||||
return ResponseInputItem::FunctionCallOutput {
|
||||
call_id,
|
||||
output: crate::models::FunctionCallOutputPayload {
|
||||
output: FunctionCallOutputPayload {
|
||||
content: format!("exec command rejected: {reason}"),
|
||||
success: None,
|
||||
},
|
||||
@@ -1237,6 +1428,7 @@ async fn handle_container_exec_with_params(
|
||||
sandbox_type,
|
||||
sess.ctrl_c.clone(),
|
||||
&sess.sandbox_policy,
|
||||
&sess.codex_linux_sandbox_exe,
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -1268,7 +1460,7 @@ async fn handle_container_exec_with_params(
|
||||
}
|
||||
}
|
||||
Err(CodexErr::Sandbox(error)) => {
|
||||
handle_sanbox_error(error, sandbox_type, params, sess, sub_id, call_id).await
|
||||
handle_sandbox_error(error, sandbox_type, params, sess, sub_id, call_id).await
|
||||
}
|
||||
Err(e) => {
|
||||
// Handle non-sandbox errors
|
||||
@@ -1283,7 +1475,7 @@ async fn handle_container_exec_with_params(
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_sanbox_error(
|
||||
async fn handle_sandbox_error(
|
||||
error: SandboxErr,
|
||||
sandbox_type: SandboxType,
|
||||
params: ExecParams,
|
||||
@@ -1305,7 +1497,14 @@ async fn handle_sanbox_error(
|
||||
};
|
||||
}
|
||||
|
||||
// Ask the user to retry without sandbox
|
||||
// Note that when `error` is `SandboxErr::Denied`, it could be a false
|
||||
// positive. That is, it may have exited with a non-zero exit code, not
|
||||
// because the sandbox denied it, but because that is its expected behavior,
|
||||
// i.e., a grep command that did not match anything. Ideally we would
|
||||
// include additional metadata on the command to indicate whether non-zero
|
||||
// exit codes merit a retry.
|
||||
|
||||
// For now, we categorically ask the user to retry without sandbox.
|
||||
sess.notify_background_event(&sub_id, format!("Execution failed: {error}"))
|
||||
.await;
|
||||
|
||||
@@ -1341,6 +1540,7 @@ async fn handle_sanbox_error(
|
||||
SandboxType::None,
|
||||
sess.ctrl_c.clone(),
|
||||
&sess.sandbox_policy,
|
||||
&sess.codex_linux_sandbox_exe,
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -1746,7 +1946,7 @@ fn apply_changes_from_apply_patch(action: &ApplyPatchAction) -> anyhow::Result<A
|
||||
})
|
||||
}
|
||||
|
||||
fn get_writable_roots(cwd: &Path) -> Vec<std::path::PathBuf> {
|
||||
fn get_writable_roots(cwd: &Path) -> Vec<PathBuf> {
|
||||
let mut writable_roots = Vec::new();
|
||||
if cfg!(target_os = "macos") {
|
||||
// On macOS, $TMPDIR is private to the user.
|
||||
@@ -1774,7 +1974,7 @@ fn get_writable_roots(cwd: &Path) -> Vec<std::path::PathBuf> {
|
||||
}
|
||||
|
||||
/// Exec output is a pre-serialized JSON payload
|
||||
fn format_exec_output(output: &str, exit_code: i32, duration: std::time::Duration) -> String {
|
||||
fn format_exec_output(output: &str, exit_code: i32, duration: Duration) -> String {
|
||||
#[derive(Serialize)]
|
||||
struct ExecMetadata {
|
||||
exit_code: i32,
|
||||
|
||||
@@ -1,16 +1,26 @@
|
||||
use crate::config_profile::ConfigProfile;
|
||||
use crate::config_types::History;
|
||||
use crate::config_types::McpServerConfig;
|
||||
use crate::config_types::ReasoningEffort;
|
||||
use crate::config_types::ReasoningSummary;
|
||||
use crate::config_types::ShellEnvironmentPolicy;
|
||||
use crate::config_types::ShellEnvironmentPolicyToml;
|
||||
use crate::config_types::Tui;
|
||||
use crate::config_types::UriBasedFileOpener;
|
||||
use crate::flags::OPENAI_DEFAULT_MODEL;
|
||||
use crate::mcp_server_config::McpServerConfig;
|
||||
use crate::model_provider_info::ModelProviderInfo;
|
||||
use crate::model_provider_info::built_in_model_providers;
|
||||
use crate::openai_model_info::get_model_info;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::SandboxPermission;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use dirs::home_dir;
|
||||
use serde::Deserialize;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use toml::Value as TomlValue;
|
||||
|
||||
const DEFAULT_CONFIG_TEMPLATE: &str = include_str!("../config_template.toml");
|
||||
|
||||
/// Maximum number of bytes of the documentation that will be embedded. Larger
|
||||
/// files are *silently truncated* to this size so we do not take up too much of
|
||||
@@ -23,6 +33,12 @@ pub struct Config {
|
||||
/// Optional override of model selection.
|
||||
pub model: String,
|
||||
|
||||
/// Size of the context window for the model, in tokens.
|
||||
pub model_context_window: Option<u64>,
|
||||
|
||||
/// Maximum number of output tokens.
|
||||
pub model_max_output_tokens: Option<u64>,
|
||||
|
||||
/// Key into the model_providers map that specifies which provider to use.
|
||||
pub model_provider_id: String,
|
||||
|
||||
@@ -34,6 +50,13 @@ pub struct Config {
|
||||
|
||||
pub sandbox_policy: SandboxPolicy,
|
||||
|
||||
pub shell_environment_policy: ShellEnvironmentPolicy,
|
||||
|
||||
/// When `true`, `AgentReasoning` events emitted by the backend will be
|
||||
/// suppressed from the frontend output. This can reduce visual noise when
|
||||
/// users are only interested in the final agent responses.
|
||||
pub hide_agent_reasoning: bool,
|
||||
|
||||
/// Disable server-side response storage (sends the full conversation
|
||||
/// context with every request). Currently necessary for OpenAI customers
|
||||
/// who have opted into Zero Data Retention (ZDR).
|
||||
@@ -91,73 +114,136 @@ pub struct Config {
|
||||
|
||||
/// Collection of settings that are specific to the TUI.
|
||||
pub tui: Tui,
|
||||
}
|
||||
|
||||
/// Settings that govern if and what will be written to `~/.codex/history.jsonl`.
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, Default)]
|
||||
pub struct History {
|
||||
/// Controls whether history entries are written to disk (see [`HistoryPersistence`]).
|
||||
pub persistence: HistoryPersistence,
|
||||
|
||||
/// If set, the maximum size of the history file in bytes.
|
||||
/// TODO(mbolin): Not currently honored.
|
||||
pub max_bytes: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Copy, Clone, PartialEq, Default)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum HistoryPersistence {
|
||||
/// Save all history entries to disk.
|
||||
#[default]
|
||||
SaveAll,
|
||||
/// Do not write history to disk.
|
||||
None,
|
||||
}
|
||||
|
||||
/// Collection of settings that are specific to the TUI.
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, Default)]
|
||||
pub struct Tui {
|
||||
/// By default, mouse capture is enabled in the TUI so that it is possible
|
||||
/// to scroll the conversation history with a mouse. This comes at the cost
|
||||
/// of not being able to use the mouse to select text in the TUI.
|
||||
/// (Most terminals support a modifier key to allow this. For example,
|
||||
/// text selection works in iTerm if you hold down the `Option` key while
|
||||
/// clicking and dragging.)
|
||||
/// Path to the `codex-linux-sandbox` executable. This must be set if
|
||||
/// [`crate::exec::SandboxType::LinuxSeccomp`] is used. Note that this
|
||||
/// cannot be set in the config file: it must be set in code via
|
||||
/// [`ConfigOverrides`].
|
||||
///
|
||||
/// Setting this option to `true` disables mouse capture, so scrolling with
|
||||
/// the mouse is not possible, though the keyboard shortcuts e.g. `b` and
|
||||
/// `space` still work. This allows the user to select text in the TUI
|
||||
/// using the mouse without needing to hold down a modifier key.
|
||||
pub disable_mouse_capture: bool,
|
||||
/// When this program is invoked, arg0 will be set to `codex-linux-sandbox`.
|
||||
pub codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
|
||||
/// If not "none", the value to use for `reasoning.effort` when making a
|
||||
/// request using the Responses API.
|
||||
pub model_reasoning_effort: ReasoningEffort,
|
||||
|
||||
/// If not "none", the value to use for `reasoning.summary` when making a
|
||||
/// request using the Responses API.
|
||||
pub model_reasoning_summary: ReasoningSummary,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Copy, Clone, PartialEq)]
|
||||
pub enum UriBasedFileOpener {
|
||||
#[serde(rename = "vscode")]
|
||||
VsCode,
|
||||
impl Config {
|
||||
/// Load configuration with *generic* CLI overrides (`-c key=value`) applied
|
||||
/// **in between** the values parsed from `config.toml` and the
|
||||
/// strongly-typed overrides specified via [`ConfigOverrides`].
|
||||
///
|
||||
/// The precedence order is therefore: `config.toml` < `-c` overrides <
|
||||
/// `ConfigOverrides`.
|
||||
pub fn load_with_cli_overrides(
|
||||
cli_overrides: Vec<(String, TomlValue)>,
|
||||
overrides: ConfigOverrides,
|
||||
) -> std::io::Result<Self> {
|
||||
// Resolve the directory that stores Codex state (e.g. ~/.codex or the
|
||||
// value of $CODEX_HOME) so we can embed it into the resulting
|
||||
// `Config` instance.
|
||||
let codex_home = find_codex_home()?;
|
||||
|
||||
#[serde(rename = "vscode-insiders")]
|
||||
VsCodeInsiders,
|
||||
// Step 1: parse `config.toml` into a generic TOML value.
|
||||
let mut root_value = load_config_as_toml(&codex_home)?;
|
||||
|
||||
#[serde(rename = "windsurf")]
|
||||
Windsurf,
|
||||
// Step 2: apply the `-c` overrides.
|
||||
for (path, value) in cli_overrides.into_iter() {
|
||||
apply_toml_override(&mut root_value, &path, value);
|
||||
}
|
||||
|
||||
#[serde(rename = "cursor")]
|
||||
Cursor,
|
||||
// Step 3: deserialize into `ConfigToml` so that Serde can enforce the
|
||||
// correct types.
|
||||
let cfg: ConfigToml = root_value.try_into().map_err(|e| {
|
||||
tracing::error!("Failed to deserialize overridden config: {e}");
|
||||
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
|
||||
})?;
|
||||
|
||||
/// Option to disable the URI-based file opener.
|
||||
#[serde(rename = "none")]
|
||||
None,
|
||||
// Step 4: merge with the strongly-typed overrides.
|
||||
Self::load_from_base_config_with_overrides(cfg, overrides, codex_home)
|
||||
}
|
||||
}
|
||||
|
||||
impl UriBasedFileOpener {
|
||||
pub fn get_scheme(&self) -> Option<&str> {
|
||||
match self {
|
||||
UriBasedFileOpener::VsCode => Some("vscode"),
|
||||
UriBasedFileOpener::VsCodeInsiders => Some("vscode-insiders"),
|
||||
UriBasedFileOpener::Windsurf => Some("windsurf"),
|
||||
UriBasedFileOpener::Cursor => Some("cursor"),
|
||||
UriBasedFileOpener::None => None,
|
||||
/// Read `CODEX_HOME/config.toml` and return it as a generic TOML value. Returns
|
||||
/// an empty TOML table when the file does not exist.
|
||||
fn load_config_as_toml(codex_home: &Path) -> std::io::Result<TomlValue> {
|
||||
let config_path = codex_home.join("config.toml");
|
||||
match std::fs::read_to_string(&config_path) {
|
||||
Ok(contents) => match toml::from_str::<TomlValue>(&contents) {
|
||||
Ok(val) => Ok(val),
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to parse config.toml: {e}");
|
||||
Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e))
|
||||
}
|
||||
},
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
|
||||
tracing::info!("config.toml not found, writing template");
|
||||
write_default_config_template(&config_path);
|
||||
Ok(TomlValue::Table(Default::default()))
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to read config.toml: {e}");
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn write_default_config_template(config_path: &Path) {
|
||||
if let Some(parent) = config_path.parent() {
|
||||
if let Err(e) = std::fs::create_dir_all(parent) {
|
||||
tracing::error!("Failed to create config dir: {e}");
|
||||
return;
|
||||
}
|
||||
}
|
||||
match std::fs::write(config_path, DEFAULT_CONFIG_TEMPLATE) {
|
||||
Ok(_) => tracing::info!("wrote default config template at {}", config_path.display()),
|
||||
Err(e) => tracing::error!("Failed to write default config template: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply a single dotted-path override onto a TOML value.
|
||||
fn apply_toml_override(root: &mut TomlValue, path: &str, value: TomlValue) {
|
||||
use toml::value::Table;
|
||||
|
||||
let segments: Vec<&str> = path.split('.').collect();
|
||||
let mut current = root;
|
||||
|
||||
for (idx, segment) in segments.iter().enumerate() {
|
||||
let is_last = idx == segments.len() - 1;
|
||||
|
||||
if is_last {
|
||||
match current {
|
||||
TomlValue::Table(table) => {
|
||||
table.insert(segment.to_string(), value);
|
||||
}
|
||||
_ => {
|
||||
let mut table = Table::new();
|
||||
table.insert(segment.to_string(), value);
|
||||
*current = TomlValue::Table(table);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Traverse or create intermediate object.
|
||||
match current {
|
||||
TomlValue::Table(table) => {
|
||||
current = table
|
||||
.entry(segment.to_string())
|
||||
.or_insert_with(|| TomlValue::Table(Table::new()));
|
||||
}
|
||||
_ => {
|
||||
*current = TomlValue::Table(Table::new());
|
||||
if let TomlValue::Table(tbl) = current {
|
||||
current = tbl
|
||||
.entry(segment.to_string())
|
||||
.or_insert_with(|| TomlValue::Table(Table::new()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -171,14 +257,20 @@ pub struct ConfigToml {
|
||||
/// Provider to use from the model_providers map.
|
||||
pub model_provider: Option<String>,
|
||||
|
||||
/// Size of the context window for the model, in tokens.
|
||||
pub model_context_window: Option<u64>,
|
||||
|
||||
/// Maximum number of output tokens.
|
||||
pub model_max_output_tokens: Option<u64>,
|
||||
|
||||
/// Default approval policy for executing commands.
|
||||
pub approval_policy: Option<AskForApproval>,
|
||||
|
||||
// The `default` attribute ensures that the field is treated as `None` when
|
||||
// the key is omitted from the TOML. Without it, Serde treats the field as
|
||||
// required because we supply a custom deserializer.
|
||||
#[serde(default, deserialize_with = "deserialize_sandbox_permissions")]
|
||||
pub sandbox_permissions: Option<Vec<SandboxPermission>>,
|
||||
#[serde(default)]
|
||||
pub shell_environment_policy: ShellEnvironmentPolicyToml,
|
||||
|
||||
/// If omitted, Codex defaults to the restrictive `read-only` policy.
|
||||
pub sandbox: Option<SandboxPolicy>,
|
||||
|
||||
/// Disable server-side response storage (sends the full conversation
|
||||
/// context with every request). Currently necessary for OpenAI customers
|
||||
@@ -220,55 +312,13 @@ pub struct ConfigToml {
|
||||
|
||||
/// Collection of settings that are specific to the TUI.
|
||||
pub tui: Option<Tui>,
|
||||
}
|
||||
|
||||
impl ConfigToml {
|
||||
/// Attempt to parse the file at `~/.codex/config.toml`. If it does not
|
||||
/// exist, return a default config. Though if it exists and cannot be
|
||||
/// parsed, report that to the user and force them to fix it.
|
||||
fn load_from_toml(codex_home: &Path) -> std::io::Result<Self> {
|
||||
let config_toml_path = codex_home.join("config.toml");
|
||||
match std::fs::read_to_string(&config_toml_path) {
|
||||
Ok(contents) => toml::from_str::<Self>(&contents).map_err(|e| {
|
||||
tracing::error!("Failed to parse config.toml: {e}");
|
||||
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
|
||||
}),
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
|
||||
tracing::info!("config.toml not found, using defaults");
|
||||
Ok(Self::default())
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to read config.toml: {e}");
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/// When set to `true`, `AgentReasoning` events will be hidden from the
|
||||
/// UI/output. Defaults to `false`.
|
||||
pub hide_agent_reasoning: Option<bool>,
|
||||
|
||||
fn deserialize_sandbox_permissions<'de, D>(
|
||||
deserializer: D,
|
||||
) -> Result<Option<Vec<SandboxPermission>>, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
let permissions: Option<Vec<String>> = Option::deserialize(deserializer)?;
|
||||
|
||||
match permissions {
|
||||
Some(raw_permissions) => {
|
||||
let base_path = find_codex_home().map_err(serde::de::Error::custom)?;
|
||||
|
||||
let converted = raw_permissions
|
||||
.into_iter()
|
||||
.map(|raw| {
|
||||
parse_sandbox_permission_with_base_path(&raw, base_path.clone())
|
||||
.map_err(serde::de::Error::custom)
|
||||
})
|
||||
.collect::<Result<Vec<_>, D::Error>>()?;
|
||||
|
||||
Ok(Some(converted))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
pub model_reasoning_effort: Option<ReasoningEffort>,
|
||||
pub model_reasoning_summary: Option<ReasoningSummary>,
|
||||
}
|
||||
|
||||
/// Optional overrides for user configuration (e.g., from CLI flags).
|
||||
@@ -278,27 +328,12 @@ pub struct ConfigOverrides {
|
||||
pub cwd: Option<PathBuf>,
|
||||
pub approval_policy: Option<AskForApproval>,
|
||||
pub sandbox_policy: Option<SandboxPolicy>,
|
||||
pub disable_response_storage: Option<bool>,
|
||||
pub model_provider: Option<String>,
|
||||
pub config_profile: Option<String>,
|
||||
pub codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Load configuration, optionally applying overrides (CLI flags). Merges
|
||||
/// ~/.codex/config.toml, ~/.codex/instructions.md, embedded defaults, and
|
||||
/// any values provided in `overrides` (highest precedence).
|
||||
pub fn load_with_overrides(overrides: ConfigOverrides) -> std::io::Result<Self> {
|
||||
// Resolve the directory that stores Codex state (e.g. ~/.codex or the
|
||||
// value of $CODEX_HOME) so we can embed it into the resulting
|
||||
// `Config` instance.
|
||||
let codex_home = find_codex_home()?;
|
||||
|
||||
let cfg: ConfigToml = ConfigToml::load_from_toml(&codex_home)?;
|
||||
tracing::warn!("Config parsed from config.toml: {cfg:?}");
|
||||
|
||||
Self::load_from_base_config_with_overrides(cfg, overrides, codex_home)
|
||||
}
|
||||
|
||||
/// Meant to be used exclusively for tests: `load_with_cli_overrides()` should
|
||||
/// be used in all other cases.
|
||||
pub fn load_from_base_config_with_overrides(
|
||||
@@ -314,9 +349,9 @@ impl Config {
|
||||
cwd,
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
disable_response_storage,
|
||||
model_provider,
|
||||
config_profile: config_profile_key,
|
||||
codex_linux_sandbox_exe,
|
||||
} = overrides;
|
||||
|
||||
let config_profile = match config_profile_key.or(cfg.profile) {
|
||||
@@ -333,20 +368,10 @@ impl Config {
|
||||
None => ConfigProfile::default(),
|
||||
};
|
||||
|
||||
let sandbox_policy = match sandbox_policy {
|
||||
Some(sandbox_policy) => sandbox_policy,
|
||||
None => {
|
||||
// Derive a SandboxPolicy from the permissions in the config.
|
||||
match cfg.sandbox_permissions {
|
||||
// Note this means the user can explicitly set permissions
|
||||
// to the empty list in the config file, granting it no
|
||||
// permissions whatsoever.
|
||||
Some(permissions) => SandboxPolicy::from(permissions),
|
||||
// Default to read only rather than completely locked down.
|
||||
None => SandboxPolicy::new_read_only_policy(),
|
||||
}
|
||||
}
|
||||
};
|
||||
let sandbox_policy = sandbox_policy.unwrap_or_else(|| {
|
||||
cfg.sandbox
|
||||
.unwrap_or_else(SandboxPolicy::new_read_only_policy)
|
||||
});
|
||||
|
||||
let mut model_providers = built_in_model_providers();
|
||||
// Merge user-defined providers into the built-in list.
|
||||
@@ -368,6 +393,8 @@ impl Config {
|
||||
})?
|
||||
.clone();
|
||||
|
||||
let shell_environment_policy = cfg.shell_environment_policy.into();
|
||||
|
||||
let resolved_cwd = {
|
||||
use std::env;
|
||||
|
||||
@@ -389,11 +416,23 @@ impl Config {
|
||||
|
||||
let history = cfg.history.unwrap_or_default();
|
||||
|
||||
let model = model
|
||||
.or(config_profile.model)
|
||||
.or(cfg.model)
|
||||
.unwrap_or_else(default_model);
|
||||
let openai_model_info = get_model_info(&model);
|
||||
let model_context_window = cfg
|
||||
.model_context_window
|
||||
.or_else(|| openai_model_info.as_ref().map(|info| info.context_window));
|
||||
let model_max_output_tokens = cfg.model_max_output_tokens.or_else(|| {
|
||||
openai_model_info
|
||||
.as_ref()
|
||||
.map(|info| info.max_output_tokens)
|
||||
});
|
||||
let config = Self {
|
||||
model: model
|
||||
.or(config_profile.model)
|
||||
.or(cfg.model)
|
||||
.unwrap_or_else(default_model),
|
||||
model,
|
||||
model_context_window,
|
||||
model_max_output_tokens,
|
||||
model_provider_id,
|
||||
model_provider,
|
||||
cwd: resolved_cwd,
|
||||
@@ -402,8 +441,9 @@ impl Config {
|
||||
.or(cfg.approval_policy)
|
||||
.unwrap_or_else(AskForApproval::default),
|
||||
sandbox_policy,
|
||||
disable_response_storage: disable_response_storage
|
||||
.or(config_profile.disable_response_storage)
|
||||
shell_environment_policy,
|
||||
disable_response_storage: config_profile
|
||||
.disable_response_storage
|
||||
.or(cfg.disable_response_storage)
|
||||
.unwrap_or(false),
|
||||
notify: cfg.notify,
|
||||
@@ -415,6 +455,11 @@ impl Config {
|
||||
history,
|
||||
file_opener: cfg.file_opener.unwrap_or(UriBasedFileOpener::VsCode),
|
||||
tui: cfg.tui.unwrap_or_default(),
|
||||
codex_linux_sandbox_exe,
|
||||
|
||||
hide_agent_reasoning: cfg.hide_agent_reasoning.unwrap_or(false),
|
||||
model_reasoning_effort: cfg.model_reasoning_effort.unwrap_or_default(),
|
||||
model_reasoning_summary: cfg.model_reasoning_summary.unwrap_or_default(),
|
||||
};
|
||||
Ok(config)
|
||||
}
|
||||
@@ -476,102 +521,23 @@ pub fn log_dir(cfg: &Config) -> std::io::Result<PathBuf> {
|
||||
Ok(p)
|
||||
}
|
||||
|
||||
pub fn parse_sandbox_permission_with_base_path(
|
||||
raw: &str,
|
||||
base_path: PathBuf,
|
||||
) -> std::io::Result<SandboxPermission> {
|
||||
use SandboxPermission::*;
|
||||
|
||||
if let Some(path) = raw.strip_prefix("disk-write-folder=") {
|
||||
return if path.is_empty() {
|
||||
Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"--sandbox-permission disk-write-folder=<PATH> requires a non-empty PATH",
|
||||
))
|
||||
} else {
|
||||
use path_absolutize::*;
|
||||
|
||||
let file = PathBuf::from(path);
|
||||
let absolute_path = if file.is_relative() {
|
||||
file.absolutize_from(base_path)
|
||||
} else {
|
||||
file.absolutize()
|
||||
}
|
||||
.map(|path| path.into_owned())?;
|
||||
Ok(DiskWriteFolder {
|
||||
folder: absolute_path,
|
||||
})
|
||||
};
|
||||
}
|
||||
|
||||
match raw {
|
||||
"disk-full-read-access" => Ok(DiskFullReadAccess),
|
||||
"disk-write-platform-user-temp-folder" => Ok(DiskWritePlatformUserTempFolder),
|
||||
"disk-write-platform-global-temp-folder" => Ok(DiskWritePlatformGlobalTempFolder),
|
||||
"disk-write-cwd" => Ok(DiskWriteCwd),
|
||||
"disk-full-write-access" => Ok(DiskFullWriteAccess),
|
||||
"network-full-access" => Ok(NetworkFullAccess),
|
||||
_ => Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
format!(
|
||||
"`{raw}` is not a recognised permission.\nRun with `--help` to see the accepted values."
|
||||
),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
use crate::config_types::HistoryPersistence;
|
||||
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Verify that the `sandbox_permissions` field on `ConfigToml` correctly
|
||||
/// differentiates between a value that is completely absent in the
|
||||
/// provided TOML (i.e. `None`) and one that is explicitly specified as an
|
||||
/// empty array (i.e. `Some(vec![])`). This ensures that downstream logic
|
||||
/// that treats these two cases differently (default read-only policy vs a
|
||||
/// fully locked-down sandbox) continues to function.
|
||||
#[test]
|
||||
fn test_sandbox_permissions_none_vs_empty_vec() {
|
||||
// Case 1: `sandbox_permissions` key is *absent* from the TOML source.
|
||||
let toml_source_without_key = "";
|
||||
let cfg_without_key: ConfigToml = toml::from_str(toml_source_without_key)
|
||||
.expect("TOML deserialization without key should succeed");
|
||||
assert!(cfg_without_key.sandbox_permissions.is_none());
|
||||
|
||||
// Case 2: `sandbox_permissions` is present but set to an *empty array*.
|
||||
let toml_source_with_empty = "sandbox_permissions = []";
|
||||
let cfg_with_empty: ConfigToml = toml::from_str(toml_source_with_empty)
|
||||
.expect("TOML deserialization with empty array should succeed");
|
||||
assert_eq!(Some(vec![]), cfg_with_empty.sandbox_permissions);
|
||||
|
||||
// Case 3: `sandbox_permissions` contains a non-empty list of valid values.
|
||||
let toml_source_with_values = r#"
|
||||
sandbox_permissions = ["disk-full-read-access", "network-full-access"]
|
||||
"#;
|
||||
let cfg_with_values: ConfigToml = toml::from_str(toml_source_with_values)
|
||||
.expect("TOML deserialization with valid permissions should succeed");
|
||||
|
||||
assert_eq!(
|
||||
Some(vec![
|
||||
SandboxPermission::DiskFullReadAccess,
|
||||
SandboxPermission::NetworkFullAccess
|
||||
]),
|
||||
cfg_with_values.sandbox_permissions
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_toml_parsing() {
|
||||
let history_with_persistence = r#"
|
||||
[history]
|
||||
persistence = "save-all"
|
||||
"#;
|
||||
let history_with_persistence_cfg: ConfigToml =
|
||||
toml::from_str::<ConfigToml>(history_with_persistence)
|
||||
.expect("TOML deserialization should succeed");
|
||||
let history_with_persistence_cfg = toml::from_str::<ConfigToml>(history_with_persistence)
|
||||
.expect("TOML deserialization should succeed");
|
||||
assert_eq!(
|
||||
Some(History {
|
||||
persistence: HistoryPersistence::SaveAll,
|
||||
@@ -585,9 +551,8 @@ persistence = "save-all"
|
||||
persistence = "none"
|
||||
"#;
|
||||
|
||||
let history_no_persistence_cfg: ConfigToml =
|
||||
toml::from_str::<ConfigToml>(history_no_persistence)
|
||||
.expect("TOML deserialization should succeed");
|
||||
let history_no_persistence_cfg = toml::from_str::<ConfigToml>(history_no_persistence)
|
||||
.expect("TOML deserialization should succeed");
|
||||
assert_eq!(
|
||||
Some(History {
|
||||
persistence: HistoryPersistence::None,
|
||||
@@ -597,20 +562,47 @@ persistence = "none"
|
||||
);
|
||||
}
|
||||
|
||||
/// Deserializing a TOML string containing an *invalid* permission should
|
||||
/// fail with a helpful error rather than silently defaulting or
|
||||
/// succeeding.
|
||||
#[test]
|
||||
fn test_sandbox_permissions_illegal_value() {
|
||||
let toml_bad = r#"sandbox_permissions = ["not-a-real-permission"]"#;
|
||||
fn test_sandbox_config_parsing() {
|
||||
let sandbox_full_access = r#"
|
||||
[sandbox]
|
||||
mode = "danger-full-access"
|
||||
network_access = false # This should be ignored.
|
||||
"#;
|
||||
let sandbox_full_access_cfg = toml::from_str::<ConfigToml>(sandbox_full_access)
|
||||
.expect("TOML deserialization should succeed");
|
||||
assert_eq!(
|
||||
Some(SandboxPolicy::DangerFullAccess),
|
||||
sandbox_full_access_cfg.sandbox
|
||||
);
|
||||
|
||||
let err = toml::from_str::<ConfigToml>(toml_bad)
|
||||
.expect_err("Deserialization should fail for invalid permission");
|
||||
let sandbox_read_only = r#"
|
||||
[sandbox]
|
||||
mode = "read-only"
|
||||
network_access = true # This should be ignored.
|
||||
"#;
|
||||
|
||||
// Make sure the error message contains the invalid value so users have
|
||||
// useful feedback.
|
||||
let msg = err.to_string();
|
||||
assert!(msg.contains("not-a-real-permission"));
|
||||
let sandbox_read_only_cfg = toml::from_str::<ConfigToml>(sandbox_read_only)
|
||||
.expect("TOML deserialization should succeed");
|
||||
assert_eq!(Some(SandboxPolicy::ReadOnly), sandbox_read_only_cfg.sandbox);
|
||||
|
||||
let sandbox_workspace_write = r#"
|
||||
[sandbox]
|
||||
mode = "workspace-write"
|
||||
writable_roots = [
|
||||
"/tmp",
|
||||
]
|
||||
"#;
|
||||
|
||||
let sandbox_workspace_write_cfg = toml::from_str::<ConfigToml>(sandbox_workspace_write)
|
||||
.expect("TOML deserialization should succeed");
|
||||
assert_eq!(
|
||||
Some(SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![PathBuf::from("/tmp")],
|
||||
network_access: false
|
||||
}),
|
||||
sandbox_workspace_write_cfg.sandbox
|
||||
);
|
||||
}
|
||||
|
||||
struct PrecedenceTestFixture {
|
||||
@@ -635,8 +627,7 @@ persistence = "none"
|
||||
fn create_test_fixture() -> std::io::Result<PrecedenceTestFixture> {
|
||||
let toml = r#"
|
||||
model = "o3"
|
||||
approval_policy = "unless-allow-listed"
|
||||
sandbox_permissions = ["disk-full-read-access"]
|
||||
approval_policy = "untrusted"
|
||||
disable_response_storage = false
|
||||
|
||||
# Can be used to determine which profile to use if not specified by
|
||||
@@ -683,6 +674,7 @@ disable_response_storage = true
|
||||
env_key: Some("OPENAI_API_KEY".to_string()),
|
||||
wire_api: crate::WireApi::Chat,
|
||||
env_key_instructions: None,
|
||||
query_params: None,
|
||||
};
|
||||
let model_provider_map = {
|
||||
let mut model_provider_map = built_in_model_providers();
|
||||
@@ -737,10 +729,13 @@ disable_response_storage = true
|
||||
assert_eq!(
|
||||
Config {
|
||||
model: "o3".to_string(),
|
||||
model_context_window: Some(200_000),
|
||||
model_max_output_tokens: Some(100_000),
|
||||
model_provider_id: "openai".to_string(),
|
||||
model_provider: fixture.openai_provider.clone(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
||||
shell_environment_policy: ShellEnvironmentPolicy::default(),
|
||||
disable_response_storage: false,
|
||||
instructions: None,
|
||||
notify: None,
|
||||
@@ -752,6 +747,10 @@ disable_response_storage = true
|
||||
history: History::default(),
|
||||
file_opener: UriBasedFileOpener::VsCode,
|
||||
tui: Tui::default(),
|
||||
codex_linux_sandbox_exe: None,
|
||||
hide_agent_reasoning: false,
|
||||
model_reasoning_effort: ReasoningEffort::default(),
|
||||
model_reasoning_summary: ReasoningSummary::default(),
|
||||
},
|
||||
o3_profile_config
|
||||
);
|
||||
@@ -774,10 +773,13 @@ disable_response_storage = true
|
||||
)?;
|
||||
let expected_gpt3_profile_config = Config {
|
||||
model: "gpt-3.5-turbo".to_string(),
|
||||
model_context_window: Some(16_385),
|
||||
model_max_output_tokens: Some(4_096),
|
||||
model_provider_id: "openai-chat-completions".to_string(),
|
||||
model_provider: fixture.openai_chat_completions_provider.clone(),
|
||||
approval_policy: AskForApproval::UnlessAllowListed,
|
||||
approval_policy: AskForApproval::UnlessTrusted,
|
||||
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
||||
shell_environment_policy: ShellEnvironmentPolicy::default(),
|
||||
disable_response_storage: false,
|
||||
instructions: None,
|
||||
notify: None,
|
||||
@@ -789,6 +791,10 @@ disable_response_storage = true
|
||||
history: History::default(),
|
||||
file_opener: UriBasedFileOpener::VsCode,
|
||||
tui: Tui::default(),
|
||||
codex_linux_sandbox_exe: None,
|
||||
hide_agent_reasoning: false,
|
||||
model_reasoning_effort: ReasoningEffort::default(),
|
||||
model_reasoning_summary: ReasoningSummary::default(),
|
||||
};
|
||||
|
||||
assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
|
||||
@@ -826,10 +832,13 @@ disable_response_storage = true
|
||||
)?;
|
||||
let expected_zdr_profile_config = Config {
|
||||
model: "o3".to_string(),
|
||||
model_context_window: Some(200_000),
|
||||
model_max_output_tokens: Some(100_000),
|
||||
model_provider_id: "openai".to_string(),
|
||||
model_provider: fixture.openai_provider.clone(),
|
||||
approval_policy: AskForApproval::OnFailure,
|
||||
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
||||
shell_environment_policy: ShellEnvironmentPolicy::default(),
|
||||
disable_response_storage: true,
|
||||
instructions: None,
|
||||
notify: None,
|
||||
@@ -841,6 +850,10 @@ disable_response_storage = true
|
||||
history: History::default(),
|
||||
file_opener: UriBasedFileOpener::VsCode,
|
||||
tui: Tui::default(),
|
||||
codex_linux_sandbox_exe: None,
|
||||
hide_agent_reasoning: false,
|
||||
model_reasoning_effort: ReasoningEffort::default(),
|
||||
model_reasoning_summary: ReasoningSummary::default(),
|
||||
};
|
||||
|
||||
assert_eq!(expected_zdr_profile_config, zdr_profile_config);
|
||||
|
||||
207
codex-rs/core/src/config_types.rs
Normal file
207
codex-rs/core/src/config_types.rs
Normal file
@@ -0,0 +1,207 @@
|
||||
//! Types used to define the fields of [`crate::config::Config`].
|
||||
|
||||
// Note this file should generally be restricted to simple struct/enum
|
||||
// definitions that do not contain business logic.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use strum_macros::Display;
|
||||
use wildmatch::WildMatchPattern;
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
/// Deserialized settings for one MCP server entry: a command plus its
/// argument list and an optional environment map.
/// NOTE(review): presumably `command` is the executable that gets spawned with
/// `args` and `env` — confirm against the code that consumes this struct.
#[derive(Deserialize, Debug, Clone, PartialEq)]
|
||||
pub struct McpServerConfig {
|
||||
/// Required key; there is no serde default for it.
pub command: String,
|
||||
|
||||
/// Argument list; an omitted key deserializes to an empty `Vec`.
#[serde(default)]
|
||||
pub args: Vec<String>,
|
||||
|
||||
/// Optional map of environment variable names to values; an omitted key
/// deserializes to `None`.
#[serde(default)]
|
||||
pub env: Option<HashMap<String, String>>,
|
||||
}
|
||||
|
||||
/// Selects which editor's URI scheme is used for URI-based file opening.
///
/// Each variant deserializes from the string given in its `serde(rename)`
/// attribute (e.g. `"vscode-insiders"`).
#[derive(Deserialize, Debug, Copy, Clone, PartialEq)]
|
||||
pub enum UriBasedFileOpener {
|
||||
#[serde(rename = "vscode")]
|
||||
VsCode,
|
||||
|
||||
#[serde(rename = "vscode-insiders")]
|
||||
VsCodeInsiders,
|
||||
|
||||
#[serde(rename = "windsurf")]
|
||||
Windsurf,
|
||||
|
||||
#[serde(rename = "cursor")]
|
||||
Cursor,
|
||||
|
||||
/// Option to disable the URI-based file opener.
|
||||
#[serde(rename = "none")]
|
||||
None,
|
||||
}
|
||||
|
||||
impl UriBasedFileOpener {
|
||||
pub fn get_scheme(&self) -> Option<&str> {
|
||||
match self {
|
||||
UriBasedFileOpener::VsCode => Some("vscode"),
|
||||
UriBasedFileOpener::VsCodeInsiders => Some("vscode-insiders"),
|
||||
UriBasedFileOpener::Windsurf => Some("windsurf"),
|
||||
UriBasedFileOpener::Cursor => Some("cursor"),
|
||||
UriBasedFileOpener::None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Settings that govern if and what will be written to `~/.codex/history.jsonl`.
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, Default)]
|
||||
pub struct History {
|
||||
/// Whether history entries are written to disk; see
/// [`HistoryPersistence`] for the available modes.
/// NOTE(review): this field has no `#[serde(default)]`, so a `[history]`
/// table that omits `persistence` would presumably be rejected — confirm
/// that this is intended.
|
||||
pub persistence: HistoryPersistence,
|
||||
|
||||
/// If set, the maximum size of the history file in bytes.
|
||||
/// TODO(mbolin): Not currently honored.
|
||||
pub max_bytes: Option<usize>,
|
||||
}
|
||||
|
||||
/// Persistence mode for history entries.
///
/// Deserialized from kebab-case strings (`"save-all"`, `"none"`); the default
/// is [`HistoryPersistence::SaveAll`].
#[derive(Deserialize, Debug, Copy, Clone, PartialEq, Default)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum HistoryPersistence {
|
||||
/// Save all history entries to disk.
|
||||
#[default]
|
||||
SaveAll,
|
||||
/// Do not write history to disk.
|
||||
None,
|
||||
}
|
||||
|
||||
/// Collection of settings that are specific to the TUI.
///
/// The derived `Default` leaves `disable_mouse_capture` as `false`, i.e. mouse
/// capture stays enabled.
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, Default)]
|
||||
pub struct Tui {
|
||||
/// By default, mouse capture is enabled in the TUI so that it is possible
|
||||
/// to scroll the conversation history with a mouse. This comes at the cost
|
||||
/// of not being able to use the mouse to select text in the TUI.
|
||||
/// (Most terminals support a modifier key to allow this. For example,
|
||||
/// text selection works in iTerm if you hold down the `Option` key while
|
||||
/// clicking and dragging.)
|
||||
///
|
||||
/// Setting this option to `true` disables mouse capture, so scrolling with
|
||||
/// the mouse is not possible, though the keyboard shortcuts e.g. `b` and
|
||||
/// `space` still work. This allows the user to select text in the TUI
|
||||
/// using the mouse without needing to hold down a modifier key.
///
/// NOTE(review): there is no `#[serde(default)]` on this field, so a
/// `[tui]` table that omits the key would presumably fail to deserialize —
/// confirm that this is intended.
|
||||
pub disable_mouse_capture: bool,
|
||||
}
|
||||
|
||||
/// Which of the parent process's environment variables form the starting
/// point of a spawned command's environment.
///
/// Deserialized from kebab-case strings (`"core"`, `"all"`, `"none"`); the
/// default is [`ShellEnvironmentPolicyInherit::Core`].
#[derive(Deserialize, Debug, Clone, PartialEq, Default)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum ShellEnvironmentPolicyInherit {
|
||||
/// "Core" environment variables for the platform. On UNIX, this would
|
||||
/// include HOME, LOGNAME, PATH, SHELL, and USER, among others.
|
||||
#[default]
|
||||
Core,
|
||||
|
||||
/// Inherits the full environment from the parent process.
|
||||
All,
|
||||
|
||||
/// Do not inherit any environment variables from the parent process.
|
||||
None,
|
||||
}
|
||||
|
||||
/// Policy for building the `env` when spawning a process via either the
|
||||
/// `shell` or `local_shell` tool.
///
/// This is the deserialized form in which every field is optional; it is
/// converted into [`ShellEnvironmentPolicy`] through the `From` impl, which
/// fills in the defaults.
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, Default)]
|
||||
pub struct ShellEnvironmentPolicyToml {
|
||||
/// Starting set of inherited variables; treated as `Core` when absent.
pub inherit: Option<ShellEnvironmentPolicyInherit>,
|
||||
|
||||
/// Treated as `false` when absent.
pub ignore_default_excludes: Option<bool>,
|
||||
|
||||
/// List of wildcard patterns (not regular expressions): each string is
/// compiled into a case-insensitive [`EnvironmentVariablePattern`].
|
||||
pub exclude: Option<Vec<String>>,
|
||||
|
||||
/// (key, value) pairs; treated as an empty map when absent.
pub r#set: Option<HashMap<String, String>>,
|
||||
|
||||
/// List of wildcard patterns (not regular expressions): each string is
/// compiled into a case-insensitive [`EnvironmentVariablePattern`].
|
||||
pub include_only: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
/// Wildcard pattern used to match environment variable names, with `*` and
/// `?` as the two wildcard characters.
pub type EnvironmentVariablePattern = WildMatchPattern<'*', '?'>;
|
||||
|
||||
/// Deriving the `env` based on this policy works as follows:
|
||||
/// 1. Create an initial map based on the `inherit` policy.
|
||||
/// 2. If `ignore_default_excludes` is false, filter the map using the default
|
||||
/// exclude pattern(s), which are: `"*KEY*"` and `"*TOKEN*"`.
|
||||
/// 3. If `exclude` is not empty, filter the map using the provided patterns.
|
||||
/// 4. Insert any entries from `r#set` into the map.
|
||||
/// 5. If non-empty, filter the map using the `include_only` patterns.
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct ShellEnvironmentPolicy {
|
||||
/// Starting point when building the environment.
|
||||
pub inherit: ShellEnvironmentPolicyInherit,
|
||||
|
||||
/// True to skip the check to exclude default environment variables that
|
||||
/// contain "KEY" or "TOKEN" in their name.
|
||||
pub ignore_default_excludes: bool,
|
||||
|
||||
/// Wildcard patterns for environment variable names to exclude from the
/// environment.
|
||||
pub exclude: Vec<EnvironmentVariablePattern>,
|
||||
|
||||
/// (key, value) pairs to insert in the environment.
|
||||
pub r#set: HashMap<String, String>,
|
||||
|
||||
/// Wildcard patterns for environment variable names to retain in the
/// environment.
|
||||
pub include_only: Vec<EnvironmentVariablePattern>,
|
||||
}
|
||||
|
||||
impl From<ShellEnvironmentPolicyToml> for ShellEnvironmentPolicy {
|
||||
fn from(toml: ShellEnvironmentPolicyToml) -> Self {
|
||||
let inherit = toml.inherit.unwrap_or(ShellEnvironmentPolicyInherit::Core);
|
||||
let ignore_default_excludes = toml.ignore_default_excludes.unwrap_or(false);
|
||||
let exclude = toml
|
||||
.exclude
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|s| EnvironmentVariablePattern::new_case_insensitive(&s))
|
||||
.collect();
|
||||
let r#set = toml.r#set.unwrap_or_default();
|
||||
let include_only = toml
|
||||
.include_only
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|s| EnvironmentVariablePattern::new_case_insensitive(&s))
|
||||
.collect();
|
||||
|
||||
Self {
|
||||
inherit,
|
||||
ignore_default_excludes,
|
||||
exclude,
|
||||
r#set,
|
||||
include_only,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reasoning effort setting for the model.
///
/// Both serde and `Display` (via strum) use the lowercase variant name
/// (`low`, `medium`, `high`, `none`); the default is
/// [`ReasoningEffort::Medium`].
///
/// See https://platform.openai.com/docs/guides/reasoning?api-mode=responses#get-started-with-reasoning
|
||||
#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq, Eq, Display)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
#[strum(serialize_all = "lowercase")]
|
||||
pub enum ReasoningEffort {
|
||||
Low,
|
||||
#[default]
|
||||
Medium,
|
||||
High,
|
||||
/// Option to disable reasoning.
|
||||
None,
|
||||
}
|
||||
|
||||
/// A summary of the reasoning performed by the model. This can be useful for
|
||||
/// debugging and understanding the model's reasoning process.
|
||||
/// See https://platform.openai.com/docs/guides/reasoning?api-mode=responses#reasoning-summaries
///
/// Both serde and `Display` (via strum) use the lowercase variant name
/// (`auto`, `concise`, `detailed`, `none`); the default is
/// [`ReasoningSummary::Auto`].
|
||||
#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq, Eq, Display)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
#[strum(serialize_all = "lowercase")]
|
||||
pub enum ReasoningSummary {
|
||||
#[default]
|
||||
Auto,
|
||||
Concise,
|
||||
Detailed,
|
||||
/// Option to disable reasoning summaries.
|
||||
None,
|
||||
}
|
||||
@@ -25,7 +25,8 @@ impl ConversationHistory {
|
||||
/// `items` is ordered from oldest to newest.
|
||||
pub(crate) fn record_items<I>(&mut self, items: I)
|
||||
where
|
||||
I: IntoIterator<Item = ResponseItem>,
|
||||
I: IntoIterator,
|
||||
I::Item: std::ops::Deref<Target = ResponseItem>,
|
||||
{
|
||||
for item in items {
|
||||
if is_api_message(&item) {
|
||||
|
||||
@@ -74,6 +74,9 @@ pub enum CodexErr {
|
||||
#[error("sandbox error: {0}")]
|
||||
Sandbox(#[from] SandboxErr),
|
||||
|
||||
#[error("codex-linux-sandbox was required but not provided")]
|
||||
LandlockSandboxExecutableNotProvided,
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Automatic conversions for common external error types
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#[cfg(unix)]
|
||||
use std::os::unix::process::ExitStatusExt;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
@@ -20,7 +21,6 @@ use tokio::sync::Notify;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec_linux::exec_linux;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
|
||||
// Maximum we send for each stream, which is either:
|
||||
@@ -59,6 +59,7 @@ pub struct ExecParams {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub env: HashMap<String, String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
@@ -77,6 +78,7 @@ pub async fn process_exec_tool_call(
|
||||
sandbox_type: SandboxType,
|
||||
ctrl_c: Arc<Notify>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
codex_linux_sandbox_exe: &Option<PathBuf>,
|
||||
) -> Result<ExecToolCallOutput> {
|
||||
let start = Instant::now();
|
||||
|
||||
@@ -87,17 +89,41 @@ pub async fn process_exec_tool_call(
|
||||
command,
|
||||
cwd,
|
||||
timeout_ms,
|
||||
env,
|
||||
} = params;
|
||||
let child = spawn_command_under_seatbelt(
|
||||
command,
|
||||
sandbox_policy,
|
||||
cwd,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
env,
|
||||
)
|
||||
.await?;
|
||||
consume_truncated_output(child, ctrl_c, timeout_ms).await
|
||||
}
|
||||
SandboxType::LinuxSeccomp => exec_linux(params, ctrl_c, sandbox_policy),
|
||||
SandboxType::LinuxSeccomp => {
|
||||
let ExecParams {
|
||||
command,
|
||||
cwd,
|
||||
timeout_ms,
|
||||
env,
|
||||
} = params;
|
||||
|
||||
let codex_linux_sandbox_exe = codex_linux_sandbox_exe
|
||||
.as_ref()
|
||||
.ok_or(CodexErr::LandlockSandboxExecutableNotProvided)?;
|
||||
let child = spawn_command_under_linux_sandbox(
|
||||
codex_linux_sandbox_exe,
|
||||
command,
|
||||
sandbox_policy,
|
||||
cwd,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
env,
|
||||
)
|
||||
.await?;
|
||||
|
||||
consume_truncated_output(child, ctrl_c, timeout_ms).await
|
||||
}
|
||||
};
|
||||
let duration = start.elapsed();
|
||||
match raw_output_result {
|
||||
@@ -145,12 +171,82 @@ pub async fn spawn_command_under_seatbelt(
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: PathBuf,
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child> {
|
||||
let seatbelt_command = create_seatbelt_command(command, sandbox_policy, &cwd);
|
||||
spawn_child_async(seatbelt_command, cwd, sandbox_policy, stdio_policy).await
|
||||
let args = create_seatbelt_command_args(command, sandbox_policy, &cwd);
|
||||
let arg0 = None;
|
||||
spawn_child_async(
|
||||
PathBuf::from(MACOS_PATH_TO_SEATBELT_EXECUTABLE),
|
||||
args,
|
||||
arg0,
|
||||
cwd,
|
||||
sandbox_policy,
|
||||
stdio_policy,
|
||||
env,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
fn create_seatbelt_command(
|
||||
/// Spawn a shell tool command under the Linux Landlock+seccomp sandbox helper
|
||||
/// (codex-linux-sandbox).
|
||||
///
|
||||
/// The helper's argument list is built by
|
||||
/// `create_linux_sandbox_command_args()`: the working directory, the
|
||||
/// [`SandboxPolicy`] serialized as JSON, a `--` separator, and then the
|
||||
/// original `command`. The child is started with `arg0` set to
/// `"codex-linux-sandbox"`, and `cwd`, `stdio_policy` and `env` are forwarded
/// to `spawn_child_async()` unchanged.
|
||||
pub async fn spawn_command_under_linux_sandbox<P>(
|
||||
codex_linux_sandbox_exe: P,
|
||||
command: Vec<String>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: PathBuf,
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
{
|
||||
let args = create_linux_sandbox_command_args(command, sandbox_policy, &cwd);
|
||||
let arg0 = Some("codex-linux-sandbox");
|
||||
spawn_child_async(
|
||||
codex_linux_sandbox_exe.as_ref().to_path_buf(),
|
||||
args,
|
||||
arg0,
|
||||
cwd,
|
||||
sandbox_policy,
|
||||
stdio_policy,
|
||||
env,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Converts the sandbox policy into the CLI invocation for `codex-linux-sandbox`.
///
/// The returned vector is, in order: `cwd` as a string, `sandbox_policy`
/// serialized as JSON, the literal `--`, and then every element of `command`.
///
/// # Panics
///
/// Panics if `cwd` is not valid UTF-8 or if `sandbox_policy` cannot be
/// serialized to JSON.
|
||||
fn create_linux_sandbox_command_args(
|
||||
command: Vec<String>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: &Path,
|
||||
) -> Vec<String> {
|
||||
#[expect(clippy::expect_used)]
|
||||
let sandbox_policy_cwd = cwd.to_str().expect("cwd must be valid UTF-8").to_string();
|
||||
|
||||
#[expect(clippy::expect_used)]
|
||||
let sandbox_policy_json =
|
||||
serde_json::to_string(sandbox_policy).expect("Failed to serialize SandboxPolicy to JSON");
|
||||
|
||||
let mut linux_cmd: Vec<String> = vec![
|
||||
sandbox_policy_cwd,
|
||||
sandbox_policy_json,
|
||||
// Separator so that command arguments starting with `-` are not parsed as
|
||||
// options of the helper itself.
|
||||
"--".to_string(),
|
||||
];
|
||||
|
||||
// Append the original tool command.
|
||||
linux_cmd.extend(command);
|
||||
|
||||
linux_cmd
|
||||
}
|
||||
|
||||
fn create_seatbelt_command_args(
|
||||
command: Vec<String>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: &Path,
|
||||
@@ -202,15 +298,11 @@ fn create_seatbelt_command(
|
||||
let full_policy = format!(
|
||||
"{MACOS_SEATBELT_BASE_POLICY}\n{file_read_policy}\n{file_write_policy}\n{network_policy}"
|
||||
);
|
||||
let mut seatbelt_command: Vec<String> = vec![
|
||||
MACOS_PATH_TO_SEATBELT_EXECUTABLE.to_string(),
|
||||
"-p".to_string(),
|
||||
full_policy,
|
||||
];
|
||||
seatbelt_command.extend(extra_cli_args);
|
||||
seatbelt_command.push("--".to_string());
|
||||
seatbelt_command.extend(command);
|
||||
seatbelt_command
|
||||
let mut seatbelt_args: Vec<String> = vec!["-p".to_string(), full_policy];
|
||||
seatbelt_args.extend(extra_cli_args);
|
||||
seatbelt_args.push("--".to_string());
|
||||
seatbelt_args.extend(command);
|
||||
seatbelt_args
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -233,15 +325,26 @@ async fn exec(
|
||||
command,
|
||||
cwd,
|
||||
timeout_ms,
|
||||
env,
|
||||
}: ExecParams,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
ctrl_c: Arc<Notify>,
|
||||
) -> Result<RawExecToolCallOutput> {
|
||||
let (program, args) = command.split_first().ok_or_else(|| {
|
||||
CodexErr::Io(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"command args are empty",
|
||||
))
|
||||
})?;
|
||||
let arg0 = None;
|
||||
let child = spawn_child_async(
|
||||
command,
|
||||
PathBuf::from(program),
|
||||
args.into(),
|
||||
arg0,
|
||||
cwd,
|
||||
sandbox_policy,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
env,
|
||||
)
|
||||
.await?;
|
||||
consume_truncated_output(child, ctrl_c, timeout_ms).await
|
||||
@@ -253,88 +356,53 @@ pub enum StdioPolicy {
|
||||
Inherit,
|
||||
}
|
||||
|
||||
macro_rules! configure_command {
|
||||
(
|
||||
$cmd_type: path,
|
||||
$command: expr,
|
||||
$cwd: expr,
|
||||
$sandbox_policy: expr,
|
||||
$stdio_policy: expr
|
||||
) => {{
|
||||
// For now, we take `SandboxPolicy` as a parameter to spawn_child() because
|
||||
// we need to determine whether to set the
|
||||
// `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` environment variable.
|
||||
// Ultimately, we should be stricter about the environment variables that
|
||||
// are set for the command (as we are when spawning an MCP server), so
|
||||
// instead of SandboxPolicy, we should take the exact env to use for the
|
||||
// Command (i.e., `env_clear().envs(env)`).
|
||||
if $command.is_empty() {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"command args are empty",
|
||||
));
|
||||
}
|
||||
|
||||
let mut cmd = <$cmd_type>::new(&$command[0]);
|
||||
cmd.args(&$command[1..]);
|
||||
cmd.current_dir($cwd);
|
||||
|
||||
if !$sandbox_policy.has_full_network_access() {
|
||||
cmd.env(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR, "1");
|
||||
}
|
||||
|
||||
match $stdio_policy {
|
||||
StdioPolicy::RedirectForShellTool => {
|
||||
// Do not create a file descriptor for stdin because otherwise some
|
||||
// commands may hang forever waiting for input. For example, ripgrep has
|
||||
// a heuristic where it may try to read from stdin as explained here:
|
||||
// https://github.com/BurntSushi/ripgrep/blob/e2362d4d5185d02fa857bf381e7bd52e66fafc73/crates/core/flags/hiargs.rs#L1101-L1103
|
||||
cmd.stdin(Stdio::null());
|
||||
|
||||
cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
|
||||
}
|
||||
StdioPolicy::Inherit => {
|
||||
// Inherit stdin, stdout, and stderr from the parent process.
|
||||
cmd.stdin(Stdio::inherit())
|
||||
.stdout(Stdio::inherit())
|
||||
.stderr(Stdio::inherit());
|
||||
}
|
||||
}
|
||||
|
||||
std::io::Result::<$cmd_type>::Ok(cmd)
|
||||
}};
|
||||
}
|
||||
|
||||
/// Spawns the appropriate child process for the ExecParams and SandboxPolicy,
|
||||
/// ensuring the args and environment variables used to create the `Command`
|
||||
/// (and `Child`) honor the configuration.
|
||||
pub(crate) async fn spawn_child_async(
|
||||
command: Vec<String>,
|
||||
///
|
||||
/// For now, we take `SandboxPolicy` as a parameter to spawn_child() because
|
||||
/// we need to determine whether to set the
|
||||
/// `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` environment variable.
|
||||
async fn spawn_child_async(
|
||||
program: PathBuf,
|
||||
args: Vec<String>,
|
||||
#[cfg_attr(not(unix), allow(unused_variables))] arg0: Option<&str>,
|
||||
cwd: PathBuf,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child> {
|
||||
let mut cmd = configure_command!(Command, command, cwd, sandbox_policy, stdio_policy)?;
|
||||
cmd.kill_on_drop(true).spawn()
|
||||
}
|
||||
let mut cmd = Command::new(&program);
|
||||
#[cfg(unix)]
|
||||
cmd.arg0(arg0.map_or_else(|| program.to_string_lossy().to_string(), String::from));
|
||||
cmd.args(args);
|
||||
cmd.current_dir(cwd);
|
||||
cmd.env_clear();
|
||||
cmd.envs(env);
|
||||
|
||||
/// Alternative version of `spawn_child_async()` that returns
|
||||
/// `std::process::Child` instead of `tokio::process::Child`. This is useful for
|
||||
/// spawning a child process in a thread that is not running a Tokio runtime.
|
||||
pub fn spawn_child_sync(
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
stdio_policy: StdioPolicy,
|
||||
) -> std::io::Result<std::process::Child> {
|
||||
let mut cmd = configure_command!(
|
||||
std::process::Command,
|
||||
command,
|
||||
cwd,
|
||||
sandbox_policy,
|
||||
stdio_policy
|
||||
)?;
|
||||
cmd.spawn()
|
||||
if !sandbox_policy.has_full_network_access() {
|
||||
cmd.env(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR, "1");
|
||||
}
|
||||
|
||||
match stdio_policy {
|
||||
StdioPolicy::RedirectForShellTool => {
|
||||
// Do not create a file descriptor for stdin because otherwise some
|
||||
// commands may hang forever waiting for input. For example, ripgrep has
|
||||
// a heuristic where it may try to read from stdin as explained here:
|
||||
// https://github.com/BurntSushi/ripgrep/blob/e2362d4d5185d02fa857bf381e7bd52e66fafc73/crates/core/flags/hiargs.rs#L1101-L1103
|
||||
cmd.stdin(Stdio::null());
|
||||
|
||||
cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
|
||||
}
|
||||
StdioPolicy::Inherit => {
|
||||
// Inherit stdin, stdout, and stderr from the parent process.
|
||||
cmd.stdin(Stdio::inherit())
|
||||
.stdout(Stdio::inherit())
|
||||
.stderr(Stdio::inherit());
|
||||
}
|
||||
}
|
||||
|
||||
cmd.kill_on_drop(true).spawn()
|
||||
}
|
||||
|
||||
/// Consumes the output of a child process, truncating it so it is suitable for
|
||||
|
||||
196
codex-rs/core/src/exec_env.rs
Normal file
196
codex-rs/core/src/exec_env.rs
Normal file
@@ -0,0 +1,196 @@
|
||||
use crate::config_types::EnvironmentVariablePattern;
|
||||
use crate::config_types::ShellEnvironmentPolicy;
|
||||
use crate::config_types::ShellEnvironmentPolicyInherit;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Construct an environment map based on the rules in the specified policy. The
|
||||
/// resulting map can be passed directly to `Command::envs()` after calling
|
||||
/// `env_clear()` to ensure no unintended variables are leaked to the spawned
|
||||
/// process.
|
||||
///
|
||||
/// The derivation follows the algorithm documented in the struct-level comment
|
||||
/// for [`ShellEnvironmentPolicy`].
|
||||
pub fn create_env(policy: &ShellEnvironmentPolicy) -> HashMap<String, String> {
|
||||
populate_env(std::env::vars(), policy)
|
||||
}
|
||||
|
||||
fn populate_env<I>(vars: I, policy: &ShellEnvironmentPolicy) -> HashMap<String, String>
|
||||
where
|
||||
I: IntoIterator<Item = (String, String)>,
|
||||
{
|
||||
// Step 1 – determine the starting set of variables based on the
|
||||
// `inherit` strategy.
|
||||
let mut env_map: HashMap<String, String> = match policy.inherit {
|
||||
ShellEnvironmentPolicyInherit::All => vars.into_iter().collect(),
|
||||
ShellEnvironmentPolicyInherit::None => HashMap::new(),
|
||||
ShellEnvironmentPolicyInherit::Core => {
|
||||
const CORE_VARS: &[&str] = &[
|
||||
"HOME", "LOGNAME", "PATH", "SHELL", "USER", "USERNAME", "TMPDIR", "TEMP", "TMP",
|
||||
];
|
||||
let allow: HashSet<&str> = CORE_VARS.iter().copied().collect();
|
||||
vars.into_iter()
|
||||
.filter(|(k, _)| allow.contains(k.as_str()))
|
||||
.collect()
|
||||
}
|
||||
};
|
||||
|
||||
// Internal helper – does `name` match **any** pattern in `patterns`?
|
||||
let matches_any = |name: &str, patterns: &[EnvironmentVariablePattern]| -> bool {
|
||||
patterns.iter().any(|pattern| pattern.matches(name))
|
||||
};
|
||||
|
||||
// Step 2 – Apply the default exclude if not disabled.
|
||||
if !policy.ignore_default_excludes {
|
||||
let default_excludes = vec![
|
||||
EnvironmentVariablePattern::new_case_insensitive("*KEY*"),
|
||||
EnvironmentVariablePattern::new_case_insensitive("*SECRET*"),
|
||||
EnvironmentVariablePattern::new_case_insensitive("*TOKEN*"),
|
||||
];
|
||||
env_map.retain(|k, _| !matches_any(k, &default_excludes));
|
||||
}
|
||||
|
||||
// Step 3 – Apply custom excludes.
|
||||
if !policy.exclude.is_empty() {
|
||||
env_map.retain(|k, _| !matches_any(k, &policy.exclude));
|
||||
}
|
||||
|
||||
// Step 4 – Apply user-provided overrides.
|
||||
for (key, val) in &policy.r#set {
|
||||
env_map.insert(key.clone(), val.clone());
|
||||
}
|
||||
|
||||
// Step 5 – If include_only is non-empty, keep *only* the matching vars.
|
||||
if !policy.include_only.is_empty() {
|
||||
env_map.retain(|k, _| matches_any(k, &policy.include_only));
|
||||
}
|
||||
|
||||
env_map
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use super::*;
|
||||
use crate::config_types::ShellEnvironmentPolicyInherit;
|
||||
use maplit::hashmap;
|
||||
|
||||
fn make_vars(pairs: &[(&str, &str)]) -> Vec<(String, String)> {
|
||||
pairs
|
||||
.iter()
|
||||
.map(|(k, v)| (k.to_string(), v.to_string()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_core_inherit_and_default_excludes() {
|
||||
let vars = make_vars(&[
|
||||
("PATH", "/usr/bin"),
|
||||
("HOME", "/home/user"),
|
||||
("API_KEY", "secret"),
|
||||
("SECRET_TOKEN", "t"),
|
||||
]);
|
||||
|
||||
let policy = ShellEnvironmentPolicy::default(); // inherit Core, default excludes on
|
||||
let result = populate_env(vars, &policy);
|
||||
|
||||
let expected: HashMap<String, String> = hashmap! {
|
||||
"PATH".to_string() => "/usr/bin".to_string(),
|
||||
"HOME".to_string() => "/home/user".to_string(),
|
||||
};
|
||||
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_include_only() {
|
||||
let vars = make_vars(&[("PATH", "/usr/bin"), ("FOO", "bar")]);
|
||||
|
||||
let policy = ShellEnvironmentPolicy {
|
||||
// skip default excludes so nothing is removed prematurely
|
||||
ignore_default_excludes: true,
|
||||
include_only: vec![EnvironmentVariablePattern::new_case_insensitive("*PATH")],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = populate_env(vars, &policy);
|
||||
|
||||
let expected: HashMap<String, String> = hashmap! {
|
||||
"PATH".to_string() => "/usr/bin".to_string(),
|
||||
};
|
||||
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_overrides() {
|
||||
let vars = make_vars(&[("PATH", "/usr/bin")]);
|
||||
|
||||
let mut policy = ShellEnvironmentPolicy {
|
||||
ignore_default_excludes: true,
|
||||
..Default::default()
|
||||
};
|
||||
policy.r#set.insert("NEW_VAR".to_string(), "42".to_string());
|
||||
|
||||
let result = populate_env(vars, &policy);
|
||||
|
||||
let expected: HashMap<String, String> = hashmap! {
|
||||
"PATH".to_string() => "/usr/bin".to_string(),
|
||||
"NEW_VAR".to_string() => "42".to_string(),
|
||||
};
|
||||
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_inherit_all() {
|
||||
let vars = make_vars(&[("PATH", "/usr/bin"), ("FOO", "bar")]);
|
||||
|
||||
let policy = ShellEnvironmentPolicy {
|
||||
inherit: ShellEnvironmentPolicyInherit::All,
|
||||
ignore_default_excludes: true, // keep everything
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = populate_env(vars.clone(), &policy);
|
||||
let expected: HashMap<String, String> = vars.into_iter().collect();
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_inherit_all_with_default_excludes() {
|
||||
let vars = make_vars(&[("PATH", "/usr/bin"), ("API_KEY", "secret")]);
|
||||
|
||||
let policy = ShellEnvironmentPolicy {
|
||||
inherit: ShellEnvironmentPolicyInherit::All,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = populate_env(vars, &policy);
|
||||
let expected: HashMap<String, String> = hashmap! {
|
||||
"PATH".to_string() => "/usr/bin".to_string(),
|
||||
};
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_inherit_none() {
|
||||
let vars = make_vars(&[("PATH", "/usr/bin"), ("HOME", "/home")]);
|
||||
|
||||
let mut policy = ShellEnvironmentPolicy {
|
||||
inherit: ShellEnvironmentPolicyInherit::None,
|
||||
ignore_default_excludes: true,
|
||||
..Default::default()
|
||||
};
|
||||
policy
|
||||
.r#set
|
||||
.insert("ONLY_VAR".to_string(), "yes".to_string());
|
||||
|
||||
let result = populate_env(vars, &policy);
|
||||
let expected: HashMap<String, String> = hashmap! {
|
||||
"ONLY_VAR".to_string() => "yes".to_string(),
|
||||
};
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
}
|
||||
@@ -1,77 +0,0 @@
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec::RawExecToolCallOutput;
|
||||
use crate::exec::StdioPolicy;
|
||||
use crate::exec::consume_truncated_output;
|
||||
use crate::exec::spawn_child_async;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
|
||||
use tokio::sync::Notify;
|
||||
|
||||
pub fn exec_linux(
|
||||
params: ExecParams,
|
||||
ctrl_c: Arc<Notify>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> Result<RawExecToolCallOutput> {
|
||||
// Allow READ on /
|
||||
// Allow WRITE on /dev/null
|
||||
let ctrl_c_copy = ctrl_c.clone();
|
||||
let sandbox_policy = sandbox_policy.clone();
|
||||
|
||||
// Isolate thread to run the sandbox from
|
||||
let tool_call_output = std::thread::spawn(move || {
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()?;
|
||||
|
||||
rt.block_on(async {
|
||||
let ExecParams {
|
||||
command,
|
||||
cwd,
|
||||
timeout_ms,
|
||||
} = params;
|
||||
apply_sandbox_policy_to_current_thread(&sandbox_policy, &cwd)?;
|
||||
let child = spawn_child_async(
|
||||
command,
|
||||
cwd,
|
||||
&sandbox_policy,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
)
|
||||
.await?;
|
||||
consume_truncated_output(child, ctrl_c_copy, timeout_ms).await
|
||||
})
|
||||
})
|
||||
.join();
|
||||
|
||||
match tool_call_output {
|
||||
Ok(Ok(output)) => Ok(output),
|
||||
Ok(Err(e)) => Err(e),
|
||||
Err(e) => Err(CodexErr::Io(io::Error::other(format!(
|
||||
"thread join failed: {e:?}"
|
||||
)))),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
pub fn apply_sandbox_policy_to_current_thread(
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: &Path,
|
||||
) -> Result<()> {
|
||||
crate::landlock::apply_sandbox_policy_to_current_thread(sandbox_policy, cwd)
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
pub fn apply_sandbox_policy_to_current_thread(
|
||||
_sandbox_policy: &SandboxPolicy,
|
||||
_cwd: &Path,
|
||||
) -> Result<()> {
|
||||
Err(CodexErr::Io(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"linux sandbox is not supported on this platform",
|
||||
)))
|
||||
}
|
||||
@@ -1,326 +0,0 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
|
||||
use landlock::ABI;
|
||||
use landlock::Access;
|
||||
use landlock::AccessFs;
|
||||
use landlock::CompatLevel;
|
||||
use landlock::Compatible;
|
||||
use landlock::Ruleset;
|
||||
use landlock::RulesetAttr;
|
||||
use landlock::RulesetCreatedAttr;
|
||||
use seccompiler::BpfProgram;
|
||||
use seccompiler::SeccompAction;
|
||||
use seccompiler::SeccompCmpArgLen;
|
||||
use seccompiler::SeccompCmpOp;
|
||||
use seccompiler::SeccompCondition;
|
||||
use seccompiler::SeccompFilter;
|
||||
use seccompiler::SeccompRule;
|
||||
use seccompiler::TargetArch;
|
||||
use seccompiler::apply_filter;
|
||||
|
||||
/// Apply sandbox policies inside this thread so only the child inherits
|
||||
/// them, not the entire CLI process.
|
||||
pub(crate) fn apply_sandbox_policy_to_current_thread(
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: &Path,
|
||||
) -> Result<()> {
|
||||
if !sandbox_policy.has_full_network_access() {
|
||||
install_network_seccomp_filter_on_current_thread()?;
|
||||
}
|
||||
|
||||
if !sandbox_policy.has_full_disk_write_access() {
|
||||
let writable_roots = sandbox_policy.get_writable_roots_with_cwd(cwd);
|
||||
install_filesystem_landlock_rules_on_current_thread(writable_roots)?;
|
||||
}
|
||||
|
||||
// TODO(ragona): Add appropriate restrictions if
|
||||
// `sandbox_policy.has_full_disk_read_access()` is `false`.
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Installs Landlock file-system rules on the current thread allowing read
|
||||
/// access to the entire file-system while restricting write access to
|
||||
/// `/dev/null` and the provided list of `writable_roots`.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns [`CodexErr::Sandbox`] variants when the ruleset fails to apply.
|
||||
fn install_filesystem_landlock_rules_on_current_thread(writable_roots: Vec<PathBuf>) -> Result<()> {
|
||||
let abi = ABI::V5;
|
||||
let access_rw = AccessFs::from_all(abi);
|
||||
let access_ro = AccessFs::from_read(abi);
|
||||
|
||||
let mut ruleset = Ruleset::default()
|
||||
.set_compatibility(CompatLevel::BestEffort)
|
||||
.handle_access(access_rw)?
|
||||
.create()?
|
||||
.add_rules(landlock::path_beneath_rules(&["/"], access_ro))?
|
||||
.add_rules(landlock::path_beneath_rules(&["/dev/null"], access_rw))?
|
||||
.set_no_new_privs(true);
|
||||
|
||||
if !writable_roots.is_empty() {
|
||||
ruleset = ruleset.add_rules(landlock::path_beneath_rules(&writable_roots, access_rw))?;
|
||||
}
|
||||
|
||||
let status = ruleset.restrict_self()?;
|
||||
|
||||
if status.ruleset == landlock::RulesetStatus::NotEnforced {
|
||||
return Err(CodexErr::Sandbox(SandboxErr::LandlockRestrict));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Installs a seccomp filter that blocks outbound network access except for
|
||||
/// AF_UNIX domain sockets.
|
||||
fn install_network_seccomp_filter_on_current_thread() -> std::result::Result<(), SandboxErr> {
|
||||
// Build rule map.
|
||||
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();
|
||||
|
||||
// Helper – insert unconditional deny rule for syscall number.
|
||||
let mut deny_syscall = |nr: i64| {
|
||||
rules.insert(nr, vec![]); // empty rule vec = unconditional match
|
||||
};
|
||||
|
||||
deny_syscall(libc::SYS_connect);
|
||||
deny_syscall(libc::SYS_accept);
|
||||
deny_syscall(libc::SYS_accept4);
|
||||
deny_syscall(libc::SYS_bind);
|
||||
deny_syscall(libc::SYS_listen);
|
||||
deny_syscall(libc::SYS_getpeername);
|
||||
deny_syscall(libc::SYS_getsockname);
|
||||
deny_syscall(libc::SYS_shutdown);
|
||||
deny_syscall(libc::SYS_sendto);
|
||||
deny_syscall(libc::SYS_sendmsg);
|
||||
deny_syscall(libc::SYS_sendmmsg);
|
||||
deny_syscall(libc::SYS_recvfrom);
|
||||
deny_syscall(libc::SYS_recvmsg);
|
||||
deny_syscall(libc::SYS_recvmmsg);
|
||||
deny_syscall(libc::SYS_getsockopt);
|
||||
deny_syscall(libc::SYS_setsockopt);
|
||||
deny_syscall(libc::SYS_ptrace);
|
||||
|
||||
// For `socket` we allow AF_UNIX (arg0 == AF_UNIX) and deny everything else.
|
||||
let unix_only_rule = SeccompRule::new(vec![SeccompCondition::new(
|
||||
0, // first argument (domain)
|
||||
SeccompCmpArgLen::Dword,
|
||||
SeccompCmpOp::Eq,
|
||||
libc::AF_UNIX as u64,
|
||||
)?])?;
|
||||
|
||||
rules.insert(libc::SYS_socket, vec![unix_only_rule]);
|
||||
rules.insert(libc::SYS_socketpair, vec![]); // always deny (Unix can use socketpair but fine, keep open?)
|
||||
|
||||
let filter = SeccompFilter::new(
|
||||
rules,
|
||||
SeccompAction::Allow, // default – allow
|
||||
SeccompAction::Errno(libc::EPERM as u32), // when rule matches – return EPERM
|
||||
if cfg!(target_arch = "x86_64") {
|
||||
TargetArch::x86_64
|
||||
} else if cfg!(target_arch = "aarch64") {
|
||||
TargetArch::aarch64
|
||||
} else {
|
||||
unimplemented!("unsupported architecture for seccomp filter");
|
||||
},
|
||||
)?;
|
||||
|
||||
let prog: BpfProgram = filter.try_into()?;
|
||||
|
||||
apply_filter(&prog)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![expect(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use super::*;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::exec::process_exec_tool_call;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use std::sync::Arc;
|
||||
use tempfile::NamedTempFile;
|
||||
use tokio::sync::Notify;
|
||||
|
||||
#[allow(clippy::print_stdout)]
|
||||
async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
|
||||
let params = ExecParams {
|
||||
command: cmd.iter().map(|elm| elm.to_string()).collect(),
|
||||
cwd: std::env::current_dir().expect("cwd should exist"),
|
||||
timeout_ms: Some(timeout_ms),
|
||||
};
|
||||
|
||||
let sandbox_policy =
|
||||
SandboxPolicy::new_read_only_policy_with_writable_roots(writable_roots);
|
||||
let ctrl_c = Arc::new(Notify::new());
|
||||
let res =
|
||||
process_exec_tool_call(params, SandboxType::LinuxSeccomp, ctrl_c, &sandbox_policy)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
if res.exit_code != 0 {
|
||||
println!("stdout:\n{}", res.stdout);
|
||||
println!("stderr:\n{}", res.stderr);
|
||||
panic!("exit code: {}", res.exit_code);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_root_read() {
|
||||
run_cmd(&["ls", "-l", "/bin"], &[], 200).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[should_panic]
|
||||
async fn test_root_write() {
|
||||
let tmpfile = NamedTempFile::new().unwrap();
|
||||
let tmpfile_path = tmpfile.path().to_string_lossy();
|
||||
run_cmd(
|
||||
&["bash", "-lc", &format!("echo blah > {}", tmpfile_path)],
|
||||
&[],
|
||||
200,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_dev_null_write() {
|
||||
run_cmd(
|
||||
&["bash", "-lc", "echo blah > /dev/null"],
|
||||
&[],
|
||||
// We have seen timeouts when running this test in CI on GitHub,
|
||||
// so we are using a generous timeout until we can diagnose further.
|
||||
1_000,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_writable_root() {
|
||||
let tmpdir = tempfile::tempdir().unwrap();
|
||||
let file_path = tmpdir.path().join("test");
|
||||
run_cmd(
|
||||
&[
|
||||
"bash",
|
||||
"-lc",
|
||||
&format!("echo blah > {}", file_path.to_string_lossy()),
|
||||
],
|
||||
&[tmpdir.path().to_path_buf()],
|
||||
// We have seen timeouts when running this test in CI on GitHub,
|
||||
// so we are using a generous timeout until we can diagnose further.
|
||||
1_000,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[should_panic(expected = "Sandbox(Timeout)")]
|
||||
async fn test_timeout() {
|
||||
run_cmd(&["sleep", "2"], &[], 50).await;
|
||||
}
|
||||
|
||||
/// Helper that runs `cmd` under the Linux sandbox and asserts that the command
|
||||
/// does NOT succeed (i.e. returns a non‑zero exit code) **unless** the binary
|
||||
/// is missing in which case we silently treat it as an accepted skip so the
|
||||
/// suite remains green on leaner CI images.
|
||||
async fn assert_network_blocked(cmd: &[&str]) {
|
||||
let params = ExecParams {
|
||||
command: cmd.iter().map(|s| s.to_string()).collect(),
|
||||
cwd: std::env::current_dir().expect("cwd should exist"),
|
||||
// Give the tool a generous 2‑second timeout so even slow DNS timeouts
|
||||
// do not stall the suite.
|
||||
timeout_ms: Some(2_000),
|
||||
};
|
||||
|
||||
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
||||
let ctrl_c = Arc::new(Notify::new());
|
||||
let result =
|
||||
process_exec_tool_call(params, SandboxType::LinuxSeccomp, ctrl_c, &sandbox_policy)
|
||||
.await;
|
||||
|
||||
let (exit_code, stdout, stderr) = match result {
|
||||
Ok(output) => (output.exit_code, output.stdout, output.stderr),
|
||||
Err(CodexErr::Sandbox(SandboxErr::Denied(exit_code, stdout, stderr))) => {
|
||||
(exit_code, stdout, stderr)
|
||||
}
|
||||
_ => {
|
||||
panic!("expected sandbox denied error, got: {:?}", result);
|
||||
}
|
||||
};
|
||||
|
||||
dbg!(&stderr);
|
||||
dbg!(&stdout);
|
||||
dbg!(&exit_code);
|
||||
|
||||
// A completely missing binary exits with 127. Anything else should also
|
||||
// be non‑zero (EPERM from seccomp will usually bubble up as 1, 2, 13…)
|
||||
// If—*and only if*—the command exits 0 we consider the sandbox breached.
|
||||
|
||||
if exit_code == 0 {
|
||||
panic!(
|
||||
"Network sandbox FAILED - {:?} exited 0\nstdout:\n{}\nstderr:\n{}",
|
||||
cmd, stdout, stderr
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sandbox_blocks_curl() {
|
||||
assert_network_blocked(&["curl", "-I", "http://openai.com"]).await;
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[tokio::test]
|
||||
async fn sandbox_blocks_wget() {
|
||||
assert_network_blocked(&["wget", "-qO-", "http://openai.com"]).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sandbox_blocks_ping() {
|
||||
// ICMP requires raw socket – should be denied quickly with EPERM.
|
||||
assert_network_blocked(&["ping", "-c", "1", "8.8.8.8"]).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sandbox_blocks_nc() {
|
||||
// Zero‑length connection attempt to localhost.
|
||||
assert_network_blocked(&["nc", "-z", "127.0.0.1", "80"]).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sandbox_blocks_ssh() {
|
||||
// Force ssh to attempt a real TCP connection but fail quickly. `BatchMode`
|
||||
// avoids password prompts, and `ConnectTimeout` keeps the hang time low.
|
||||
assert_network_blocked(&[
|
||||
"ssh",
|
||||
"-o",
|
||||
"BatchMode=yes",
|
||||
"-o",
|
||||
"ConnectTimeout=1",
|
||||
"github.com",
|
||||
])
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sandbox_blocks_getent() {
|
||||
assert_network_blocked(&["getent", "ahosts", "openai.com"]).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sandbox_blocks_dev_tcp_redirection() {
|
||||
// This syntax is only supported by bash and zsh. We try bash first.
|
||||
// Fallback generic socket attempt using /bin/sh with bash‑style /dev/tcp. Not
|
||||
// all images ship bash, so we guard against 127 as well.
|
||||
assert_network_blocked(&["bash", "-c", "echo hi > /dev/tcp/127.0.0.1/80"]).await;
|
||||
}
|
||||
}
|
||||
@@ -13,25 +13,28 @@ pub use codex::Codex;
|
||||
pub mod codex_wrapper;
|
||||
pub mod config;
|
||||
pub mod config_profile;
|
||||
pub mod config_types;
|
||||
mod conversation_history;
|
||||
pub mod error;
|
||||
pub mod exec;
|
||||
pub mod exec_linux;
|
||||
pub mod exec_env;
|
||||
mod flags;
|
||||
mod is_safe_command;
|
||||
#[cfg(target_os = "linux")]
|
||||
pub mod landlock;
|
||||
mod mcp_connection_manager;
|
||||
pub mod mcp_server_config;
|
||||
mod mcp_tool_call;
|
||||
mod message_history;
|
||||
mod model_provider_info;
|
||||
pub use model_provider_info::ModelProviderInfo;
|
||||
pub use model_provider_info::WireApi;
|
||||
mod models;
|
||||
pub mod openai_api_key;
|
||||
mod openai_model_info;
|
||||
mod openai_tools;
|
||||
mod project_doc;
|
||||
pub mod protocol;
|
||||
mod rollout;
|
||||
mod safety;
|
||||
mod user_notification;
|
||||
pub mod util;
|
||||
|
||||
pub use client_common::model_supports_reasoning_summaries;
|
||||
|
||||
@@ -19,7 +19,7 @@ use mcp_types::Tool;
|
||||
use tokio::task::JoinSet;
|
||||
use tracing::info;
|
||||
|
||||
use crate::mcp_server_config::McpServerConfig;
|
||||
use crate::config_types::McpServerConfig;
|
||||
|
||||
/// Delimiter used to separate the server name from the tool name in a fully
|
||||
/// qualified tool name.
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use serde::Deserialize;
|
||||
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq)]
|
||||
pub struct McpServerConfig {
|
||||
pub command: String,
|
||||
|
||||
#[serde(default)]
|
||||
pub args: Vec<String>,
|
||||
|
||||
#[serde(default)]
|
||||
pub env: Option<HashMap<String, String>>,
|
||||
}
|
||||
@@ -50,51 +50,18 @@ pub(crate) async fn handle_mcp_tool_call(
|
||||
notify_mcp_tool_call_event(sess, sub_id, tool_call_begin_event).await;
|
||||
|
||||
// Perform the tool call.
|
||||
let (tool_call_end_event, tool_call_err) = match sess
|
||||
let result = sess
|
||||
.call_tool(&server, &tool_name, arguments_value, timeout)
|
||||
.await
|
||||
{
|
||||
Ok(result) => (
|
||||
EventMsg::McpToolCallEnd(McpToolCallEndEvent {
|
||||
call_id,
|
||||
success: !result.is_error.unwrap_or(false),
|
||||
result: Some(result),
|
||||
}),
|
||||
None,
|
||||
),
|
||||
Err(e) => (
|
||||
EventMsg::McpToolCallEnd(McpToolCallEndEvent {
|
||||
call_id,
|
||||
success: false,
|
||||
result: None,
|
||||
}),
|
||||
Some(e),
|
||||
),
|
||||
};
|
||||
.map_err(|e| format!("tool call error: {e}"));
|
||||
let tool_call_end_event = EventMsg::McpToolCallEnd(McpToolCallEndEvent {
|
||||
call_id: call_id.clone(),
|
||||
result: result.clone(),
|
||||
});
|
||||
|
||||
notify_mcp_tool_call_event(sess, sub_id, tool_call_end_event.clone()).await;
|
||||
let EventMsg::McpToolCallEnd(McpToolCallEndEvent {
|
||||
call_id,
|
||||
success,
|
||||
result,
|
||||
}) = tool_call_end_event
|
||||
else {
|
||||
unimplemented!("unexpected event type");
|
||||
};
|
||||
|
||||
ResponseInputItem::FunctionCallOutput {
|
||||
call_id,
|
||||
output: FunctionCallOutputPayload {
|
||||
content: result.map_or_else(
|
||||
|| format!("err: {tool_call_err:?}"),
|
||||
|result| {
|
||||
serde_json::to_string(&result)
|
||||
.unwrap_or_else(|e| format!("JSON serialization error: {e}"))
|
||||
},
|
||||
),
|
||||
success: Some(success),
|
||||
},
|
||||
}
|
||||
ResponseInputItem::McpToolCallOutput { call_id, result }
|
||||
}
|
||||
|
||||
async fn notify_mcp_tool_call_event(sess: &Session, sub_id: &str, event: EventMsg) {
|
||||
|
||||
@@ -28,7 +28,7 @@ use tokio::io::AsyncReadExt;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::config::HistoryPersistence;
|
||||
use crate::config_types::HistoryPersistence;
|
||||
|
||||
#[cfg(unix)]
|
||||
use std::os::unix::fs::OpenOptionsExt;
|
||||
|
||||
@@ -11,6 +11,7 @@ use std::collections::HashMap;
|
||||
use std::env::VarError;
|
||||
|
||||
use crate::error::EnvVarError;
|
||||
use crate::openai_api_key::get_openai_api_key;
|
||||
|
||||
/// Wire protocol that the provider speaks. Most third-party services only
|
||||
/// implement the classic OpenAI Chat Completions JSON schema, whereas OpenAI
|
||||
@@ -22,9 +23,10 @@ use crate::error::EnvVarError;
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum WireApi {
|
||||
/// The experimental “Responses” API exposed by OpenAI at `/v1/responses`.
|
||||
#[default]
|
||||
Responses,
|
||||
|
||||
/// Regular Chat Completions compatible with `/v1/chat/completions`.
|
||||
#[default]
|
||||
Chat,
|
||||
}
|
||||
|
||||
@@ -43,7 +45,32 @@ pub struct ModelProviderInfo {
|
||||
pub env_key_instructions: Option<String>,
|
||||
|
||||
/// Which wire protocol this provider expects.
|
||||
#[serde(default)]
|
||||
pub wire_api: WireApi,
|
||||
|
||||
/// Optional query parameters to append to the base URL.
|
||||
pub query_params: Option<HashMap<String, String>>,
|
||||
}
|
||||
|
||||
impl ModelProviderInfo {
|
||||
pub(crate) fn get_full_url(&self) -> String {
|
||||
let query_string = self
|
||||
.query_params
|
||||
.as_ref()
|
||||
.map_or_else(String::new, |params| {
|
||||
let full_params = params
|
||||
.iter()
|
||||
.map(|(k, v)| format!("{k}={v}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("&");
|
||||
format!("?{full_params}")
|
||||
});
|
||||
let base_url = &self.base_url;
|
||||
match self.wire_api {
|
||||
WireApi::Responses => format!("{base_url}/responses{query_string}"),
|
||||
WireApi::Chat => format!("{base_url}/chat/completions{query_string}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ModelProviderInfo {
|
||||
@@ -52,20 +79,27 @@ impl ModelProviderInfo {
|
||||
/// cannot be found, returns an error.
|
||||
pub fn api_key(&self) -> crate::error::Result<Option<String>> {
|
||||
match &self.env_key {
|
||||
Some(env_key) => std::env::var(env_key)
|
||||
.and_then(|v| {
|
||||
if v.trim().is_empty() {
|
||||
Err(VarError::NotPresent)
|
||||
} else {
|
||||
Ok(Some(v))
|
||||
}
|
||||
})
|
||||
.map_err(|_| {
|
||||
crate::error::CodexErr::EnvVar(EnvVarError {
|
||||
var: env_key.clone(),
|
||||
instructions: self.env_key_instructions.clone(),
|
||||
Some(env_key) => {
|
||||
let env_value = if env_key == crate::openai_api_key::OPENAI_API_KEY_ENV_VAR {
|
||||
get_openai_api_key().map_or_else(|| Err(VarError::NotPresent), Ok)
|
||||
} else {
|
||||
std::env::var(env_key)
|
||||
};
|
||||
env_value
|
||||
.and_then(|v| {
|
||||
if v.trim().is_empty() {
|
||||
Err(VarError::NotPresent)
|
||||
} else {
|
||||
Ok(Some(v))
|
||||
}
|
||||
})
|
||||
}),
|
||||
.map_err(|_| {
|
||||
crate::error::CodexErr::EnvVar(EnvVarError {
|
||||
var: env_key.clone(),
|
||||
instructions: self.env_key_instructions.clone(),
|
||||
})
|
||||
})
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
@@ -75,6 +109,10 @@ impl ModelProviderInfo {
|
||||
pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
|
||||
use ModelProviderInfo as P;
|
||||
|
||||
// We do not want to be in the business of adjudicating which third-party
|
||||
// providers are bundled with Codex CLI, so we only include the OpenAI
|
||||
// provider by default. Users are encouraged to add to `model_providers`
|
||||
// in config.toml to add their own providers.
|
||||
[
|
||||
(
|
||||
"openai",
|
||||
@@ -84,76 +122,7 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
|
||||
env_key: Some("OPENAI_API_KEY".into()),
|
||||
env_key_instructions: Some("Create an API key (https://platform.openai.com) and export it as an environment variable.".into()),
|
||||
wire_api: WireApi::Responses,
|
||||
},
|
||||
),
|
||||
(
|
||||
"openrouter",
|
||||
P {
|
||||
name: "OpenRouter".into(),
|
||||
base_url: "https://openrouter.ai/api/v1".into(),
|
||||
env_key: Some("OPENROUTER_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"gemini",
|
||||
P {
|
||||
name: "Gemini".into(),
|
||||
base_url: "https://generativelanguage.googleapis.com/v1beta/openai".into(),
|
||||
env_key: Some("GEMINI_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"ollama",
|
||||
P {
|
||||
name: "Ollama".into(),
|
||||
base_url: "http://localhost:11434/v1".into(),
|
||||
env_key: None,
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"mistral",
|
||||
P {
|
||||
name: "Mistral".into(),
|
||||
base_url: "https://api.mistral.ai/v1".into(),
|
||||
env_key: Some("MISTRAL_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"deepseek",
|
||||
P {
|
||||
name: "DeepSeek".into(),
|
||||
base_url: "https://api.deepseek.com".into(),
|
||||
env_key: Some("DEEPSEEK_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"xai",
|
||||
P {
|
||||
name: "xAI".into(),
|
||||
base_url: "https://api.x.ai/v1".into(),
|
||||
env_key: Some("XAI_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"groq",
|
||||
P {
|
||||
name: "Groq".into(),
|
||||
base_url: "https://api.groq.com/openai/v1".into(),
|
||||
env_key: Some("GROQ_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
query_params: None,
|
||||
},
|
||||
),
|
||||
]
|
||||
@@ -161,3 +130,51 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
|
||||
.map(|(k, v)| (k.to_string(), v))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used)]
    use super::*;

    /// A provider table that sets only `name` and `base_url` must
    /// deserialize with the optional fields unset and `wire_api` equal to
    /// `WireApi::Chat`.
    #[test]
    fn test_deserialize_ollama_model_provider_toml() {
        let ollama_provider_toml = r#"
name = "Ollama"
base_url = "http://localhost:11434/v1"
"#;
        let parsed: ModelProviderInfo = toml::from_str(ollama_provider_toml).unwrap();

        let expected = ModelProviderInfo {
            name: "Ollama".into(),
            base_url: "http://localhost:11434/v1".into(),
            env_key: None,
            env_key_instructions: None,
            wire_api: WireApi::Chat,
            query_params: None,
        };
        assert_eq!(expected, parsed);
    }

    /// A provider table with `env_key` and an inline `query_params` table
    /// must carry both through deserialization.
    #[test]
    fn test_deserialize_azure_model_provider_toml() {
        let azure_provider_toml = r#"
name = "Azure"
base_url = "https://xxxxx.openai.azure.com/openai"
env_key = "AZURE_OPENAI_API_KEY"
query_params = { api-version = "2025-04-01-preview" }
"#;
        let parsed: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();

        let expected_query_params = maplit::hashmap! {
            "api-version".to_string() => "2025-04-01-preview".to_string(),
        };
        let expected = ModelProviderInfo {
            name: "Azure".into(),
            base_url: "https://xxxxx.openai.azure.com/openai".into(),
            env_key: Some("AZURE_OPENAI_API_KEY".into()),
            env_key_instructions: None,
            wire_api: WireApi::Chat,
            query_params: Some(expected_query_params),
        };
        assert_eq!(expected, parsed);
    }
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use base64::Engine;
|
||||
use mcp_types::CallToolResult;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use serde::ser::Serializer;
|
||||
@@ -18,6 +19,10 @@ pub enum ResponseInputItem {
|
||||
call_id: String,
|
||||
output: FunctionCallOutputPayload,
|
||||
},
|
||||
McpToolCallOutput {
|
||||
call_id: String,
|
||||
result: Result<CallToolResult, String>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -77,6 +82,19 @@ impl From<ResponseInputItem> for ResponseItem {
|
||||
ResponseInputItem::FunctionCallOutput { call_id, output } => {
|
||||
Self::FunctionCallOutput { call_id, output }
|
||||
}
|
||||
ResponseInputItem::McpToolCallOutput { call_id, result } => Self::FunctionCallOutput {
|
||||
call_id,
|
||||
output: FunctionCallOutputPayload {
|
||||
success: Some(result.is_ok()),
|
||||
content: result.map_or_else(
|
||||
|tool_call_err| format!("err: {tool_call_err:?}"),
|
||||
|result| {
|
||||
serde_json::to_string(&result)
|
||||
.unwrap_or_else(|e| format!("JSON serialization error: {e}"))
|
||||
},
|
||||
),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
24
codex-rs/core/src/openai_api_key.rs
Normal file
24
codex-rs/core/src/openai_api_key.rs
Normal file
@@ -0,0 +1,24 @@
|
||||
use std::env;
|
||||
use std::sync::LazyLock;
|
||||
use std::sync::RwLock;
|
||||
|
||||
/// Name of the environment variable from which the initial OpenAI API key is
/// read.
pub const OPENAI_API_KEY_ENV_VAR: &str = "OPENAI_API_KEY";

/// Process-wide OpenAI API key.
///
/// Initialized on first access from [`OPENAI_API_KEY_ENV_VAR`]; a missing,
/// non-UTF-8, or empty variable yields `None`. The value can be replaced
/// later through [`set_openai_api_key`].
static OPENAI_API_KEY: LazyLock<RwLock<Option<String>>> = LazyLock::new(|| {
    let initial = match env::var(OPENAI_API_KEY_ENV_VAR) {
        Ok(key) if !key.is_empty() => Some(key),
        _ => None,
    };
    RwLock::new(initial)
});

/// Returns a copy of the currently stored API key, or `None` if no key has
/// been set.
///
/// # Panics
///
/// Panics if the lock guarding the key is poisoned.
pub fn get_openai_api_key() -> Option<String> {
    #![allow(clippy::unwrap_used)]
    let stored = OPENAI_API_KEY.read().unwrap();
    stored.clone()
}

/// Replaces the stored API key with `value`.
///
/// An empty `value` is ignored, so the previously stored key (if any) is
/// kept.
///
/// # Panics
///
/// Panics if the lock guarding the key is poisoned.
pub fn set_openai_api_key(value: String) {
    #![allow(clippy::unwrap_used)]
    if value.is_empty() {
        return;
    }
    let mut stored = OPENAI_API_KEY.write().unwrap();
    *stored = Some(value);
}
|
||||
71
codex-rs/core/src/openai_model_info.rs
Normal file
71
codex-rs/core/src/openai_model_info.rs
Normal file
@@ -0,0 +1,71 @@
|
||||
/// Metadata about a model, particularly OpenAI models.
///
/// Pricing details (input tokens, output tokens, etc.) could be added here to
/// present more accurate pricing information in the UI, but users would need
/// to be able to override them in config.toml because that information can
/// get out of date.
#[derive(Debug)]
pub(crate) struct ModelInfo {
    /// Size of the context window in tokens.
    pub(crate) context_window: u64,

    /// Maximum number of output tokens that can be generated for the model.
    pub(crate) max_output_tokens: u64,
}

/// Looks up the built-in [`ModelInfo`] for the model called `name`.
///
/// Matching is exact and case-sensitive; any name not listed below yields
/// `None`. Note that details such as which snapshot an alias like `gpt-4o`
/// points to may be out of date.
pub(crate) fn get_model_info(name: &str) -> Option<ModelInfo> {
    // Each arm yields `(context_window, max_output_tokens)`.
    let (context_window, max_output_tokens) = match name {
        // https://platform.openai.com/docs/models/o3
        // https://platform.openai.com/docs/models/o4-mini
        // https://platform.openai.com/docs/models/codex-mini-latest
        "o3" | "o4-mini" | "codex-mini-latest" => (200_000, 100_000),

        // As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
        // https://platform.openai.com/docs/models/gpt-4.1
        "gpt-4.1" | "gpt-4.1-2025-04-14" => (1_047_576, 32_768),

        // As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
        // https://platform.openai.com/docs/models/gpt-4o
        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
        "gpt-4o" | "gpt-4o-2024-08-06" | "gpt-4o-2024-11-20" => (128_000, 16_384),

        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
        "gpt-4o-2024-05-13" => (128_000, 4_096),

        // https://platform.openai.com/docs/models/gpt-3.5-turbo
        "gpt-3.5-turbo" => (16_385, 4_096),

        _ => return None,
    };

    Some(ModelInfo {
        context_window,
        max_output_tokens,
    })
}
|
||||
157
codex-rs/core/src/openai_tools.rs
Normal file
157
codex-rs/core/src/openai_tools.rs
Normal file
@@ -0,0 +1,157 @@
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use crate::client_common::Prompt;
|
||||
|
||||
/// Definition of a single function tool; this is the payload carried by
/// `OpenAiTool::Function`.
#[derive(Debug, Clone, Serialize)]
pub(crate) struct ResponsesApiTool {
    /// Name of the function tool.
    name: &'static str,
    /// Human-readable description of what the tool does.
    description: &'static str,
    /// NOTE(review): presumably the API's `strict` flag for schema
    /// adherence — confirm against the Responses API reference.
    strict: bool,
    /// Schema describing the arguments the tool accepts.
    parameters: JsonSchema,
}
|
||||
|
||||
/// When serialized as JSON, this produces a valid "Tool" in the OpenAI
/// Responses API.
///
/// The enum is internally tagged: the variant is written to a `"type"` field
/// alongside the variant's own fields.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type")]
pub(crate) enum OpenAiTool {
    /// A function tool; serialized with `"type": "function"`.
    #[serde(rename = "function")]
    Function(ResponsesApiTool),
    /// The local shell tool; serialized with `"type": "local_shell"` and no
    /// further fields.
    #[serde(rename = "local_shell")]
    LocalShell {},
}
|
||||
|
||||
/// Generic JSON-Schema subset needed for our tool definitions.
///
/// Serialized as an internally tagged enum whose `"type"` field holds the
/// lowercase variant name (`"string"`, `"number"`, `"array"`, `"object"`).
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub(crate) enum JsonSchema {
    /// A string value.
    String,
    /// A numeric value.
    Number,
    /// An array whose elements all follow the `items` schema.
    Array {
        items: Box<JsonSchema>,
    },
    /// An object with named properties.
    Object {
        /// Schema for each named property.
        properties: BTreeMap<String, JsonSchema>,
        /// Names of the properties that must be present.
        required: &'static [&'static str],
        /// Serialized under the JSON-Schema key `additionalProperties`.
        #[serde(rename = "additionalProperties")]
        additional_properties: bool,
    },
}
|
||||
|
||||
/// Tool usage specification
|
||||
static DEFAULT_TOOLS: LazyLock<Vec<OpenAiTool>> = LazyLock::new(|| {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"command".to_string(),
|
||||
JsonSchema::Array {
|
||||
items: Box::new(JsonSchema::String),
|
||||
},
|
||||
);
|
||||
properties.insert("workdir".to_string(), JsonSchema::String);
|
||||
properties.insert("timeout".to_string(), JsonSchema::Number);
|
||||
|
||||
vec![OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "shell",
|
||||
description: "Runs a shell command, and returns its output.",
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: &["command"],
|
||||
additional_properties: false,
|
||||
},
|
||||
})]
|
||||
});
|
||||
|
||||
/// Tool list consisting of just the `local_shell` tool; it is selected in
/// place of `DEFAULT_TOOLS` when the model name starts with "codex".
static DEFAULT_CODEX_MODEL_TOOLS: LazyLock<Vec<OpenAiTool>> =
    LazyLock::new(|| vec![OpenAiTool::LocalShell {}]);
|
||||
|
||||
/// Returns JSON values that are compatible with Function Calling in the
|
||||
/// Responses API:
|
||||
/// https://platform.openai.com/docs/guides/function-calling?api-mode=responses
|
||||
pub(crate) fn create_tools_json_for_responses_api(
|
||||
prompt: &Prompt,
|
||||
model: &str,
|
||||
) -> crate::error::Result<Vec<serde_json::Value>> {
|
||||
// Assemble tool list: built-in tools + any extra tools from the prompt.
|
||||
let default_tools = if model.starts_with("codex") {
|
||||
&DEFAULT_CODEX_MODEL_TOOLS
|
||||
} else {
|
||||
&DEFAULT_TOOLS
|
||||
};
|
||||
let mut tools_json = Vec::with_capacity(default_tools.len() + prompt.extra_tools.len());
|
||||
for t in default_tools.iter() {
|
||||
tools_json.push(serde_json::to_value(t)?);
|
||||
}
|
||||
tools_json.extend(
|
||||
prompt
|
||||
.extra_tools
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
|
||||
);
|
||||
|
||||
Ok(tools_json)
|
||||
}
|
||||
|
||||
/// Returns JSON values that are compatible with Function Calling in the
|
||||
/// Chat Completions API:
|
||||
/// https://platform.openai.com/docs/guides/function-calling?api-mode=chat
|
||||
pub(crate) fn create_tools_json_for_chat_completions_api(
|
||||
prompt: &Prompt,
|
||||
model: &str,
|
||||
) -> crate::error::Result<Vec<serde_json::Value>> {
|
||||
// We start with the JSON for the Responses API and than rewrite it to match
|
||||
// the chat completions tool call format.
|
||||
let responses_api_tools_json = create_tools_json_for_responses_api(prompt, model)?;
|
||||
let tools_json = responses_api_tools_json
|
||||
.into_iter()
|
||||
.filter_map(|mut tool| {
|
||||
if tool.get("type") != Some(&serde_json::Value::String("function".to_string())) {
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Some(map) = tool.as_object_mut() {
|
||||
// Remove "type" field as it is not needed in chat completions.
|
||||
map.remove("type");
|
||||
Some(json!({
|
||||
"type": "function",
|
||||
"function": map,
|
||||
}))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<serde_json::Value>>();
|
||||
Ok(tools_json)
|
||||
}
|
||||
|
||||
fn mcp_tool_to_openai_tool(
|
||||
fully_qualified_name: String,
|
||||
tool: mcp_types::Tool,
|
||||
) -> serde_json::Value {
|
||||
let mcp_types::Tool {
|
||||
description,
|
||||
mut input_schema,
|
||||
..
|
||||
} = tool;
|
||||
|
||||
// OpenAI models mandate the "properties" field in the schema. The Agents
|
||||
// SDK fixed this by inserting an empty object for "properties" if it is not
|
||||
// already present https://github.com/openai/openai-agents-python/issues/449
|
||||
// so here we do the same.
|
||||
if input_schema.properties.is_none() {
|
||||
input_schema.properties = Some(serde_json::Value::Object(serde_json::Map::new()));
|
||||
}
|
||||
|
||||
// TODO(mbolin): Change the contract of this function to return
|
||||
// ResponsesApiTool.
|
||||
json!({
|
||||
"name": fully_qualified_name,
|
||||
"description": description,
|
||||
"parameters": input_schema,
|
||||
"type": "function",
|
||||
})
|
||||
}
|
||||
@@ -25,7 +25,7 @@ const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
|
||||
|
||||
/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
|
||||
/// string of instructions.
|
||||
pub(crate) async fn create_full_instructions(config: &Config) -> Option<String> {
|
||||
pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
|
||||
match find_project_doc(config).await {
|
||||
Ok(Some(project_doc)) => match &config.instructions {
|
||||
Some(original_instructions) => Some(format!(
|
||||
@@ -168,7 +168,7 @@ mod tests {
|
||||
async fn no_doc_file_returns_none() {
|
||||
let tmp = tempfile::tempdir().expect("tempdir");
|
||||
|
||||
let res = create_full_instructions(&make_config(&tmp, 4096, None)).await;
|
||||
let res = get_user_instructions(&make_config(&tmp, 4096, None)).await;
|
||||
assert!(
|
||||
res.is_none(),
|
||||
"Expected None when AGENTS.md is absent and no system instructions provided"
|
||||
@@ -182,7 +182,7 @@ mod tests {
|
||||
let tmp = tempfile::tempdir().expect("tempdir");
|
||||
fs::write(tmp.path().join("AGENTS.md"), "hello world").unwrap();
|
||||
|
||||
let res = create_full_instructions(&make_config(&tmp, 4096, None))
|
||||
let res = get_user_instructions(&make_config(&tmp, 4096, None))
|
||||
.await
|
||||
.expect("doc expected");
|
||||
|
||||
@@ -201,7 +201,7 @@ mod tests {
|
||||
let huge = "A".repeat(LIMIT * 2); // 2 KiB
|
||||
fs::write(tmp.path().join("AGENTS.md"), &huge).unwrap();
|
||||
|
||||
let res = create_full_instructions(&make_config(&tmp, LIMIT, None))
|
||||
let res = get_user_instructions(&make_config(&tmp, LIMIT, None))
|
||||
.await
|
||||
.expect("doc expected");
|
||||
|
||||
@@ -233,7 +233,7 @@ mod tests {
|
||||
let mut cfg = make_config(&repo, 4096, None);
|
||||
cfg.cwd = nested;
|
||||
|
||||
let res = create_full_instructions(&cfg).await.expect("doc expected");
|
||||
let res = get_user_instructions(&cfg).await.expect("doc expected");
|
||||
assert_eq!(res, "root level doc");
|
||||
}
|
||||
|
||||
@@ -243,7 +243,7 @@ mod tests {
|
||||
let tmp = tempfile::tempdir().expect("tempdir");
|
||||
fs::write(tmp.path().join("AGENTS.md"), "something").unwrap();
|
||||
|
||||
let res = create_full_instructions(&make_config(&tmp, 0, None)).await;
|
||||
let res = get_user_instructions(&make_config(&tmp, 0, None)).await;
|
||||
assert!(
|
||||
res.is_none(),
|
||||
"With limit 0 the function should return None"
|
||||
@@ -259,7 +259,7 @@ mod tests {
|
||||
|
||||
const INSTRUCTIONS: &str = "base instructions";
|
||||
|
||||
let res = create_full_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS)))
|
||||
let res = get_user_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS)))
|
||||
.await
|
||||
.expect("should produce a combined instruction string");
|
||||
|
||||
@@ -276,7 +276,7 @@ mod tests {
|
||||
|
||||
const INSTRUCTIONS: &str = "some instructions";
|
||||
|
||||
let res = create_full_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS))).await;
|
||||
let res = get_user_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS))).await;
|
||||
|
||||
assert_eq!(res, Some(INSTRUCTIONS.to_string()));
|
||||
}
|
||||
|
||||
@@ -6,12 +6,15 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
|
||||
use mcp_types::CallToolResult;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use crate::message_history::HistoryEntry;
|
||||
use crate::model_provider_info::ModelProviderInfo;
|
||||
|
||||
@@ -37,6 +40,10 @@ pub enum Op {
|
||||
|
||||
/// If not specified, server will use its default model.
|
||||
model: String,
|
||||
|
||||
model_reasoning_effort: ReasoningEffortConfig,
|
||||
model_reasoning_summary: ReasoningSummaryConfig,
|
||||
|
||||
/// Model instructions
|
||||
instructions: Option<String>,
|
||||
/// When to escalate for approval for execution
|
||||
@@ -103,21 +110,17 @@ pub enum Op {
|
||||
GetHistoryEntryRequest { offset: usize, log_id: u64 },
|
||||
}
|
||||
|
||||
/// Determines how liberally commands are auto‑approved by the system.
|
||||
/// Determines the conditions under which the user is consulted to approve
|
||||
/// running the command proposed by Codex.
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum AskForApproval {
|
||||
/// Under this policy, only “known safe” commands—as determined by
|
||||
/// Under this policy, only "known safe" commands—as determined by
|
||||
/// `is_safe_command()`—that **only read files** are auto‑approved.
|
||||
/// Everything else will ask the user to approve.
|
||||
#[default]
|
||||
UnlessAllowListed,
|
||||
|
||||
/// In addition to everything allowed by **`Suggest`**, commands that
|
||||
/// *write* to files **within the user’s approved list of writable paths**
|
||||
/// are also auto‑approved.
|
||||
/// TODO(ragona): fix
|
||||
AutoEdit,
|
||||
#[serde(rename = "untrusted")]
|
||||
UnlessTrusted,
|
||||
|
||||
/// *All* commands are auto‑approved, but they are expected to run inside a
|
||||
/// sandbox where network access is disabled and writes are confined to a
|
||||
@@ -130,155 +133,104 @@ pub enum AskForApproval {
|
||||
Never,
|
||||
}
|
||||
|
||||
/// Determines execution restrictions for model shell commands
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub struct SandboxPolicy {
|
||||
permissions: Vec<SandboxPermission>,
|
||||
/// Determines execution restrictions for model shell commands.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(tag = "mode", rename_all = "kebab-case")]
|
||||
pub enum SandboxPolicy {
|
||||
/// No restrictions whatsoever. Use with caution.
|
||||
#[serde(rename = "danger-full-access")]
|
||||
DangerFullAccess,
|
||||
|
||||
/// Read-only access to the entire file-system.
|
||||
#[serde(rename = "read-only")]
|
||||
ReadOnly,
|
||||
|
||||
/// Same as `ReadOnly` but additionally grants write access to the current
|
||||
/// working directory ("workspace").
|
||||
#[serde(rename = "workspace-write")]
|
||||
WorkspaceWrite {
|
||||
/// Additional folders (beyond cwd and possibly TMPDIR) that should be
|
||||
/// writable from within the sandbox.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
writable_roots: Vec<PathBuf>,
|
||||
|
||||
/// When set to `true`, outbound network access is allowed. `false` by
|
||||
/// default.
|
||||
#[serde(default)]
|
||||
network_access: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl From<Vec<SandboxPermission>> for SandboxPolicy {
|
||||
fn from(permissions: Vec<SandboxPermission>) -> Self {
|
||||
Self { permissions }
|
||||
impl FromStr for SandboxPolicy {
|
||||
type Err = serde_json::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
serde_json::from_str(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl SandboxPolicy {
|
||||
/// Returns a policy with read-only disk access and no network.
|
||||
pub fn new_read_only_policy() -> Self {
|
||||
Self {
|
||||
permissions: vec![SandboxPermission::DiskFullReadAccess],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_read_only_policy_with_writable_roots(writable_roots: &[PathBuf]) -> Self {
|
||||
let mut permissions = Self::new_read_only_policy().permissions;
|
||||
permissions.extend(writable_roots.iter().map(|folder| {
|
||||
SandboxPermission::DiskWriteFolder {
|
||||
folder: folder.clone(),
|
||||
}
|
||||
}));
|
||||
Self { permissions }
|
||||
}
|
||||
|
||||
pub fn new_full_auto_policy() -> Self {
|
||||
Self {
|
||||
permissions: vec![
|
||||
SandboxPermission::DiskFullReadAccess,
|
||||
SandboxPermission::DiskWritePlatformUserTempFolder,
|
||||
SandboxPermission::DiskWriteCwd,
|
||||
],
|
||||
SandboxPolicy::ReadOnly
|
||||
}
|
||||
|
||||
/// Returns a policy that can read the entire disk, but can only write to
|
||||
/// the current working directory and the per-user tmp dir on macOS. It does
|
||||
/// not allow network access.
|
||||
pub fn new_workspace_write_policy() -> Self {
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![],
|
||||
network_access: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Always returns `true` for now, as we do not yet support restricting read
|
||||
/// access.
|
||||
pub fn has_full_disk_read_access(&self) -> bool {
|
||||
self.permissions
|
||||
.iter()
|
||||
.any(|perm| matches!(perm, SandboxPermission::DiskFullReadAccess))
|
||||
true
|
||||
}
|
||||
|
||||
pub fn has_full_disk_write_access(&self) -> bool {
|
||||
self.permissions
|
||||
.iter()
|
||||
.any(|perm| matches!(perm, SandboxPermission::DiskFullWriteAccess))
|
||||
match self {
|
||||
SandboxPolicy::DangerFullAccess => true,
|
||||
SandboxPolicy::ReadOnly => false,
|
||||
SandboxPolicy::WorkspaceWrite { .. } => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has_full_network_access(&self) -> bool {
|
||||
self.permissions
|
||||
.iter()
|
||||
.any(|perm| matches!(perm, SandboxPermission::NetworkFullAccess))
|
||||
match self {
|
||||
SandboxPolicy::DangerFullAccess => true,
|
||||
SandboxPolicy::ReadOnly => false,
|
||||
SandboxPolicy::WorkspaceWrite { network_access, .. } => *network_access,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the list of writable roots that should be passed down to the
|
||||
/// Landlock rules installer, tailored to the current working directory.
|
||||
pub fn get_writable_roots_with_cwd(&self, cwd: &Path) -> Vec<PathBuf> {
|
||||
let mut writable_roots = Vec::<PathBuf>::new();
|
||||
for perm in &self.permissions {
|
||||
use SandboxPermission::*;
|
||||
match perm {
|
||||
DiskWritePlatformUserTempFolder => {
|
||||
if cfg!(target_os = "macos") {
|
||||
if let Some(tempdir) = std::env::var_os("TMPDIR") {
|
||||
// Likely something that starts with /var/folders/...
|
||||
let tmpdir_path = PathBuf::from(&tempdir);
|
||||
if tmpdir_path.is_absolute() {
|
||||
writable_roots.push(tmpdir_path.clone());
|
||||
match tmpdir_path.canonicalize() {
|
||||
Ok(canonicalized) => {
|
||||
// Likely something that starts with /private/var/folders/...
|
||||
if canonicalized != tmpdir_path {
|
||||
writable_roots.push(canonicalized);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to canonicalize TMPDIR: {e}");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::error!("TMPDIR is not an absolute path: {tempdir:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
match self {
|
||||
SandboxPolicy::DangerFullAccess => Vec::new(),
|
||||
SandboxPolicy::ReadOnly => Vec::new(),
|
||||
SandboxPolicy::WorkspaceWrite { writable_roots, .. } => {
|
||||
let mut roots = writable_roots.clone();
|
||||
roots.push(cwd.to_path_buf());
|
||||
|
||||
// For Linux, should this be XDG_RUNTIME_DIR, /run/user/<uid>, or something else?
|
||||
}
|
||||
DiskWritePlatformGlobalTempFolder => {
|
||||
if cfg!(unix) {
|
||||
writable_roots.push(PathBuf::from("/tmp"));
|
||||
// Also include the per-user tmp dir on macOS.
|
||||
// Note this is added dynamically rather than storing it in
|
||||
// writable_roots because writable_roots contains only static
|
||||
// values deserialized from the config file.
|
||||
if cfg!(target_os = "macos") {
|
||||
if let Some(tmpdir) = std::env::var_os("TMPDIR") {
|
||||
roots.push(PathBuf::from(tmpdir));
|
||||
}
|
||||
}
|
||||
DiskWriteCwd => {
|
||||
writable_roots.push(cwd.to_path_buf());
|
||||
}
|
||||
DiskWriteFolder { folder } => {
|
||||
writable_roots.push(folder.clone());
|
||||
}
|
||||
DiskFullReadAccess | NetworkFullAccess => {}
|
||||
DiskFullWriteAccess => {
|
||||
// Currently, we expect callers to only invoke this method
|
||||
// after verifying has_full_disk_write_access() is false.
|
||||
}
|
||||
|
||||
roots
|
||||
}
|
||||
}
|
||||
writable_roots
|
||||
}
|
||||
|
||||
pub fn is_unrestricted(&self) -> bool {
|
||||
self.has_full_disk_read_access()
|
||||
&& self.has_full_disk_write_access()
|
||||
&& self.has_full_network_access()
|
||||
}
|
||||
}
|
||||
|
||||
/// Permissions that should be granted to the sandbox in which the agent
/// operates.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum SandboxPermission {
    /// Is allowed to read all files on disk.
    DiskFullReadAccess,

    /// Is allowed to write to the operating system's temp dir that
    /// is restricted to the user the agent is running as. For
    /// example, on macOS, this is generally something under
    /// `/var/folders` as opposed to `/tmp`.
    DiskWritePlatformUserTempFolder,

    /// Is allowed to write to the operating system's shared temp
    /// dir. On UNIX, this is generally `/tmp`.
    DiskWritePlatformGlobalTempFolder,

    /// Is allowed to write to the current working directory (in practice, this
    /// is the `cwd` where `codex` was spawned).
    DiskWriteCwd,

    /// Is allowed to write to the specified folder. `PathBuf` must be an
    /// absolute path, though it is up to the caller to canonicalize
    /// it if the path contains symlinks.
    DiskWriteFolder { folder: PathBuf },

    /// Is allowed to write to any file on disk.
    DiskFullWriteAccess,

    /// Can make arbitrary network requests.
    NetworkFullAccess,
}
|
||||
|
||||
/// User input
|
||||
@@ -321,7 +273,11 @@ pub enum EventMsg {
|
||||
TaskStarted,
|
||||
|
||||
/// Agent has completed all actions
|
||||
TaskComplete,
|
||||
TaskComplete(TaskCompleteEvent),
|
||||
|
||||
/// Token count event, sent periodically to report the number of tokens
|
||||
/// used in the current session.
|
||||
TokenCount(TokenUsage),
|
||||
|
||||
/// Agent text output message
|
||||
AgentMessage(AgentMessageEvent),
|
||||
@@ -365,6 +321,20 @@ pub struct ErrorEvent {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
/// Payload of `EventMsg::TaskComplete`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct TaskCompleteEvent {
    /// NOTE(review): presumably the text of the final agent message of the
    /// task, `None` when there was none — confirm against the producer.
    pub last_agent_message: Option<String>,
}
|
||||
|
||||
/// Payload of `EventMsg::TokenCount`, which reports the number of tokens used
/// in the current session.
///
/// NOTE(review): the per-field meanings below are inferred from the field
/// names — confirm against the code that fills this struct.
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
pub struct TokenUsage {
    /// Count of input tokens.
    pub input_tokens: u64,
    /// Count of cached input tokens, when available.
    pub cached_input_tokens: Option<u64>,
    /// Count of output tokens.
    pub output_tokens: u64,
    /// Count of reasoning output tokens, when available.
    pub reasoning_output_tokens: Option<u64>,
    /// Total token count.
    pub total_tokens: u64,
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct AgentMessageEvent {
|
||||
pub message: String,
|
||||
@@ -391,10 +361,17 @@ pub struct McpToolCallBeginEvent {
|
||||
pub struct McpToolCallEndEvent {
|
||||
/// Identifier for the corresponding McpToolCallBegin that finished.
|
||||
pub call_id: String,
|
||||
/// Whether the tool call was successful. If `false`, `result` might not be present.
|
||||
pub success: bool,
|
||||
/// Result of the tool call. Note this could be an error.
|
||||
pub result: Option<CallToolResult>,
|
||||
pub result: Result<CallToolResult, String>,
|
||||
}
|
||||
|
||||
impl McpToolCallEndEvent {
|
||||
pub fn is_success(&self) -> bool {
|
||||
match &self.result {
|
||||
Ok(result) => !result.is_error.unwrap_or(false),
|
||||
Err(_) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
@@ -549,7 +526,7 @@ mod tests {
|
||||
id: "1234".to_string(),
|
||||
msg: EventMsg::SessionConfigured(SessionConfiguredEvent {
|
||||
session_id,
|
||||
model: "o4-mini".to_string(),
|
||||
model: "codex-mini-latest".to_string(),
|
||||
history_log_id: 0,
|
||||
history_entry_count: 0,
|
||||
}),
|
||||
@@ -557,7 +534,7 @@ mod tests {
|
||||
let serialized = serde_json::to_string(&event).unwrap();
|
||||
assert_eq!(
|
||||
serialized,
|
||||
r#"{"id":"1234","msg":{"type":"session_configured","session_id":"67e55044-10b1-426f-9247-bb680e5fe0c8","model":"o4-mini","history_log_id":0,"history_entry_count":0}}"#
|
||||
r#"{"id":"1234","msg":{"type":"session_configured","session_id":"67e55044-10b1-426f-9247-bb680e5fe0c8","model":"codex-mini-latest","history_log_id":0,"history_entry_count":0}}"#
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,12 +31,12 @@ pub fn assess_patch_safety(
|
||||
}
|
||||
|
||||
match policy {
|
||||
AskForApproval::OnFailure | AskForApproval::AutoEdit | AskForApproval::Never => {
|
||||
AskForApproval::OnFailure | AskForApproval::Never => {
|
||||
// Continue to see if this can be auto-approved.
|
||||
}
|
||||
// TODO(ragona): I'm not sure this is actually correct? I believe in this case
|
||||
// we want to continue to the writable paths check before asking the user.
|
||||
AskForApproval::UnlessAllowListed => {
|
||||
AskForApproval::UnlessTrusted => {
|
||||
return SafetyCheck::AskUser;
|
||||
}
|
||||
}
|
||||
@@ -63,40 +63,71 @@ pub fn assess_patch_safety(
|
||||
}
|
||||
}
|
||||
|
||||
/// For a command to be run _without_ a sandbox, one of the following must be
|
||||
/// true:
|
||||
///
|
||||
/// - the user has explicitly approved the command
|
||||
/// - the command is on the "known safe" list
|
||||
/// - `DangerFullAccess` was specified and `UnlessTrusted` was not
|
||||
pub fn assess_command_safety(
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
approved: &HashSet<Vec<String>>,
|
||||
) -> SafetyCheck {
|
||||
let approve_without_sandbox = || SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
};
|
||||
use AskForApproval::*;
|
||||
use SandboxPolicy::*;
|
||||
|
||||
// Previously approved or allow-listed commands
|
||||
// All approval modes allow these commands to continue without sandboxing
|
||||
// A command is "trusted" because either:
|
||||
// - it belongs to a set of commands we consider "safe" by default, or
|
||||
// - the user has explicitly approved the command for this session
|
||||
//
|
||||
// Currently, whether a command is "trusted" is a simple boolean, but we
|
||||
// should include more metadata on this command test to indicate whether it
|
||||
// should be run inside a sandbox or not. (This could be something the user
|
||||
// defines as part of `execpolicy`.)
|
||||
//
|
||||
// For example, when `is_known_safe_command(command)` returns `true`, it
|
||||
// would probably be fine to run the command in a sandbox, but when
|
||||
// `approved.contains(command)` is `true`, the user may have approved it for
|
||||
// the session _because_ they know it needs to run outside a sandbox.
|
||||
if is_known_safe_command(command) || approved.contains(command) {
|
||||
// TODO(ragona): I think we should consider running even these inside the sandbox, but it's
|
||||
// a change in behavior so I'm keeping it at parity with upstream for now.
|
||||
return approve_without_sandbox();
|
||||
return SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
};
|
||||
}
|
||||
|
||||
// Command was not known-safe or allow-listed
|
||||
if sandbox_policy.is_unrestricted() {
|
||||
approve_without_sandbox()
|
||||
} else {
|
||||
match get_platform_sandbox() {
|
||||
// We have a sandbox, so we can approve the command in all modes
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove { sandbox_type },
|
||||
None => {
|
||||
// We do not have a sandbox, so we need to consider the approval policy
|
||||
match approval_policy {
|
||||
// Never is our "non-interactive" mode; it must automatically reject
|
||||
AskForApproval::Never => SafetyCheck::Reject {
|
||||
reason: "auto-rejected by user approval settings".to_string(),
|
||||
},
|
||||
// Otherwise, we ask the user for approval
|
||||
_ => SafetyCheck::AskUser,
|
||||
match (approval_policy, sandbox_policy) {
|
||||
(UnlessTrusted, _) => {
|
||||
// Even though the user may have opted into DangerFullAccess,
|
||||
// they also requested that we ask for approval for untrusted
|
||||
// commands.
|
||||
SafetyCheck::AskUser
|
||||
}
|
||||
(OnFailure, DangerFullAccess) | (Never, DangerFullAccess) => SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
},
|
||||
(Never, ReadOnly)
|
||||
| (Never, WorkspaceWrite { .. })
|
||||
| (OnFailure, ReadOnly)
|
||||
| (OnFailure, WorkspaceWrite { .. }) => {
|
||||
match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove { sandbox_type },
|
||||
None => {
|
||||
if matches!(approval_policy, OnFailure) {
|
||||
// Since the command is not trusted, even though the
|
||||
// user has requested to only ask for approval on
|
||||
// failure, we will ask the user because no sandbox is
|
||||
// available.
|
||||
SafetyCheck::AskUser
|
||||
} else {
|
||||
// We are in non-interactive mode and lack approval, so
|
||||
// all we can do is reject the command.
|
||||
SafetyCheck::Reject {
|
||||
reason: "auto-rejected because command is not on trusted list"
|
||||
.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,7 +98,7 @@ async fn live_streaming_and_prev_id_reset() {
|
||||
|
||||
match ev.msg {
|
||||
EventMsg::AgentMessage(_) => saw_message_before_complete = true,
|
||||
EventMsg::TaskComplete => break,
|
||||
EventMsg::TaskComplete(_) => break,
|
||||
EventMsg::Error(ErrorEvent { message }) => {
|
||||
panic!("agent reported error in task1: {message}")
|
||||
}
|
||||
@@ -136,7 +136,7 @@ async fn live_streaming_and_prev_id_reset() {
|
||||
{
|
||||
got_expected = true;
|
||||
}
|
||||
EventMsg::TaskComplete => break,
|
||||
EventMsg::TaskComplete(_) => break,
|
||||
EventMsg::Error(ErrorEvent { message }) => {
|
||||
panic!("agent reported error in task2: {message}")
|
||||
}
|
||||
@@ -204,7 +204,7 @@ async fn live_shell_function_call() {
|
||||
assert!(stdout.contains(MARKER));
|
||||
saw_end_with_output = true;
|
||||
}
|
||||
EventMsg::TaskComplete => break,
|
||||
EventMsg::TaskComplete(_) => break,
|
||||
EventMsg::Error(codex_core::protocol::ErrorEvent { message }) => {
|
||||
panic!("agent error during shell test: {message}")
|
||||
}
|
||||
|
||||
@@ -107,6 +107,7 @@ async fn keeps_previous_response_id_between_tasks() {
|
||||
env_key: Some("PATH".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: codex_core::WireApi::Responses,
|
||||
query_params: None,
|
||||
};
|
||||
|
||||
// Init session
|
||||
@@ -132,7 +133,7 @@ async fn keeps_previous_response_id_between_tasks() {
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
if matches!(ev.msg, EventMsg::TaskComplete) {
|
||||
if matches!(ev.msg, EventMsg::TaskComplete(_)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -154,7 +155,7 @@ async fn keeps_previous_response_id_between_tasks() {
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
match ev.msg {
|
||||
EventMsg::TaskComplete => break,
|
||||
EventMsg::TaskComplete(_) => break,
|
||||
EventMsg::Error(ErrorEvent { message }) => {
|
||||
panic!("unexpected error: {message}")
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ use std::time::Duration;
|
||||
use codex_core::Codex;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::InputItem;
|
||||
use codex_core::protocol::Op;
|
||||
mod test_support;
|
||||
@@ -95,6 +96,7 @@ async fn retries_on_early_close() {
|
||||
env_key: Some("PATH".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: codex_core::WireApi::Responses,
|
||||
query_params: None,
|
||||
};
|
||||
|
||||
let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
|
||||
@@ -118,7 +120,7 @@ async fn retries_on_early_close() {
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
if matches!(ev.msg, codex_core::protocol::EventMsg::TaskComplete) {
|
||||
if matches!(ev.msg, EventMsg::TaskComplete(_)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user