mirror of
https://github.com/openai/codex.git
synced 2026-02-01 22:47:52 +00:00
Compare commits
241 Commits
agentydrag
...
pr1900
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
713eb0dbf0 | ||
|
|
af8c1cdf12 | ||
|
|
57c973b571 | ||
|
|
2d5de795aa | ||
|
|
f25b2e8e2c | ||
|
|
a575effbb0 | ||
|
|
6cef86f05b | ||
|
|
8262ba58b2 | ||
|
|
081caa5a6b | ||
|
|
4344537742 | ||
|
|
64f2f2eca2 | ||
|
|
ae88b69b09 | ||
|
|
ffe24991b7 | ||
|
|
dc468d563f | ||
|
|
3e8bcf0247 | ||
|
|
cda39e417f | ||
|
|
d642b07fcc | ||
|
|
7b3ab968a0 | ||
|
|
02e7965228 | ||
|
|
493e4c9463 | ||
|
|
1f7003b476 | ||
|
|
eaf2fb5b4f | ||
|
|
f8d70d67b6 | ||
|
|
966d957faf | ||
|
|
b90c15abc4 | ||
|
|
31dcae67db | ||
|
|
725dd6be6a | ||
|
|
aff97ed7dd | ||
|
|
afa8f0d617 | ||
|
|
ea7d3f27bd | ||
|
|
f6c8d1117c | ||
|
|
42bd73e150 | ||
|
|
d365cae077 | ||
|
|
0c5fa271bc | ||
|
|
bd24bc320e | ||
|
|
9f91b3da24 | ||
|
|
9285350842 | ||
|
|
e0303dbac0 | ||
|
|
d31e149cb1 | ||
|
|
136b3ee5bf | ||
|
|
fcdb1c4b4d | ||
|
|
906d449760 | ||
|
|
063083af15 | ||
|
|
f58401e203 | ||
|
|
84bcadb8d9 | ||
|
|
e38ce39c51 | ||
|
|
1a33de34b0 | ||
|
|
bd171e5206 | ||
|
|
3f13ebce10 | ||
|
|
7279080edd | ||
|
|
89ab5c3f74 | ||
|
|
6db597ec0c | ||
|
|
2899817c94 | ||
|
|
64cfbbd3c8 | ||
|
|
a6139aa003 | ||
|
|
dc15a5cf0b | ||
|
|
1f3318c1c5 | ||
|
|
e3565a3f43 | ||
|
|
2576fadc74 | ||
|
|
78a1d49fac | ||
|
|
d62b703a21 | ||
|
|
4c9f7b6bcc | ||
|
|
75eecb656e | ||
|
|
81bb1c9e26 | ||
|
|
7e0f506da2 | ||
|
|
929ba50adc | ||
|
|
80555d4ff2 | ||
|
|
97ab8fb610 | ||
|
|
fe62f859a6 | ||
|
|
92f3566d78 | ||
|
|
f20de21cb6 | ||
|
|
bc7beddaa2 | ||
|
|
8360c6a3ec | ||
|
|
f918198bbb | ||
|
|
88ea215c80 | ||
|
|
b67c485d84 | ||
|
|
e2c994e32a | ||
|
|
ad0295b893 | ||
|
|
d3aa5f46b7 | ||
|
|
575590e4c2 | ||
|
|
4aca3e46c8 | ||
|
|
d787434aa8 | ||
|
|
ea69a1d72f | ||
|
|
610addbc2e | ||
|
|
0935e6a875 | ||
|
|
6ce0a5875b | ||
|
|
5a0ad5ab8f | ||
|
|
9aa11269a5 | ||
|
|
06c786b2da | ||
|
|
549846b29a | ||
|
|
96654a5d52 | ||
|
|
861ba86403 | ||
|
|
be0cd34300 | ||
|
|
d86270696e | ||
|
|
defeafb279 | ||
|
|
51b6bdefbe | ||
|
|
35010812c7 | ||
|
|
f2134f6633 | ||
|
|
221ebfcccc | ||
|
|
301ec72107 | ||
|
|
e0e245cc1c | ||
|
|
2f5557056d | ||
|
|
ea01a5ffe2 | ||
|
|
93341797c4 | ||
|
|
347c81ad00 | ||
|
|
3823b32b7a | ||
|
|
6b10e22eb3 | ||
|
|
8828f6f082 | ||
|
|
f8fcaaaf6f | ||
|
|
fc85f4812f | ||
|
|
efe7f3c793 | ||
|
|
f66704a88f | ||
|
|
094d7af8c3 | ||
|
|
2d2df891bb | ||
|
|
80c19ea77c | ||
|
|
19bef7659f | ||
|
|
5ebb7dd34c | ||
|
|
d76f96ce79 | ||
|
|
fcd197d596 | ||
|
|
9102255854 | ||
|
|
7ecd3153a8 | ||
|
|
2405c40026 | ||
|
|
58bed77ba7 | ||
|
|
5a0079fea2 | ||
|
|
c66c99c5b5 | ||
|
|
75b4008094 | ||
|
|
7ee87123a6 | ||
|
|
994c9a874d | ||
|
|
480e82b00d | ||
|
|
508abbe990 | ||
|
|
a1641743a8 | ||
|
|
c9e2def494 | ||
|
|
7af9cedbd7 | ||
|
|
2437a8d17a | ||
|
|
d2be0720b5 | ||
|
|
173386eeac | ||
|
|
4a57afaaf2 | ||
|
|
9f645353e9 | ||
|
|
db84722080 | ||
|
|
6e1838e0d8 | ||
|
|
4fc4e410bd | ||
|
|
6dd62ffa3b | ||
|
|
b4ab7c1b73 | ||
|
|
084236f717 | ||
|
|
bc944e77f5 | ||
|
|
591cb6149a | ||
|
|
d6c4083f98 | ||
|
|
3ef544fb95 | ||
|
|
01c0896f0f | ||
|
|
4082246f6a | ||
|
|
6d82907082 | ||
|
|
ed206d5687 | ||
|
|
d51654822f | ||
|
|
710f728124 | ||
|
|
6cf4b96f9d | ||
|
|
18b2b30841 | ||
|
|
d49d802b06 | ||
|
|
8a6c6cee88 | ||
|
|
8b590105de | ||
|
|
018003e52f | ||
|
|
11fd3123be | ||
|
|
e78ec00e73 | ||
|
|
a06d4f58e4 | ||
|
|
83eefb55fb | ||
|
|
9846adeabf | ||
|
|
d5a2148deb | ||
|
|
cc874c9205 | ||
|
|
6f2b01bb6b | ||
|
|
9cedeadf6a | ||
|
|
327e2254f6 | ||
|
|
e16657ca45 | ||
|
|
bb30ab9e96 | ||
|
|
6949329a7f | ||
|
|
b95a010e86 | ||
|
|
fcbcc40f51 | ||
|
|
643ab1f582 | ||
|
|
d3dbc10479 | ||
|
|
0bc7ee9193 | ||
|
|
2bd3314886 | ||
|
|
5b820c5ce7 | ||
|
|
f14b5adabf | ||
|
|
9c0b413fd1 | ||
|
|
3777e18243 | ||
|
|
0f8ac92390 | ||
|
|
c46bb67d77 | ||
|
|
94f5cad895 | ||
|
|
72504f1d9c | ||
|
|
fa6d507c51 | ||
|
|
a52a2fe7a9 | ||
|
|
bfeb8c92a5 | ||
|
|
9e58076cf5 | ||
|
|
8a424fcfa3 | ||
|
|
341c091c5b | ||
|
|
6b1e4a6846 | ||
|
|
75fa65e054 | ||
|
|
16eafd02ad | ||
|
|
c8051b906f | ||
|
|
82b0cebe8b | ||
|
|
3a23a86f4b | ||
|
|
268267b59e | ||
|
|
4a15ebc1ca | ||
|
|
8d35ad0ef7 | ||
|
|
cc58f1086d | ||
|
|
e444a50cf0 | ||
|
|
f80fc86f18 | ||
|
|
0b9cb2b9e7 | ||
|
|
e0c08cea4f | ||
|
|
0a44c42533 | ||
|
|
a9bed68947 | ||
|
|
fd67a0086c | ||
|
|
c221eab0b5 | ||
|
|
bd5a9e8ba9 | ||
|
|
abcca30d93 | ||
|
|
4cb3c76798 | ||
|
|
6dad5c3b17 | ||
|
|
cd2d84d496 | ||
|
|
688100f7f4 | ||
|
|
f30bf4bbcf | ||
|
|
1b7c8d2569 | ||
|
|
4a341efe92 | ||
|
|
e2efe8da9c | ||
|
|
5a0f236ca4 | ||
|
|
ff8ae1ffa1 | ||
|
|
b3ad764532 | ||
|
|
a331a67b3e | ||
|
|
2e293ce903 | ||
|
|
64feeb3803 | ||
|
|
fa0e17f83a | ||
|
|
a339a7bcce | ||
|
|
fcfe43c7df | ||
|
|
296996d74e | ||
|
|
50924101d2 | ||
|
|
72082164c1 | ||
|
|
e09691337d | ||
|
|
86d5a9d80d | ||
|
|
531ce7626f | ||
|
|
63363a54e5 | ||
|
|
6d65010aad | ||
|
|
0776d78357 | ||
|
|
ed5e848f3e | ||
|
|
5aafe190e2 |
@@ -1,4 +1,4 @@
|
||||
FROM ubuntu:22.04
|
||||
FROM ubuntu:24.04
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
# enable 'universe' because musl-tools & clang live there
|
||||
@@ -11,19 +11,17 @@ RUN apt-get update && \
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential curl git ca-certificates \
|
||||
pkg-config clang musl-tools libssl-dev && \
|
||||
pkg-config clang musl-tools libssl-dev just && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# non-root dev user
|
||||
ARG USER=dev
|
||||
ARG UID=1000
|
||||
RUN useradd -m -u $UID $USER
|
||||
USER $USER
|
||||
# Ubuntu 24.04 ships with user 'ubuntu' already created with UID 1000.
|
||||
USER ubuntu
|
||||
|
||||
# install Rust + musl target as dev user
|
||||
RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal && \
|
||||
~/.cargo/bin/rustup target add aarch64-unknown-linux-musl
|
||||
~/.cargo/bin/rustup target add aarch64-unknown-linux-musl && \
|
||||
~/.cargo/bin/rustup component add clippy rustfmt
|
||||
|
||||
ENV PATH="/home/${USER}/.cargo/bin:${PATH}"
|
||||
ENV PATH="/home/ubuntu/.cargo/bin:${PATH}"
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
@@ -15,15 +15,13 @@
|
||||
"CARGO_TARGET_DIR": "${containerWorkspaceFolder}/codex-rs/target-arm64"
|
||||
},
|
||||
|
||||
"remoteUser": "dev",
|
||||
"remoteUser": "ubuntu",
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"settings": {
|
||||
"terminal.integrated.defaultProfile.linux": "bash"
|
||||
"terminal.integrated.defaultProfile.linux": "bash"
|
||||
},
|
||||
"extensions": [
|
||||
"rust-lang.rust-analyzer"
|
||||
],
|
||||
"extensions": ["rust-lang.rust-analyzer", "tamasfe.even-better-toml"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
9
.github/actions/codex/action.yml
vendored
9
.github/actions/codex/action.yml
vendored
@@ -20,9 +20,9 @@ inputs:
|
||||
description: "Value to use as the CODEX_HOME environment variable when running Codex."
|
||||
required: false
|
||||
codex_release_tag:
|
||||
description: "The release tag of the Codex model to run."
|
||||
description: "The release tag of the Codex model to run, e.g., 'rust-v0.3.0'. Defaults to the latest release."
|
||||
required: false
|
||||
default: "codex-rs-ca8e97fcbcb991e542b8689f2d4eab9d30c399d6-1-rust-v0.0.2505302325"
|
||||
default: ""
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
@@ -84,7 +84,10 @@ runs:
|
||||
# we will need to update this action.yml file to match.
|
||||
artifact="codex-exec-${triple}.tar.gz"
|
||||
|
||||
gh release download ${{ inputs.codex_release_tag }} --repo openai/codex \
|
||||
TAG_ARG="${{ inputs.codex_release_tag }}"
|
||||
# The usage is `gh release download [<tag>] [flags]`, so if TAG_ARG
|
||||
# is empty, we do not pass it so we can default to the latest release.
|
||||
gh release download ${TAG_ARG:+$TAG_ARG} --repo openai/codex \
|
||||
--pattern "$artifact" --output - \
|
||||
| tar xzO > /usr/local/bin/codex-exec
|
||||
chmod +x /usr/local/bin/codex-exec
|
||||
|
||||
26
.github/actions/codex/bun.lock
vendored
26
.github/actions/codex/bun.lock
vendored
@@ -8,10 +8,10 @@
|
||||
"@actions/github": "^6.0.1",
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "^1.2.11",
|
||||
"@types/node": "^22.15.21",
|
||||
"prettier": "^3.5.3",
|
||||
"typescript": "^5.8.3",
|
||||
"@types/bun": "^1.2.19",
|
||||
"@types/node": "^24.1.0",
|
||||
"prettier": "^3.6.2",
|
||||
"typescript": "^5.9.2",
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -48,27 +48,31 @@
|
||||
|
||||
"@octokit/types": ["@octokit/types@13.10.0", "", { "dependencies": { "@octokit/openapi-types": "^24.2.0" } }, "sha512-ifLaO34EbbPj0Xgro4G5lP5asESjwHracYJvVaPIyXMuiuXLlhic3S47cBdTb+jfODkTE5YtGCLt3Ay3+J97sA=="],
|
||||
|
||||
"@types/bun": ["@types/bun@1.2.13", "", { "dependencies": { "bun-types": "1.2.13" } }, "sha512-u6vXep/i9VBxoJl3GjZsl/BFIsvML8DfVDO0RYLEwtSZSp981kEO1V5NwRcO1CPJ7AmvpbnDCiMKo3JvbDEjAg=="],
|
||||
"@types/bun": ["@types/bun@1.2.19", "", { "dependencies": { "bun-types": "1.2.19" } }, "sha512-d9ZCmrH3CJ2uYKXQIUuZ/pUnTqIvLDS0SK7pFmbx8ma+ziH/FRMoAq5bYpRG7y+w1gl+HgyNZbtqgMq4W4e2Lg=="],
|
||||
|
||||
"@types/node": ["@types/node@22.15.21", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-EV/37Td6c+MgKAbkcLG6vqZ2zEYHD7bvSrzqqs2RIhbA6w3x+Dqz8MZM3sP6kGTeLrdoOgKZe+Xja7tUB2DNkQ=="],
|
||||
"@types/node": ["@types/node@24.1.0", "", { "dependencies": { "undici-types": "~7.8.0" } }, "sha512-ut5FthK5moxFKH2T1CUOC6ctR67rQRvvHdFLCD2Ql6KXmMuCrjsSsRI9UsLCm9M18BMwClv4pn327UvB7eeO1w=="],
|
||||
|
||||
"@types/react": ["@types/react@19.1.8", "", { "dependencies": { "csstype": "^3.0.2" } }, "sha512-AwAfQ2Wa5bCx9WP8nZL2uMZWod7J7/JSplxbTmBQ5ms6QpqNYm672H0Vu9ZVKVngQ+ii4R/byguVEUZQyeg44g=="],
|
||||
|
||||
"before-after-hook": ["before-after-hook@2.2.3", "", {}, "sha512-NzUnlZexiaH/46WDhANlyR2bXRopNg4F/zuSA3OpZnllCUgRaOF2znDioDWrmbNVsuZk6l9pMquQB38cfBZwkQ=="],
|
||||
|
||||
"bun-types": ["bun-types@1.2.13", "", { "dependencies": { "@types/node": "*" } }, "sha512-rRjA1T6n7wto4gxhAO/ErZEtOXyEZEmnIHQfl0Dt1QQSB4QV0iP6BZ9/YB5fZaHFQ2dwHFrmPaRQ9GGMX01k9Q=="],
|
||||
"bun-types": ["bun-types@1.2.19", "", { "dependencies": { "@types/node": "*" }, "peerDependencies": { "@types/react": "^19" } }, "sha512-uAOTaZSPuYsWIXRpj7o56Let0g/wjihKCkeRqUBhlLVM/Bt+Fj9xTo+LhC1OV1XDaGkz4hNC80et5xgy+9KTHQ=="],
|
||||
|
||||
"csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],
|
||||
|
||||
"deprecation": ["deprecation@2.3.1", "", {}, "sha512-xmHIy4F3scKVwMsQ4WnVaS8bHOx0DmVwRywosKhaILI0ywMDWPtBSku2HNxRvF7jtwDRsoEwYQSfbxj8b7RlJQ=="],
|
||||
|
||||
"once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="],
|
||||
|
||||
"prettier": ["prettier@3.5.3", "", { "bin": { "prettier": "bin/prettier.cjs" } }, "sha512-QQtaxnoDJeAkDvDKWCLiwIXkTgRhwYDEQCghU9Z6q03iyek/rxRh/2lC3HB7P8sWT2xC/y5JDctPLBIGzHKbhw=="],
|
||||
"prettier": ["prettier@3.6.2", "", { "bin": { "prettier": "bin/prettier.cjs" } }, "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ=="],
|
||||
|
||||
"tunnel": ["tunnel@0.0.6", "", {}, "sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg=="],
|
||||
|
||||
"typescript": ["typescript@5.8.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ=="],
|
||||
"typescript": ["typescript@5.9.2", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A=="],
|
||||
|
||||
"undici": ["undici@5.29.0", "", { "dependencies": { "@fastify/busboy": "^2.0.0" } }, "sha512-raqeBD6NQK4SkWhQzeYKd1KmIG6dllBOTt55Rmkt4HtI9mwdWtJljnrXjAFUBLTSN67HWrOIZ3EPF4kjUw80Bg=="],
|
||||
|
||||
"undici-types": ["undici-types@6.21.0", "", {}, "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="],
|
||||
"undici-types": ["undici-types@7.8.0", "", {}, "sha512-9UJ2xGDvQ43tYyVMpuHlsgApydB8ZKfVYTsLDhXkFL/6gfkp+U8xTGdh8pMJv1SpZna0zxG1DwsKZsreLbXBxw=="],
|
||||
|
||||
"universal-user-agent": ["universal-user-agent@6.0.1", "", {}, "sha512-yCzhz6FN2wU1NiiQRogkTQszlQSlpWaw8SvVegAc+bDxbzHgh1vX8uIe8OYyMH6DwH+sdTJsgMl36+mSMdRJIQ=="],
|
||||
|
||||
@@ -78,6 +82,8 @@
|
||||
|
||||
"@octokit/plugin-rest-endpoint-methods/@octokit/types": ["@octokit/types@12.6.0", "", { "dependencies": { "@octokit/openapi-types": "^20.0.0" } }, "sha512-1rhSOfRa6H9w4YwK0yrf5faDaDTb+yLyBUKOCV4xtCDB5VmIPqd/v9yr9o6SAzOAlRxMiRiCic6JVM1/kunVkw=="],
|
||||
|
||||
"bun-types/@types/node": ["@types/node@24.0.13", "", { "dependencies": { "undici-types": "~7.8.0" } }, "sha512-Qm9OYVOFHFYg3wJoTSrz80hoec5Lia/dPp84do3X7dZvLikQvM1YpmvTBEdIr/e+U8HTkFjLHLnl78K/qjf+jQ=="],
|
||||
|
||||
"@octokit/plugin-paginate-rest/@octokit/types/@octokit/openapi-types": ["@octokit/openapi-types@20.0.0", "", {}, "sha512-EtqRBEjp1dL/15V7WiX5LJMIxxkdiGJnabzYx5Apx4FkQIFgAfKumXeYAqqJCj1s+BMX4cPFIFC4OLCR6stlnA=="],
|
||||
|
||||
"@octokit/plugin-rest-endpoint-methods/@octokit/types/@octokit/openapi-types": ["@octokit/openapi-types@20.0.0", "", {}, "sha512-EtqRBEjp1dL/15V7WiX5LJMIxxkdiGJnabzYx5Apx4FkQIFgAfKumXeYAqqJCj1s+BMX4cPFIFC4OLCR6stlnA=="],
|
||||
|
||||
8
.github/actions/codex/package.json
vendored
8
.github/actions/codex/package.json
vendored
@@ -13,9 +13,9 @@
|
||||
"@actions/github": "^6.0.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "^1.2.11",
|
||||
"@types/node": "^22.15.21",
|
||||
"prettier": "^3.5.3",
|
||||
"typescript": "^5.8.3"
|
||||
"@types/bun": "^1.2.19",
|
||||
"@types/node": "^24.1.0",
|
||||
"prettier": "^3.6.2",
|
||||
"typescript": "^5.9.2"
|
||||
}
|
||||
}
|
||||
|
||||
33
.github/actions/codex/src/process-label.ts
vendored
33
.github/actions/codex/src/process-label.ts
vendored
@@ -91,7 +91,38 @@ async function processLabel(
|
||||
labelConfig: LabelConfig,
|
||||
): Promise<void> {
|
||||
const template = labelConfig.getPromptTemplate();
|
||||
const populatedTemplate = await renderPromptTemplate(template, ctx);
|
||||
|
||||
// If this is a review label, prepend explicit PR-diff scoping guidance to
|
||||
// reduce out-of-scope feedback. Do this before rendering so placeholders in
|
||||
// the guidance (e.g., {CODEX_ACTION_GITHUB_EVENT_PATH}) are substituted.
|
||||
const isReview = label.toLowerCase().includes("review");
|
||||
const reviewScopeGuidance = `
|
||||
PR Diff Scope
|
||||
- Only review changes between the PR's merge-base and head; do not comment on commits or files outside this range.
|
||||
- Derive the base/head SHAs from the event JSON at {CODEX_ACTION_GITHUB_EVENT_PATH}, then compute and use the PR diff for all analysis and comments.
|
||||
|
||||
Commands to determine scope
|
||||
- Resolve SHAs:
|
||||
- BASE_SHA=$(jq -r '.pull_request.base.sha // .pull_request.base.ref' "{CODEX_ACTION_GITHUB_EVENT_PATH}")
|
||||
- HEAD_SHA=$(jq -r '.pull_request.head.sha // .pull_request.head.ref' "{CODEX_ACTION_GITHUB_EVENT_PATH}")
|
||||
- BASE_SHA=$(git rev-parse "$BASE_SHA")
|
||||
- HEAD_SHA=$(git rev-parse "$HEAD_SHA")
|
||||
- Prefer triple-dot (merge-base) semantics for PR diffs:
|
||||
- Changed commits: git log --oneline "$BASE_SHA...$HEAD_SHA"
|
||||
- Changed files: git diff --name-status "$BASE_SHA...$HEAD_SHA"
|
||||
- Review hunks: git diff -U0 "$BASE_SHA...$HEAD_SHA"
|
||||
|
||||
Review rules
|
||||
- Anchor every comment to a file and hunk present in git diff "$BASE_SHA...$HEAD_SHA".
|
||||
- If you mention context outside the diff, label it as "Follow-up (outside this PR scope)" and keep it brief (<=2 bullets).
|
||||
- Do not critique commits or files not reachable in the PR range (merge-base(base, head) → head).
|
||||
`.trim();
|
||||
|
||||
const effectiveTemplate = isReview
|
||||
? `${reviewScopeGuidance}\n\n${template}`
|
||||
: template;
|
||||
|
||||
const populatedTemplate = await renderPromptTemplate(effectiveTemplate, ctx);
|
||||
|
||||
// Always run Codex and post the resulting message as a comment.
|
||||
let commentBody = await runCodex(populatedTemplate, ctx);
|
||||
|
||||
139
.github/codex/labels/codex-rust-review.md
vendored
Normal file
139
.github/codex/labels/codex-rust-review.md
vendored
Normal file
@@ -0,0 +1,139 @@
|
||||
Review this PR and respond with a very concise final message, formatted in Markdown.
|
||||
|
||||
There should be a summary of the changes (1-2 sentences) and a few bullet points if necessary.
|
||||
|
||||
Then provide the **review** (1-2 sentences plus bullet points, friendly tone).
|
||||
|
||||
Things to look out for when doing the review:
|
||||
|
||||
## General Principles
|
||||
|
||||
- **Make sure the pull request body explains the motivation behind the change.** If the author has failed to do this, call it out, and if you think you can deduce the motivation behind the change, propose copy.
|
||||
- Ideally, the PR body also contains a small summary of the change. For small changes, the PR title may be sufficient.
|
||||
- Each PR should ideally do one conceptual thing. For example, if a PR does a refactoring as well as introducing a new feature, push back and suggest the refactoring be done in a separate PR. This makes things easier for the reviewer, as refactoring changes can often be far-reaching, yet quick to review.
|
||||
- When introducing new code, be on the lookout for code that duplicates existing code. When found, propose a way to refactor the existing code such that it should be reused.
|
||||
|
||||
## Code Organization
|
||||
|
||||
- Each create in the Cargo workspace in `codex-rs` has a specific purpose: make a note if you believe new code is not introduced in the correct crate.
|
||||
- When possible, try to keep the `core` crate as small as possible. Non-core but shared logic is often a good candidate for `codex-rs/common`.
|
||||
- Be wary of large files and offer suggestions for how to break things into more reasonably-sized files.
|
||||
- Rust files should generally be organized such that the public parts of the API appear near the top of the file and helper functions go below. This is analagous to the "inverted pyramid" structure that is favored in journalism.
|
||||
|
||||
## Assertions in Tests
|
||||
|
||||
Assert the equality of the entire objects instead of doing "piecemeal comparisons," performing `assert_eq!()` on individual fields.
|
||||
|
||||
Note that unit tests also function as "executable documentation." As shown in the following example, "piecemeal comparisons" are often more verbose, provide less coverage, and are not as useful as executable documentation.
|
||||
|
||||
For example, suppose you have the following enum:
|
||||
|
||||
```rust
|
||||
#[derive(Debug, PartialEq)]
|
||||
enum Message {
|
||||
Request {
|
||||
id: String,
|
||||
method: String,
|
||||
params: Option<serde_json::Value>,
|
||||
},
|
||||
Notification {
|
||||
method: String,
|
||||
params: Option<serde_json::Value>,
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
This is an example of a _piecemeal_ comparison:
|
||||
|
||||
```rust
|
||||
// BAD: Piecemeal Comparison
|
||||
|
||||
#[test]
|
||||
fn test_get_latest_messages() {
|
||||
let messages = get_latest_messages();
|
||||
assert_eq!(messages.len(), 2);
|
||||
|
||||
let m0 = &messages[0];
|
||||
match m0 {
|
||||
Message::Request { id, method, params } => {
|
||||
assert_eq!(id, "123");
|
||||
assert_eq!(method, "subscribe");
|
||||
assert_eq!(
|
||||
*params,
|
||||
Some(json!({
|
||||
"conversation_id": "x42z86"
|
||||
}))
|
||||
)
|
||||
}
|
||||
Message::Notification { .. } => {
|
||||
panic!("expected Request");
|
||||
}
|
||||
}
|
||||
|
||||
let m1 = &messages[1];
|
||||
match m1 {
|
||||
Message::Request { .. } => {
|
||||
panic!("expected Notification");
|
||||
}
|
||||
Message::Notification { method, params } => {
|
||||
assert_eq!(method, "log");
|
||||
assert_eq!(
|
||||
*params,
|
||||
Some(json!({
|
||||
"level": "info",
|
||||
"message": "subscribed"
|
||||
}))
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This is a _deep_ comparison:
|
||||
|
||||
```rust
|
||||
// GOOD: Verify the entire structure with a single assert_eq!().
|
||||
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn test_get_latest_messages() {
|
||||
let messages = get_latest_messages();
|
||||
|
||||
assert_eq!(
|
||||
vec![
|
||||
Message::Request {
|
||||
id: "123".to_string(),
|
||||
method: "subscribe".to_string(),
|
||||
params: Some(json!({
|
||||
"conversation_id": "x42z86"
|
||||
})),
|
||||
},
|
||||
Message::Notification {
|
||||
method: "log".to_string(),
|
||||
params: Some(json!({
|
||||
"level": "info",
|
||||
"message": "subscribed"
|
||||
})),
|
||||
},
|
||||
],
|
||||
messages,
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
## More Tactical Rust Things To Look Out For
|
||||
|
||||
- Do not use `unsafe` (unless you have a really, really good reason like using an operating system API directly and no safe wrapper exists). For example, there are cases where it is tempting to use `unsafe` in order to use `std::env::set_var()`, but this indeed `unsafe` and has led to race conditions on multiple occasions. (When this happens, find a mechanism other than environment variables to use for configuration.)
|
||||
- Encourage the use of small enums or the newtype pattern in Rust if it helps readability without adding significant cognitive load or lines of code.
|
||||
- If you see opportunities for the changes in a diff to use more idiomatic Rust, please make specific recommendations. For example, favor the use of expressions over `return`.
|
||||
- When modifying a `Cargo.toml` file, make sure that dependency lists stay alphabetically sorted. Also consider whether a new dependency is added to the appropriate place (e.g., `[dependencies]` versus `[dev-dependencies]`)
|
||||
|
||||
## Pull Request Body
|
||||
|
||||
- If the nature of the change seems to have a visual component (which is often the case for changes to `codex-rs/tui`), recommend including a screenshot or video to demonstrate the change, if appropriate.
|
||||
- References to existing GitHub issues and PRs are encouraged, where appropriate, though you likely do not have network access, so may not be able to help here.
|
||||
|
||||
# PR Information
|
||||
|
||||
{CODEX_ACTION_GITHUB_EVENT_PATH} contains the JSON that triggered this GitHub workflow. It contains the `base` and `head` refs that define this PR. Both refs are available locally.
|
||||
26
.github/dependabot.yaml
vendored
Normal file
26
.github/dependabot.yaml
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
# https://docs.github.com/en/code-security/dependabot/working-with-dependabot/dependabot-options-reference#package-ecosystem-
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: bun
|
||||
directory: .github/actions/codex
|
||||
schedule:
|
||||
interval: weekly
|
||||
- package-ecosystem: cargo
|
||||
directories:
|
||||
- codex-rs
|
||||
- codex-rs/*
|
||||
schedule:
|
||||
interval: weekly
|
||||
- package-ecosystem: devcontainers
|
||||
directory: /
|
||||
schedule:
|
||||
interval: weekly
|
||||
- package-ecosystem: docker
|
||||
directory: codex-cli
|
||||
schedule:
|
||||
interval: weekly
|
||||
- package-ecosystem: github-actions
|
||||
directory: /
|
||||
schedule:
|
||||
interval: weekly
|
||||
9
.github/workflows/ci.yml
vendored
9
.github/workflows/ci.yml
vendored
@@ -74,7 +74,12 @@ jobs:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
run: pnpm stage-release
|
||||
|
||||
- name: Ensure README.md contains only ASCII and certain Unicode code points
|
||||
- name: Ensure root README.md contains only ASCII and certain Unicode code points
|
||||
run: ./scripts/asciicheck.py README.md
|
||||
- name: Check README ToC
|
||||
- name: Check root README ToC
|
||||
run: python3 scripts/readme_toc.py README.md
|
||||
|
||||
- name: Ensure codex-cli/README.md contains only ASCII and certain Unicode code points
|
||||
run: ./scripts/asciicheck.py codex-cli/README.md
|
||||
- name: Check codex-cli/README ToC
|
||||
run: python3 scripts/readme_toc.py codex-cli/README.md
|
||||
|
||||
4
.github/workflows/codex.yml
vendored
4
.github/workflows/codex.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
(github.event_name == 'issues' && (
|
||||
(github.event.action == 'labeled' && (github.event.label.name == 'codex-attempt' || github.event.label.name == 'codex-triage'))
|
||||
)) ||
|
||||
(github.event_name == 'pull_request' && github.event.action == 'labeled' && github.event.label.name == 'codex-review')
|
||||
(github.event_name == 'pull_request' && github.event.action == 'labeled' && (github.event.label.name == 'codex-review' || github.event.label.name == 'codex-rust-review'))
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write # can push or create branches
|
||||
@@ -70,7 +70,7 @@ jobs:
|
||||
- name: Install dependencies
|
||||
run: pnpm install
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.87
|
||||
- uses: dtolnay/rust-toolchain@1.88
|
||||
with:
|
||||
targets: x86_64-unknown-linux-gnu
|
||||
components: clippy
|
||||
|
||||
4
.github/workflows/rust-ci.yml
vendored
4
.github/workflows/rust-ci.yml
vendored
@@ -26,7 +26,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dtolnay/rust-toolchain@1.87
|
||||
- uses: dtolnay/rust-toolchain@1.88
|
||||
with:
|
||||
components: rustfmt
|
||||
- name: cargo fmt
|
||||
@@ -64,7 +64,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dtolnay/rust-toolchain@1.87
|
||||
- uses: dtolnay/rust-toolchain@1.88
|
||||
with:
|
||||
targets: ${{ matrix.target }}
|
||||
components: clippy
|
||||
|
||||
37
.github/workflows/rust-release.yml
vendored
37
.github/workflows/rust-release.yml
vendored
@@ -15,9 +15,6 @@ concurrency:
|
||||
group: ${{ github.workflow }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
TAG_REGEX: '^rust-v[0-9]+\.[0-9]+\.[0-9]+$'
|
||||
|
||||
jobs:
|
||||
tag-check:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -33,8 +30,8 @@ jobs:
|
||||
# 1. Must be a tag and match the regex
|
||||
[[ "${GITHUB_REF_TYPE}" == "tag" ]] \
|
||||
|| { echo "❌ Not a tag push"; exit 1; }
|
||||
[[ "${GITHUB_REF_NAME}" =~ ${TAG_REGEX} ]] \
|
||||
|| { echo "❌ Tag '${GITHUB_REF_NAME}' != ${TAG_REGEX}"; exit 1; }
|
||||
[[ "${GITHUB_REF_NAME}" =~ ^rust-v[0-9]+\.[0-9]+\.[0-9]+(-(alpha|beta)(\.[0-9]+)?)?$ ]] \
|
||||
|| { echo "❌ Tag '${GITHUB_REF_NAME}' doesn't match expected format"; exit 1; }
|
||||
|
||||
# 2. Extract versions
|
||||
tag_ver="${GITHUB_REF_NAME#rust-v}"
|
||||
@@ -76,7 +73,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dtolnay/rust-toolchain@1.87
|
||||
- uses: dtolnay/rust-toolchain@1.88
|
||||
with:
|
||||
targets: ${{ matrix.target }}
|
||||
|
||||
@@ -96,7 +93,7 @@ jobs:
|
||||
sudo apt install -y musl-tools pkg-config
|
||||
|
||||
- name: Cargo build
|
||||
run: cargo build --target ${{ matrix.target }} --release --all-targets --all-features
|
||||
run: cargo build --target ${{ matrix.target }} --release --bin codex --bin codex-exec --bin codex-linux-sandbox
|
||||
|
||||
- name: Stage artifacts
|
||||
shell: bash
|
||||
@@ -160,9 +157,7 @@ jobs:
|
||||
release:
|
||||
needs: build
|
||||
name: release
|
||||
runs-on: ubuntu-24.04
|
||||
env:
|
||||
RELEASE_TAG: codex-rs-${{ github.sha }}-${{ github.run_attempt }}-${{ github.ref_name }}
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4
|
||||
@@ -172,17 +167,27 @@ jobs:
|
||||
- name: List
|
||||
run: ls -R dist/
|
||||
|
||||
- uses: softprops/action-gh-release@v2
|
||||
- name: Define release name
|
||||
id: release_name
|
||||
run: |
|
||||
# Extract the version from the tag name, which is in the format
|
||||
# "rust-v0.1.0".
|
||||
version="${GITHUB_REF_NAME#rust-v}"
|
||||
echo "name=${version}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Create GitHub Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: ${{ env.RELEASE_TAG }}
|
||||
name: ${{ steps.release_name.outputs.name }}
|
||||
tag_name: ${{ github.ref_name }}
|
||||
files: dist/**
|
||||
# For now, tag releases as "prerelease" because we are not claiming
|
||||
# the Rust CLI is stable yet.
|
||||
prerelease: true
|
||||
# Mark as prerelease only when the version has a suffix after x.y.z
|
||||
# (e.g. -alpha, -beta). Otherwise publish a normal release.
|
||||
prerelease: ${{ contains(steps.release_name.outputs.name, '-') }}
|
||||
|
||||
- uses: facebook/dotslash-publish-release@v2
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
tag: ${{ env.RELEASE_TAG }}
|
||||
tag: ${{ github.ref_name }}
|
||||
config: .github/dotslash-config.json
|
||||
|
||||
5
.vscode/extensions.json
vendored
Normal file
5
.vscode/extensions.json
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"tamasfe.even-better-toml",
|
||||
]
|
||||
}
|
||||
18
.vscode/launch.json
vendored
Normal file
18
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "Cargo launch",
|
||||
"cargo": {
|
||||
"cwd": "${workspaceFolder}/codex-rs",
|
||||
"args": [
|
||||
"build",
|
||||
"--bin=codex-tui"
|
||||
]
|
||||
},
|
||||
"args": []
|
||||
}
|
||||
]
|
||||
}
|
||||
18
.vscode/settings.json
vendored
Normal file
18
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"rust-analyzer.checkOnSave": true,
|
||||
"rust-analyzer.check.command": "clippy",
|
||||
"rust-analyzer.check.extraArgs": ["--all-features", "--tests"],
|
||||
"rust-analyzer.rustfmt.extraArgs": ["--config", "imports_granularity=Item"],
|
||||
"[rust]": {
|
||||
"editor.defaultFormatter": "rust-lang.rust-analyzer",
|
||||
"editor.formatOnSave": true,
|
||||
},
|
||||
"[toml]": {
|
||||
"editor.defaultFormatter": "tamasfe.even-better-toml",
|
||||
"editor.formatOnSave": true,
|
||||
},
|
||||
// Array order for options in ~/.codex/config.toml such as `notify` and the
|
||||
// `args` for an MCP server is significant, so we disable reordering.
|
||||
"evenBetterToml.formatter.reorderArrays": false,
|
||||
"evenBetterToml.formatter.reorderKeys": true,
|
||||
}
|
||||
@@ -2,4 +2,10 @@
|
||||
|
||||
In the codex-rs folder where the rust code lives:
|
||||
|
||||
- Never add or modify any code related to `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR`. You operate in a sandbox where `CODEX_SANDBOX_NETWORK_DISABLED=1` will be set whenever you use the `shell` tool. Any existing code that uses `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` was authored with this fact in mind. It is often used to early exit out of tests that the author knew you would not be able to run given your sandbox limitations.
|
||||
- Never add or modify any code related to `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` or `CODEX_SANDBOX_ENV_VAR`.
|
||||
- You operate in a sandbox where `CODEX_SANDBOX_NETWORK_DISABLED=1` will be set whenever you use the `shell` tool. Any existing code that uses `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` was authored with this fact in mind. It is often used to early exit out of tests that the author knew you would not be able to run given your sandbox limitations.
|
||||
- Similarly, when you spawn a process using Seatbelt (`/usr/bin/sandbox-exec`), `CODEX_SANDBOX=seatbelt` will be set on the child process. Integration tests that want to run Seatbelt themselves cannot be run under Seatbelt, so checks for `CODEX_SANDBOX=seatbelt` are also often used to early exit out of tests, as appropriate.
|
||||
|
||||
Before creating a pull request with changes to `codex-rs`, run `just fmt` (in `codex-rs` directory) to format the code and `just fix` (in `codex-rs` directory) to fix any linter issues in the code, ensure the test suite passes by running `cargo test --all-features` in the `codex-rs` directory.
|
||||
|
||||
When making individual changes prefer running tests on individual files or projects first.
|
||||
|
||||
4
NOTICE
4
NOTICE
@@ -1,2 +1,6 @@
|
||||
OpenAI Codex
|
||||
Copyright 2025 OpenAI
|
||||
|
||||
This project includes code derived from [Ratatui](https://github.com/ratatui/ratatui), licensed under the MIT license.
|
||||
Copyright (c) 2016-2022 Florian Dehau
|
||||
Copyright (c) 2023-2025 The Ratatui Developers
|
||||
|
||||
614
README.md
614
README.md
@@ -1,9 +1,11 @@
|
||||
<h1 align="center">OpenAI Codex CLI</h1>
|
||||
<p align="center">Lightweight coding agent that runs in your terminal</p>
|
||||
|
||||
<p align="center"><code>npm i -g @openai/codex</code></p>
|
||||
<p align="center"><code>npm i -g @openai/codex</code><br />or <code>brew install codex</code></p>
|
||||
|
||||

|
||||
This is the home of the **Codex CLI**, which is a coding agent from OpenAI that runs locally on your computer. If you are looking for the _cloud-based agent_ from OpenAI, **Codex [Web]**, see <https://chatgpt.com/codex>.
|
||||
|
||||
<!--  -->
|
||||
|
||||
---
|
||||
|
||||
@@ -14,6 +16,9 @@
|
||||
|
||||
- [Experimental technology disclaimer](#experimental-technology-disclaimer)
|
||||
- [Quickstart](#quickstart)
|
||||
- [OpenAI API Users](#openai-api-users)
|
||||
- [OpenAI Plus/Pro Users](#openai-pluspro-users)
|
||||
- [Using Open Source Models](#using-open-source-models)
|
||||
- [Why Codex?](#why-codex)
|
||||
- [Security model & permissions](#security-model--permissions)
|
||||
- [Platform sandboxing details](#platform-sandboxing-details)
|
||||
@@ -21,24 +26,17 @@
|
||||
- [CLI reference](#cli-reference)
|
||||
- [Memory & project docs](#memory--project-docs)
|
||||
- [Non-interactive / CI mode](#non-interactive--ci-mode)
|
||||
- [Model Context Protocol (MCP)](#model-context-protocol-mcp)
|
||||
- [Tracing / verbose logging](#tracing--verbose-logging)
|
||||
- [Recipes](#recipes)
|
||||
- [Installation](#installation)
|
||||
- [Configuration guide](#configuration-guide)
|
||||
- [Basic configuration parameters](#basic-configuration-parameters)
|
||||
- [Custom AI provider configuration](#custom-ai-provider-configuration)
|
||||
- [History configuration](#history-configuration)
|
||||
- [Configuration examples](#configuration-examples)
|
||||
- [Full configuration example](#full-configuration-example)
|
||||
- [Custom instructions](#custom-instructions)
|
||||
- [Environment variables setup](#environment-variables-setup)
|
||||
- [DotSlash](#dotslash)
|
||||
- [Configuration](#configuration)
|
||||
- [FAQ](#faq)
|
||||
- [Zero data retention (ZDR) usage](#zero-data-retention-zdr-usage)
|
||||
- [Codex open source fund](#codex-open-source-fund)
|
||||
- [Contributing](#contributing)
|
||||
- [Development workflow](#development-workflow)
|
||||
- [Git hooks with Husky](#git-hooks-with-husky)
|
||||
- [Debugging](#debugging)
|
||||
- [Writing high-impact code changes](#writing-high-impact-code-changes)
|
||||
- [Opening a pull request](#opening-a-pull-request)
|
||||
- [Review process](#review-process)
|
||||
@@ -47,8 +45,6 @@
|
||||
- [Contributor license agreement (CLA)](#contributor-license-agreement-cla)
|
||||
- [Quick fixes](#quick-fixes)
|
||||
- [Releasing `codex`](#releasing-codex)
|
||||
- [Alternative build options](#alternative-build-options)
|
||||
- [Nix flake development](#nix-flake-development)
|
||||
- [Security & responsible AI](#security--responsible-ai)
|
||||
- [License](#license)
|
||||
|
||||
@@ -71,54 +67,100 @@ Help us improve by filing issues or submitting PRs (see the section below for ho
|
||||
|
||||
## Quickstart
|
||||
|
||||
Install globally:
|
||||
Install globally with your preferred package manager:
|
||||
|
||||
```shell
|
||||
npm install -g @openai/codex
|
||||
npm install -g @openai/codex # Alternatively: `brew install codex`
|
||||
```
|
||||
|
||||
Or go to the [latest GitHub Release](https://github.com/openai/codex/releases/latest) and download the appropriate binary for your platform.
|
||||
|
||||
### OpenAI API Users
|
||||
|
||||
Next, set your OpenAI API key as an environment variable:
|
||||
|
||||
```shell
|
||||
export OPENAI_API_KEY="your-api-key-here"
|
||||
```
|
||||
|
||||
> **Note:** This command sets the key only for your current terminal session. You can add the `export` line to your shell's configuration file (e.g., `~/.zshrc`) but we recommend setting for the session. **Tip:** You can also place your API key into a `.env` file at the root of your project:
|
||||
>
|
||||
> ```env
|
||||
> OPENAI_API_KEY=your-api-key-here
|
||||
> ```
|
||||
>
|
||||
> The CLI will automatically load variables from `.env` (via `dotenv/config`).
|
||||
> [!NOTE]
|
||||
> This command sets the key only for your current terminal session. You can add the `export` line to your shell's configuration file (e.g., `~/.zshrc`), but we recommend setting it for the session.
|
||||
|
||||
### OpenAI Plus/Pro Users
|
||||
|
||||
If you have a paid OpenAI account, run the following to start the login process:
|
||||
|
||||
```
|
||||
codex login
|
||||
```
|
||||
|
||||
If you complete the process successfully, you should have a `~/.codex/auth.json` file that contains the credentials that Codex will use.
|
||||
|
||||
To verify whether you are currently logged in, run:
|
||||
|
||||
```
|
||||
codex login status
|
||||
```
|
||||
|
||||
If you encounter problems with the login flow, please comment on <https://github.com/openai/codex/issues/1243>.
|
||||
|
||||
<details>
|
||||
<summary><strong>Use <code>--provider</code> to use other models</strong></summary>
|
||||
<summary><strong>Use <code>--profile</code> to use other models</strong></summary>
|
||||
|
||||
> Codex also allows you to use other providers that support the OpenAI Chat Completions API. You can set the provider in the config file or use the `--provider` flag. The possible options for `--provider` are:
|
||||
>
|
||||
> - openai (default)
|
||||
> - openrouter
|
||||
> - azure
|
||||
> - gemini
|
||||
> - ollama
|
||||
> - mistral
|
||||
> - deepseek
|
||||
> - xai
|
||||
> - groq
|
||||
> - arceeai
|
||||
> - any other provider that is compatible with the OpenAI API
|
||||
>
|
||||
> If you use a provider other than OpenAI, you will need to set the API key for the provider in the config file or in the environment variable as:
|
||||
>
|
||||
> ```shell
|
||||
> export <provider>_API_KEY="your-api-key-here"
|
||||
> ```
|
||||
>
|
||||
> If you use a provider not listed above, you must also set the base URL for the provider:
|
||||
>
|
||||
> ```shell
|
||||
> export <provider>_BASE_URL="https://your-provider-api-base-url"
|
||||
> ```
|
||||
Codex also allows you to use other providers that support the OpenAI Chat Completions (or Responses) API.
|
||||
|
||||
To do so, you must first define custom [providers](./config.md#model_providers) in `~/.codex/config.toml`. For example, the provider for a standard Ollama setup would be defined as follows:
|
||||
|
||||
```toml
|
||||
[model_providers.ollama]
|
||||
name = "Ollama"
|
||||
base_url = "http://localhost:11434/v1"
|
||||
```
|
||||
|
||||
The `base_url` will have `/chat/completions` appended to it to build the full URL for the request.
|
||||
|
||||
For providers that also require an `Authorization` header of the form `Bearer: SECRET`, an `env_key` can be specified, which indicates the environment variable to read to use as the value of `SECRET` when making a request:
|
||||
|
||||
```toml
|
||||
[model_providers.openrouter]
|
||||
name = "OpenRouter"
|
||||
base_url = "https://openrouter.ai/api/v1"
|
||||
env_key = "OPENROUTER_API_KEY"
|
||||
```
|
||||
|
||||
Providers that speak the Responses API are also supported by adding `wire_api = "responses"` as part of the definition. Accessing OpenAI models via Azure is an example of such a provider, though it also requires specifying additional `query_params` that need to be appended to the request URL:
|
||||
|
||||
```toml
|
||||
[model_providers.azure]
|
||||
name = "Azure"
|
||||
# Make sure you set the appropriate subdomain for this URL.
|
||||
base_url = "https://YOUR_PROJECT_NAME.openai.azure.com/openai"
|
||||
env_key = "AZURE_OPENAI_API_KEY" # Or "OPENAI_API_KEY", whichever you use.
|
||||
# Newer versions appear to support the responses API, see https://github.com/openai/codex/pull/1321
|
||||
query_params = { api-version = "2025-04-01-preview" }
|
||||
wire_api = "responses"
|
||||
```
|
||||
|
||||
Once you have defined a provider you wish to use, you can configure it as your default provider as follows:
|
||||
|
||||
```toml
|
||||
model_provider = "azure"
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> If you find yourself experimenting with a variety of models and providers, then you likely want to invest in defining a _profile_ for each configuration like so:
|
||||
|
||||
```toml
|
||||
[profiles.o3]
|
||||
model_provider = "azure"
|
||||
model = "o3"
|
||||
|
||||
[profiles.mistral]
|
||||
model_provider = "ollama"
|
||||
model = "mistral"
|
||||
```
|
||||
|
||||
This way, you can specify one command-line argument (.e.g., `--profile o3`, `--profile mistral`) to override multiple settings together.
|
||||
|
||||
</details>
|
||||
<br />
|
||||
@@ -136,7 +178,7 @@ codex "explain this codebase to me"
|
||||
```
|
||||
|
||||
```shell
|
||||
codex --approval-mode full-auto "create the fanciest todo-list app"
|
||||
codex --full-auto "create the fanciest todo-list app"
|
||||
```
|
||||
|
||||
That's it - Codex will scaffold a file, run it inside a sandbox, install any
|
||||
@@ -145,6 +187,41 @@ they'll be committed to your working directory.
|
||||
|
||||
---
|
||||
|
||||
## Using Open Source Models
|
||||
|
||||
Codex can run fully locally against an OpenAI-compatible OSS host (like Ollama) using the `--oss` flag:
|
||||
|
||||
- Interactive UI:
|
||||
- codex --oss
|
||||
- Non-interactive (programmatic) mode:
|
||||
- echo "Refactor utils" | codex exec --oss
|
||||
|
||||
Model selection when using `--oss`:
|
||||
|
||||
- If you omit `-m/--model`, Codex defaults to -m gpt-oss:20b and will verify it exists locally (downloading if needed).
|
||||
- To pick a different size, pass one of:
|
||||
- -m "gpt-oss:20b"
|
||||
- -m "gpt-oss:120b"
|
||||
|
||||
Point Codex at your own OSS host:
|
||||
|
||||
- By default, `--oss` talks to http://localhost:11434/v1.
|
||||
- To use a different host, set one of these environment variables before running Codex:
|
||||
- CODEX_OSS_BASE_URL, for example:
|
||||
- CODEX_OSS_BASE_URL="http://my-ollama.example.com:11434/v1" codex --oss -m gpt-oss:20b
|
||||
- or CODEX_OSS_PORT (when the host is localhost):
|
||||
- CODEX_OSS_PORT=11434 codex --oss
|
||||
|
||||
Advanced: you can persist this in your config instead of environment variables by overriding the built-in `oss` provider in `~/.codex/config.toml`:
|
||||
|
||||
```toml
|
||||
[model_providers.oss]
|
||||
name = "Open Source"
|
||||
base_url = "http://my-ollama.example.com:11434/v1"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Why Codex?
|
||||
|
||||
Codex CLI is built for developers who already **live in the terminal** and want
|
||||
@@ -162,41 +239,35 @@ And it's **fully open-source** so you can see and contribute to how it develops!
|
||||
|
||||
## Security model & permissions
|
||||
|
||||
Codex lets you decide _how much autonomy_ the agent receives and auto-approval policy via the
|
||||
`--approval-mode` flag (or the interactive onboarding prompt):
|
||||
Codex lets you decide _how much autonomy_ you want to grant the agent. The following options can be configured independently:
|
||||
|
||||
| Mode | What the agent may do without asking | Still requires approval |
|
||||
| ------------------------- | --------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- |
|
||||
| **Suggest** <br>(default) | <li>Read any file in the repo | <li>**All** file writes/patches<li> **Any** arbitrary shell commands (aside from reading files) |
|
||||
| **Auto Edit** | <li>Read **and** apply-patch writes to files | <li>**All** shell commands |
|
||||
| **Full Auto** | <li>Read/write files <li> Execute shell commands (network disabled, writes limited to your workdir) | - |
|
||||
- [`approval_policy`](./codex-rs/config.md#approval_policy) determines when you should be prompted to approve whether Codex can execute a command
|
||||
- [`sandbox`](./codex-rs/config.md#sandbox) determines the _sandbox policy_ that Codex uses to execute untrusted commands
|
||||
|
||||
In **Full Auto** every command is run **network-disabled** and confined to the
|
||||
current working directory (plus temporary files) for defense-in-depth. Codex
|
||||
will also show a warning/confirmation if you start in **auto-edit** or
|
||||
**full-auto** while the directory is _not_ tracked by Git, so you always have a
|
||||
safety net.
|
||||
By default, Codex runs with `--ask-for-approval untrusted` and `--sandbox read-only`, which means that:
|
||||
|
||||
Coming soon: you'll be able to whitelist specific commands to auto-execute with
|
||||
the network enabled, once we're confident in additional safeguards.
|
||||
- The user is prompted to approve every command not on the set of "trusted" commands built into Codex (`cat`, `ls`, etc.)
|
||||
- Approved commands are run outside of a sandbox because user approval implies "trust," in this case.
|
||||
|
||||
Running Codex with the `--full-auto` convenience flag changes the configuration to `--ask-for-approval on-failure` and `--sandbox workspace-write`, which means that:
|
||||
|
||||
- Codex does not initially ask for user approval before running an individual command.
|
||||
- Though when it runs a command, it is run under a sandbox in which:
|
||||
- It can read any file on the system.
|
||||
- It can only write files under the current directory (or the directory specified via `--cd`).
|
||||
- Network requests are completely disabled.
|
||||
- Only if the command exits with a non-zero exit code will it ask the user for approval. If granted, it will re-attempt the command outside of the sandbox. (A common case is when Codex cannot `npm install` a dependency because that requires network access.)
|
||||
|
||||
Again, these two options can be configured independently. For example, if you want Codex to perform an "exploration" where you are happy for it to read anything it wants but you never want to be prompted, you could run Codex with `--ask-for-approval never` and `--sandbox read-only`.
|
||||
|
||||
### Platform sandboxing details
|
||||
|
||||
The hardening mechanism Codex uses depends on your OS:
|
||||
The mechanism Codex uses to implement the sandbox policy depends on your OS:
|
||||
|
||||
- **macOS 12+** - commands are wrapped with **Apple Seatbelt** (`sandbox-exec`).
|
||||
- **macOS 12+** uses **Apple Seatbelt** and runs commands using `sandbox-exec` with a profile (`-p`) that corresponds to the `--sandbox` that was specified.
|
||||
- **Linux** uses a combination of Landlock/seccomp APIs to enforce the `sandbox` configuration.
|
||||
|
||||
- Everything is placed in a read-only jail except for a small set of
|
||||
writable roots (`$PWD`, `$TMPDIR`, `~/.codex`, etc.).
|
||||
- Outbound network is _fully blocked_ by default - even if a child process
|
||||
tries to `curl` somewhere it will fail.
|
||||
|
||||
- **Linux** - there is no sandboxing by default.
|
||||
We recommend using Docker for sandboxing, where Codex launches itself inside a **minimal
|
||||
container image** and mounts your repo _read/write_ at the same path. A
|
||||
custom `iptables`/`ipset` firewall script denies all egress except the
|
||||
OpenAI API. This gives you deterministic, reproducible runs without needing
|
||||
root on the host. You can use the [`run_in_container.sh`](./codex-cli/scripts/run_in_container.sh) script to set up the sandbox.
|
||||
Note that when running Linux in a containerized environment such as Docker, sandboxing may not work if the host/container configuration does not support the necessary Landlock/seccomp APIs. In such cases, we recommend configuring your Docker container so that it provides the sandbox guarantees you are looking for and then running `codex` with `--sandbox danger-full-access` (or, more simply, the `--dangerously-bypass-approvals-and-sandbox` flag) within your container.
|
||||
|
||||
---
|
||||
|
||||
@@ -205,24 +276,20 @@ The hardening mechanism Codex uses depends on your OS:
|
||||
| Requirement | Details |
|
||||
| --------------------------- | --------------------------------------------------------------- |
|
||||
| Operating systems | macOS 12+, Ubuntu 20.04+/Debian 10+, or Windows 11 **via WSL2** |
|
||||
| Node.js | **22 or newer** (LTS recommended) |
|
||||
| Git (optional, recommended) | 2.23+ for built-in PR helpers |
|
||||
| RAM | 4-GB minimum (8-GB recommended) |
|
||||
|
||||
> Never run `sudo npm install -g`; fix npm permissions instead.
|
||||
|
||||
---
|
||||
|
||||
## CLI reference
|
||||
|
||||
| Command | Purpose | Example |
|
||||
| ------------------------------------ | ----------------------------------- | ------------------------------------ |
|
||||
| `codex` | Interactive REPL | `codex` |
|
||||
| `codex "..."` | Initial prompt for interactive REPL | `codex "fix lint errors"` |
|
||||
| `codex -q "..."` | Non-interactive "quiet mode" | `codex -q --json "explain utils.ts"` |
|
||||
| `codex completion <bash\|zsh\|fish>` | Print shell completion script | `codex completion bash` |
|
||||
| Command | Purpose | Example |
|
||||
| ------------------ | ---------------------------------- | ------------------------------- |
|
||||
| `codex` | Interactive TUI | `codex` |
|
||||
| `codex "..."` | Initial prompt for interactive TUI | `codex "fix lint errors"` |
|
||||
| `codex exec "..."` | Non-interactive "automation mode" | `codex exec "explain utils.ts"` |
|
||||
|
||||
Key flags: `--model/-m`, `--approval-mode/-a`, `--quiet/-q`, and `--notify`.
|
||||
Key flags: `--model/-m`, `--ask-for-approval/-a`.
|
||||
|
||||
---
|
||||
|
||||
@@ -234,8 +301,6 @@ You can give Codex extra instructions and guidance using `AGENTS.md` files. Code
|
||||
2. `AGENTS.md` at repo root - shared project notes
|
||||
3. `AGENTS.md` in the current working directory - sub-folder/feature specifics
|
||||
|
||||
Disable loading of these files with `--no-project-doc` or the environment variable `CODEX_DISABLE_PROJECT_DOC=1`.
|
||||
|
||||
---
|
||||
|
||||
## Non-interactive / CI mode
|
||||
@@ -247,18 +312,37 @@ Run Codex head-less in pipelines. Example GitHub Action step:
|
||||
run: |
|
||||
npm install -g @openai/codex
|
||||
export OPENAI_API_KEY="${{ secrets.OPENAI_KEY }}"
|
||||
codex -a auto-edit --quiet "update CHANGELOG for next release"
|
||||
codex exec --full-auto "update CHANGELOG for next release"
|
||||
```
|
||||
|
||||
Set `CODEX_QUIET_MODE=1` to silence interactive UI noise.
|
||||
## Model Context Protocol (MCP)
|
||||
|
||||
The Codex CLI can be configured to leverage MCP servers by defining an [`mcp_servers`](./codex-rs/config.md#mcp_servers) section in `~/.codex/config.toml`. It is intended to mirror how tools such as Claude and Cursor define `mcpServers` in their respective JSON config files, though the Codex format is slightly different since it uses TOML rather than JSON, e.g.:
|
||||
|
||||
```toml
|
||||
# IMPORTANT: the top-level key is `mcp_servers` rather than `mcpServers`.
|
||||
[mcp_servers.server-name]
|
||||
command = "npx"
|
||||
args = ["-y", "mcp-server"]
|
||||
env = { "API_KEY" = "value" }
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> It is somewhat experimental, but the Codex CLI can also be run as an MCP _server_ via `codex mcp`. If you launch it with an MCP client such as `npx @modelcontextprotocol/inspector codex mcp` and send it a `tools/list` request, you will see that there is only one tool, `codex`, that accepts a grab-bag of inputs, including a catch-all `config` map for anything you might want to override. Feel free to play around with it and provide feedback via GitHub issues.
|
||||
|
||||
## Tracing / verbose logging
|
||||
|
||||
Setting the environment variable `DEBUG=true` prints full API request and response details:
|
||||
Because Codex is written in Rust, it honors the `RUST_LOG` environment variable to configure its logging behavior.
|
||||
|
||||
The TUI defaults to `RUST_LOG=codex_core=info,codex_tui=info` and log messages are written to `~/.codex/log/codex-tui.log`, so you can leave the following running in a separate terminal to monitor log messages as they are written:
|
||||
|
||||
```shell
|
||||
DEBUG=true codex
|
||||
```
|
||||
tail -F ~/.codex/log/codex-tui.log
|
||||
```
|
||||
|
||||
By comparison, the non-interactive mode (`codex exec`) defaults to `RUST_LOG=error`, but messages are printed inline, so there is no need to monitor a separate file.
|
||||
|
||||
See the Rust documentation on [`RUST_LOG`](https://docs.rs/env_logger/latest/env_logger/#enabling-logging) for more information on the configuration options.
|
||||
|
||||
---
|
||||
|
||||
@@ -281,201 +365,78 @@ Below are a few bite-size examples you can copy-paste. Replace the text in quote
|
||||
## Installation
|
||||
|
||||
<details open>
|
||||
<summary><strong>From npm (Recommended)</strong></summary>
|
||||
<summary><strong>Install Codex CLI using your preferred package manager.</strong></summary>
|
||||
|
||||
From `brew` (recommended, downloads only the binary for your platform):
|
||||
|
||||
```bash
|
||||
npm install -g @openai/codex
|
||||
# or
|
||||
yarn global add @openai/codex
|
||||
# or
|
||||
bun install -g @openai/codex
|
||||
# or
|
||||
pnpm add -g @openai/codex
|
||||
brew install codex
|
||||
```
|
||||
|
||||
From `npm` (generally more readily available, but downloads binaries for all supported platforms):
|
||||
|
||||
```bash
|
||||
npm i -g @openai/codex
|
||||
```
|
||||
|
||||
Or go to the [latest GitHub Release](https://github.com/openai/codex/releases/latest) and download the appropriate binary for your platform.
|
||||
|
||||
Admittedly, each GitHub Release contains many executables, but in practice, you likely want one of these:
|
||||
|
||||
- macOS
|
||||
- Apple Silicon/arm64: `codex-aarch64-apple-darwin.tar.gz`
|
||||
- x86_64 (older Mac hardware): `codex-x86_64-apple-darwin.tar.gz`
|
||||
- Linux
|
||||
- x86_64: `codex-x86_64-unknown-linux-musl.tar.gz`
|
||||
- arm64: `codex-aarch64-unknown-linux-musl.tar.gz`
|
||||
|
||||
Each archive contains a single entry with the platform baked into the name (e.g., `codex-x86_64-unknown-linux-musl`), so you likely want to rename it to `codex` after extracting it.
|
||||
|
||||
### DotSlash
|
||||
|
||||
The GitHub Release also contains a [DotSlash](https://dotslash-cli.com/) file for the Codex CLI named `codex`. Using a DotSlash file makes it possible to make a lightweight commit to source control to ensure all contributors use the same version of an executable, regardless of what platform they use for development.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>Build from source</strong></summary>
|
||||
|
||||
```bash
|
||||
# Clone the repository and navigate to the CLI package
|
||||
# Clone the repository and navigate to the root of the Cargo workspace.
|
||||
git clone https://github.com/openai/codex.git
|
||||
cd codex/codex-cli
|
||||
cd codex/codex-rs
|
||||
|
||||
# Enable corepack
|
||||
corepack enable
|
||||
# Install the Rust toolchain, if necessary.
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
source "$HOME/.cargo/env"
|
||||
rustup component add rustfmt
|
||||
rustup component add clippy
|
||||
|
||||
# Install dependencies and build
|
||||
pnpm install
|
||||
pnpm build
|
||||
# Build Codex.
|
||||
cargo build
|
||||
|
||||
# Linux-only: download prebuilt sandboxing binaries (requires gh and zstd).
|
||||
./scripts/install_native_deps.sh
|
||||
# Launch the TUI with a sample prompt.
|
||||
cargo run --bin codex -- "explain this codebase to me"
|
||||
|
||||
# Get the usage and the options
|
||||
node ./dist/cli.js --help
|
||||
# After making changes, ensure the code is clean.
|
||||
cargo fmt -- --config imports_granularity=Item
|
||||
cargo clippy --tests
|
||||
|
||||
# Run the locally-built CLI directly
|
||||
node ./dist/cli.js
|
||||
|
||||
# Or link the command globally for convenience
|
||||
pnpm link
|
||||
# Run the tests.
|
||||
cargo test
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## Configuration guide
|
||||
## Configuration
|
||||
|
||||
Codex configuration files can be placed in the `~/.codex/` directory, supporting both YAML and JSON formats.
|
||||
Codex supports a rich set of configuration options documented in [`codex-rs/config.md`](./codex-rs/config.md).
|
||||
|
||||
### Basic configuration parameters
|
||||
By default, Codex loads its configuration from `~/.codex/config.toml`.
|
||||
|
||||
| Parameter | Type | Default | Description | Available Options |
|
||||
| ------------------- | ------- | ---------- | -------------------------------- | ---------------------------------------------------------------------------------------------- |
|
||||
| `model` | string | `o4-mini` | AI model to use | Any model name supporting OpenAI API |
|
||||
| `approvalMode` | string | `suggest` | AI assistant's permission mode | `suggest` (suggestions only)<br>`auto-edit` (automatic edits)<br>`full-auto` (fully automatic) |
|
||||
| `fullAutoErrorMode` | string | `ask-user` | Error handling in full-auto mode | `ask-user` (prompt for user input)<br>`ignore-and-continue` (ignore and proceed) |
|
||||
| `notify` | boolean | `true` | Enable desktop notifications | `true`/`false` |
|
||||
|
||||
### Custom AI provider configuration
|
||||
|
||||
In the `providers` object, you can configure multiple AI service providers. Each provider requires the following parameters:
|
||||
|
||||
| Parameter | Type | Description | Example |
|
||||
| --------- | ------ | --------------------------------------- | ----------------------------- |
|
||||
| `name` | string | Display name of the provider | `"OpenAI"` |
|
||||
| `baseURL` | string | API service URL | `"https://api.openai.com/v1"` |
|
||||
| `envKey` | string | Environment variable name (for API key) | `"OPENAI_API_KEY"` |
|
||||
|
||||
### History configuration
|
||||
|
||||
In the `history` object, you can configure conversation history settings:
|
||||
|
||||
| Parameter | Type | Description | Example Value |
|
||||
| ------------------- | ------- | ------------------------------------------------------ | ------------- |
|
||||
| `maxSize` | number | Maximum number of history entries to save | `1000` |
|
||||
| `saveHistory` | boolean | Whether to save history | `true` |
|
||||
| `sensitivePatterns` | array | Patterns of sensitive information to filter in history | `[]` |
|
||||
|
||||
### Configuration examples
|
||||
|
||||
1. YAML format (save as `~/.codex/config.yaml`):
|
||||
|
||||
```yaml
|
||||
model: o4-mini
|
||||
approvalMode: suggest
|
||||
fullAutoErrorMode: ask-user
|
||||
notify: true
|
||||
```
|
||||
|
||||
2. JSON format (save as `~/.codex/config.json`):
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "o4-mini",
|
||||
"approvalMode": "suggest",
|
||||
"fullAutoErrorMode": "ask-user",
|
||||
"notify": true
|
||||
}
|
||||
```
|
||||
|
||||
### Full configuration example
|
||||
|
||||
Below is a comprehensive example of `config.json` with multiple custom providers:
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "o4-mini",
|
||||
"provider": "openai",
|
||||
"providers": {
|
||||
"openai": {
|
||||
"name": "OpenAI",
|
||||
"baseURL": "https://api.openai.com/v1",
|
||||
"envKey": "OPENAI_API_KEY"
|
||||
},
|
||||
"azure": {
|
||||
"name": "AzureOpenAI",
|
||||
"baseURL": "https://YOUR_PROJECT_NAME.openai.azure.com/openai",
|
||||
"envKey": "AZURE_OPENAI_API_KEY"
|
||||
},
|
||||
"openrouter": {
|
||||
"name": "OpenRouter",
|
||||
"baseURL": "https://openrouter.ai/api/v1",
|
||||
"envKey": "OPENROUTER_API_KEY"
|
||||
},
|
||||
"gemini": {
|
||||
"name": "Gemini",
|
||||
"baseURL": "https://generativelanguage.googleapis.com/v1beta/openai",
|
||||
"envKey": "GEMINI_API_KEY"
|
||||
},
|
||||
"ollama": {
|
||||
"name": "Ollama",
|
||||
"baseURL": "http://localhost:11434/v1",
|
||||
"envKey": "OLLAMA_API_KEY"
|
||||
},
|
||||
"mistral": {
|
||||
"name": "Mistral",
|
||||
"baseURL": "https://api.mistral.ai/v1",
|
||||
"envKey": "MISTRAL_API_KEY"
|
||||
},
|
||||
"deepseek": {
|
||||
"name": "DeepSeek",
|
||||
"baseURL": "https://api.deepseek.com",
|
||||
"envKey": "DEEPSEEK_API_KEY"
|
||||
},
|
||||
"xai": {
|
||||
"name": "xAI",
|
||||
"baseURL": "https://api.x.ai/v1",
|
||||
"envKey": "XAI_API_KEY"
|
||||
},
|
||||
"groq": {
|
||||
"name": "Groq",
|
||||
"baseURL": "https://api.groq.com/openai/v1",
|
||||
"envKey": "GROQ_API_KEY"
|
||||
},
|
||||
"arceeai": {
|
||||
"name": "ArceeAI",
|
||||
"baseURL": "https://conductor.arcee.ai/v1",
|
||||
"envKey": "ARCEEAI_API_KEY"
|
||||
}
|
||||
},
|
||||
"history": {
|
||||
"maxSize": 1000,
|
||||
"saveHistory": true,
|
||||
"sensitivePatterns": []
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Custom instructions
|
||||
|
||||
You can create a `~/.codex/AGENTS.md` file to define custom guidance for the agent:
|
||||
|
||||
```markdown
|
||||
- Always respond with emojis
|
||||
- Only use git commands when explicitly requested
|
||||
```
|
||||
|
||||
### Environment variables setup
|
||||
|
||||
For each AI provider, you need to set the corresponding API key in your environment variables. For example:
|
||||
|
||||
```bash
|
||||
# OpenAI
|
||||
export OPENAI_API_KEY="your-api-key-here"
|
||||
|
||||
# Azure OpenAI
|
||||
export AZURE_OPENAI_API_KEY="your-azure-api-key-here"
|
||||
export AZURE_OPENAI_API_VERSION="2025-03-01-preview" (Optional)
|
||||
|
||||
# OpenRouter
|
||||
export OPENROUTER_API_KEY="your-openrouter-key-here"
|
||||
|
||||
# Similarly for other providers
|
||||
```
|
||||
Though `--config` can be used to set/override ad-hoc config values for individual invocations of `codex`.
|
||||
|
||||
---
|
||||
|
||||
@@ -524,7 +485,13 @@ Codex CLI **does** support OpenAI organizations with [Zero Data Retention (ZDR)]
|
||||
OpenAI rejected the request. Error details: Status: 400, Code: unsupported_parameter, Type: invalid_request_error, Message: 400 Previous response cannot be used for this organization due to Zero Data Retention.
|
||||
```
|
||||
|
||||
You may need to upgrade to a more recent version with: `npm i -g @openai/codex@latest`
|
||||
Ensure you are running `codex` with `--config disable_response_storage=true` or add this line to `~/.codex/config.toml` to avoid specifying the command line option each time:
|
||||
|
||||
```toml
|
||||
disable_response_storage = true
|
||||
```
|
||||
|
||||
See [the configuration documentation on `disable_response_storage`](./codex-rs/config.md#disable_response_storage) for details.
|
||||
|
||||
---
|
||||
|
||||
@@ -549,51 +516,7 @@ More broadly we welcome contributions - whether you are opening your very first
|
||||
|
||||
- Create a _topic branch_ from `main` - e.g. `feat/interactive-prompt`.
|
||||
- Keep your changes focused. Multiple unrelated fixes should be opened as separate PRs.
|
||||
- Use `pnpm test:watch` during development for super-fast feedback.
|
||||
- We use **Vitest** for unit tests, **ESLint** + **Prettier** for style, and **TypeScript** for type-checking.
|
||||
- Before pushing, run the full test/type/lint suite:
|
||||
|
||||
### Git hooks with Husky
|
||||
|
||||
This project uses [Husky](https://typicode.github.io/husky/) to enforce code quality checks:
|
||||
|
||||
- **Pre-commit hook**: Automatically runs lint-staged to format and lint files before committing
|
||||
- **Pre-push hook**: Runs tests and type checking before pushing to the remote
|
||||
|
||||
These hooks help maintain code quality and prevent pushing code with failing tests. For more details, see [HUSKY.md](./codex-cli/HUSKY.md).
|
||||
|
||||
```bash
|
||||
pnpm test && pnpm run lint && pnpm run typecheck
|
||||
```
|
||||
|
||||
- If you have **not** yet signed the Contributor License Agreement (CLA), add a PR comment containing the exact text
|
||||
|
||||
```text
|
||||
I have read the CLA Document and I hereby sign the CLA
|
||||
```
|
||||
|
||||
The CLA-Assistant bot will turn the PR status green once all authors have signed.
|
||||
|
||||
```bash
|
||||
# Watch mode (tests rerun on change)
|
||||
pnpm test:watch
|
||||
|
||||
# Type-check without emitting files
|
||||
pnpm typecheck
|
||||
|
||||
# Automatically fix lint + prettier issues
|
||||
pnpm lint:fix
|
||||
pnpm format:fix
|
||||
```
|
||||
|
||||
### Debugging
|
||||
|
||||
To debug the CLI with a visual debugger, do the following in the `codex-cli` folder:
|
||||
|
||||
- Run `pnpm run build` to build the CLI, which will generate `cli.js.map` alongside `cli.js` in the `dist` folder.
|
||||
- Run the CLI with `node --inspect-brk ./dist/cli.js` The program then waits until a debugger is attached before proceeding. Options:
|
||||
- In VS Code, choose **Debug: Attach to Node Process** from the command palette and choose the option in the dropdown with debug port `9229` (likely the first option)
|
||||
- Go to <chrome://inspect> in Chrome and find **localhost:9229** and click **trace**
|
||||
- Following the [development setup](#development-workflow) instructions above, ensure your change is free of lint warnings and test failures.
|
||||
|
||||
### Writing high-impact code changes
|
||||
|
||||
@@ -605,7 +528,7 @@ To debug the CLI with a visual debugger, do the following in the `codex-cli` fol
|
||||
### Opening a pull request
|
||||
|
||||
- Fill in the PR template (or include similar information) - **What? Why? How?**
|
||||
- Run **all** checks locally (`npm test && npm run lint && npm run typecheck`). CI failures that could have been caught locally slow down the process.
|
||||
- Run **all** checks locally (`cargo test && cargo clippy --tests && cargo fmt -- --config imports_granularity=Item`). CI failures that could have been caught locally slow down the process.
|
||||
- Make sure your branch is up-to-date with `main` and that you have resolved merge conflicts.
|
||||
- Mark the PR as **Ready for review** only when you believe it is in a merge-able state.
|
||||
|
||||
@@ -652,73 +575,22 @@ The **DCO check** blocks merges until every commit in the PR carries the footer
|
||||
|
||||
### Releasing `codex`
|
||||
|
||||
To publish a new version of the CLI you first need to stage the npm package. A
|
||||
helper script in `codex-cli/scripts/` does all the heavy lifting. Inside the
|
||||
`codex-cli` folder run:
|
||||
_For admins only._
|
||||
|
||||
```bash
|
||||
# Classic, JS implementation that includes small, native binaries for Linux sandboxing.
|
||||
pnpm stage-release
|
||||
Make sure you are on `main` and have no local changes. Then run:
|
||||
|
||||
# Optionally specify the temp directory to reuse between runs.
|
||||
RELEASE_DIR=$(mktemp -d)
|
||||
pnpm stage-release --tmp "$RELEASE_DIR"
|
||||
|
||||
# "Fat" package that additionally bundles the native Rust CLI binaries for
|
||||
# Linux. End-users can then opt-in at runtime by setting CODEX_RUST=1.
|
||||
pnpm stage-release --native
|
||||
```shell
|
||||
VERSION=0.2.0 # Can also be 0.2.0-alpha.1 or any valid Rust version.
|
||||
./codex-rs/scripts/create_github_release.sh "$VERSION"
|
||||
```
|
||||
|
||||
Go to the folder where the release is staged and verify that it works as intended. If so, run the following from the temp folder:
|
||||
This will make a local commit on top of `main` with `version` set to `$VERSION` in `codex-rs/Cargo.toml` (note that on `main`, we leave the version as `version = "0.0.0"`).
|
||||
|
||||
```
|
||||
cd "$RELEASE_DIR"
|
||||
npm publish
|
||||
```
|
||||
This will push the commit using the tag `rust-v${VERSION}`, which in turn kicks off [the release workflow](.github/workflows/rust-release.yml). This will create a new GitHub Release named `$VERSION`.
|
||||
|
||||
### Alternative build options
|
||||
If everything looks good in the generated GitHub Release, uncheck the **pre-release** box so it is the latest release.
|
||||
|
||||
#### Nix flake development
|
||||
|
||||
Prerequisite: Nix >= 2.4 with flakes enabled (`experimental-features = nix-command flakes` in `~/.config/nix/nix.conf`).
|
||||
|
||||
Enter a Nix development shell:
|
||||
|
||||
```bash
|
||||
# Use either one of the commands according to which implementation you want to work with
|
||||
nix develop .#codex-cli # For entering codex-cli specific shell
|
||||
nix develop .#codex-rs # For entering codex-rs specific shell
|
||||
```
|
||||
|
||||
This shell includes Node.js, installs dependencies, builds the CLI, and provides a `codex` command alias.
|
||||
|
||||
Build and run the CLI directly:
|
||||
|
||||
```bash
|
||||
# Use either one of the commands according to which implementation you want to work with
|
||||
nix build .#codex-cli # For building codex-cli
|
||||
nix build .#codex-rs # For building codex-rs
|
||||
./result/bin/codex --help
|
||||
```
|
||||
|
||||
Run the CLI via the flake app:
|
||||
|
||||
```bash
|
||||
# Use either one of the commands according to which implementation you want to work with
|
||||
nix run .#codex-cli # For running codex-cli
|
||||
nix run .#codex-rs # For running codex-rs
|
||||
```
|
||||
|
||||
Use direnv with flakes
|
||||
|
||||
If you have direnv installed, you can use the following `.envrc` to automatically enter the Nix shell when you `cd` into the project directory:
|
||||
|
||||
```bash
|
||||
cd codex-rs
|
||||
echo "use flake ../flake.nix#codex-cli" >> .envrc && direnv allow
|
||||
cd codex-cli
|
||||
echo "use flake ../flake.nix#codex-rs" >> .envrc && direnv allow
|
||||
```
|
||||
Create a PR to update [`Formula/c/codex.rb`](https://github.com/Homebrew/homebrew-core/blob/main/Formula/c/codex.rb) on Homebrew.
|
||||
|
||||
---
|
||||
|
||||
|
||||
21
SUMMARY.md
Normal file
21
SUMMARY.md
Normal file
@@ -0,0 +1,21 @@
|
||||
You are a summarization assistant. A conversation follows between a user and a coding-focused AI (Codex). Your task is to generate a clear summary capturing:
|
||||
|
||||
• High-level objective or problem being solved
|
||||
• Key instructions or design decisions given by the user
|
||||
• Main code actions or behaviors from the AI
|
||||
• Important variables, functions, modules, or outputs discussed
|
||||
• Any unresolved questions or next steps
|
||||
|
||||
Produce the summary in a structured format like:
|
||||
|
||||
**Objective:** …
|
||||
|
||||
**User instructions:** … (bulleted)
|
||||
|
||||
**AI actions / code behavior:** … (bulleted)
|
||||
|
||||
**Important entities:** … (e.g. function names, variables, files)
|
||||
|
||||
**Open issues / next steps:** … (if any)
|
||||
|
||||
**Summary (concise):** (one or two sentences)
|
||||
4
codex-cli/.gitignore
vendored
4
codex-cli/.gitignore
vendored
@@ -1,3 +1,7 @@
|
||||
# Added by ./scripts/install_native_deps.sh
|
||||
/bin/codex-aarch64-apple-darwin
|
||||
/bin/codex-aarch64-unknown-linux-musl
|
||||
/bin/codex-linux-sandbox-arm64
|
||||
/bin/codex-linux-sandbox-x64
|
||||
/bin/codex-x86_64-apple-darwin
|
||||
/bin/codex-x86_64-unknown-linux-musl
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM node:20-slim
|
||||
FROM node:24-slim
|
||||
|
||||
ARG TZ
|
||||
ENV TZ="$TZ"
|
||||
|
||||
736
codex-cli/README.md
Normal file
736
codex-cli/README.md
Normal file
@@ -0,0 +1,736 @@
|
||||
<h1 align="center">OpenAI Codex CLI</h1>
|
||||
<p align="center">Lightweight coding agent that runs in your terminal</p>
|
||||
|
||||
<p align="center"><code>npm i -g @openai/codex</code></p>
|
||||
|
||||
> [!IMPORTANT]
|
||||
> This is the documentation for the _legacy_ TypeScript implementation of the Codex CLI. It has been superseded by the _Rust_ implementation. See the [README in the root of the Codex repository](https://github.com/openai/codex/blob/main/README.md) for details.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
<details>
|
||||
<summary><strong>Table of contents</strong></summary>
|
||||
|
||||
<!-- Begin ToC -->
|
||||
|
||||
- [Experimental technology disclaimer](#experimental-technology-disclaimer)
|
||||
- [Quickstart](#quickstart)
|
||||
- [Why Codex?](#why-codex)
|
||||
- [Security model & permissions](#security-model--permissions)
|
||||
- [Platform sandboxing details](#platform-sandboxing-details)
|
||||
- [System requirements](#system-requirements)
|
||||
- [CLI reference](#cli-reference)
|
||||
- [Memory & project docs](#memory--project-docs)
|
||||
- [Non-interactive / CI mode](#non-interactive--ci-mode)
|
||||
- [Tracing / verbose logging](#tracing--verbose-logging)
|
||||
- [Recipes](#recipes)
|
||||
- [Installation](#installation)
|
||||
- [Configuration guide](#configuration-guide)
|
||||
- [Basic configuration parameters](#basic-configuration-parameters)
|
||||
- [Custom AI provider configuration](#custom-ai-provider-configuration)
|
||||
- [History configuration](#history-configuration)
|
||||
- [Configuration examples](#configuration-examples)
|
||||
- [Full configuration example](#full-configuration-example)
|
||||
- [Custom instructions](#custom-instructions)
|
||||
- [Environment variables setup](#environment-variables-setup)
|
||||
- [FAQ](#faq)
|
||||
- [Zero data retention (ZDR) usage](#zero-data-retention-zdr-usage)
|
||||
- [Codex open source fund](#codex-open-source-fund)
|
||||
- [Contributing](#contributing)
|
||||
- [Development workflow](#development-workflow)
|
||||
- [Git hooks with Husky](#git-hooks-with-husky)
|
||||
- [Debugging](#debugging)
|
||||
- [Writing high-impact code changes](#writing-high-impact-code-changes)
|
||||
- [Opening a pull request](#opening-a-pull-request)
|
||||
- [Review process](#review-process)
|
||||
- [Community values](#community-values)
|
||||
- [Getting help](#getting-help)
|
||||
- [Contributor license agreement (CLA)](#contributor-license-agreement-cla)
|
||||
- [Quick fixes](#quick-fixes)
|
||||
- [Releasing `codex`](#releasing-codex)
|
||||
- [Alternative build options](#alternative-build-options)
|
||||
- [Nix flake development](#nix-flake-development)
|
||||
- [Security & responsible AI](#security--responsible-ai)
|
||||
- [License](#license)
|
||||
|
||||
<!-- End ToC -->
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## Experimental technology disclaimer
|
||||
|
||||
Codex CLI is an experimental project under active development. It is not yet stable, may contain bugs, incomplete features, or undergo breaking changes. We're building it in the open with the community and welcome:
|
||||
|
||||
- Bug reports
|
||||
- Feature requests
|
||||
- Pull requests
|
||||
- Good vibes
|
||||
|
||||
Help us improve by filing issues or submitting PRs (see the section below for how to contribute)!
|
||||
|
||||
## Quickstart
|
||||
|
||||
Install globally:
|
||||
|
||||
```shell
|
||||
npm install -g @openai/codex
|
||||
```
|
||||
|
||||
Next, set your OpenAI API key as an environment variable:
|
||||
|
||||
```shell
|
||||
export OPENAI_API_KEY="your-api-key-here"
|
||||
```
|
||||
|
||||
> **Note:** This command sets the key only for your current terminal session. You can add the `export` line to your shell's configuration file (e.g., `~/.zshrc`) but we recommend setting for the session. **Tip:** You can also place your API key into a `.env` file at the root of your project:
|
||||
>
|
||||
> ```env
|
||||
> OPENAI_API_KEY=your-api-key-here
|
||||
> ```
|
||||
>
|
||||
> The CLI will automatically load variables from `.env` (via `dotenv/config`).
|
||||
|
||||
<details>
|
||||
<summary><strong>Use <code>--provider</code> to use other models</strong></summary>
|
||||
|
||||
> Codex also allows you to use other providers that support the OpenAI Chat Completions API. You can set the provider in the config file or use the `--provider` flag. The possible options for `--provider` are:
|
||||
>
|
||||
> - openai (default)
|
||||
> - openrouter
|
||||
> - azure
|
||||
> - gemini
|
||||
> - ollama
|
||||
> - mistral
|
||||
> - deepseek
|
||||
> - xai
|
||||
> - groq
|
||||
> - arceeai
|
||||
> - any other provider that is compatible with the OpenAI API
|
||||
>
|
||||
> If you use a provider other than OpenAI, you will need to set the API key for the provider in the config file or in the environment variable as:
|
||||
>
|
||||
> ```shell
|
||||
> export <provider>_API_KEY="your-api-key-here"
|
||||
> ```
|
||||
>
|
||||
> If you use a provider not listed above, you must also set the base URL for the provider:
|
||||
>
|
||||
> ```shell
|
||||
> export <provider>_BASE_URL="https://your-provider-api-base-url"
|
||||
> ```
|
||||
|
||||
</details>
|
||||
<br />
|
||||
|
||||
Run interactively:
|
||||
|
||||
```shell
|
||||
codex
|
||||
```
|
||||
|
||||
Or, run with a prompt as input (and optionally in `Full Auto` mode):
|
||||
|
||||
```shell
|
||||
codex "explain this codebase to me"
|
||||
```
|
||||
|
||||
```shell
|
||||
codex --approval-mode full-auto "create the fanciest todo-list app"
|
||||
```
|
||||
|
||||
That's it - Codex will scaffold a file, run it inside a sandbox, install any
|
||||
missing dependencies, and show you the live result. Approve the changes and
|
||||
they'll be committed to your working directory.
|
||||
|
||||
---
|
||||
|
||||
## Why Codex?
|
||||
|
||||
Codex CLI is built for developers who already **live in the terminal** and want
|
||||
ChatGPT-level reasoning **plus** the power to actually run code, manipulate
|
||||
files, and iterate - all under version control. In short, it's _chat-driven
|
||||
development_ that understands and executes your repo.
|
||||
|
||||
- **Zero setup** - bring your OpenAI API key and it just works!
|
||||
- **Full auto-approval, while safe + secure** by running network-disabled and directory-sandboxed
|
||||
- **Multimodal** - pass in screenshots or diagrams to implement features ✨
|
||||
|
||||
And it's **fully open-source** so you can see and contribute to how it develops!
|
||||
|
||||
---
|
||||
|
||||
## Security model & permissions
|
||||
|
||||
Codex lets you decide _how much autonomy_ the agent receives and auto-approval policy via the
|
||||
`--approval-mode` flag (or the interactive onboarding prompt):
|
||||
|
||||
| Mode | What the agent may do without asking | Still requires approval |
|
||||
| ------------------------- | --------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- |
|
||||
| **Suggest** <br>(default) | <li>Read any file in the repo | <li>**All** file writes/patches<li> **Any** arbitrary shell commands (aside from reading files) |
|
||||
| **Auto Edit** | <li>Read **and** apply-patch writes to files | <li>**All** shell commands |
|
||||
| **Full Auto** | <li>Read/write files <li> Execute shell commands (network disabled, writes limited to your workdir) | - |
|
||||
|
||||
In **Full Auto** every command is run **network-disabled** and confined to the
|
||||
current working directory (plus temporary files) for defense-in-depth. Codex
|
||||
will also show a warning/confirmation if you start in **auto-edit** or
|
||||
**full-auto** while the directory is _not_ tracked by Git, so you always have a
|
||||
safety net.
|
||||
|
||||
Coming soon: you'll be able to whitelist specific commands to auto-execute with
|
||||
the network enabled, once we're confident in additional safeguards.
|
||||
|
||||
### Platform sandboxing details
|
||||
|
||||
The hardening mechanism Codex uses depends on your OS:
|
||||
|
||||
- **macOS 12+** - commands are wrapped with **Apple Seatbelt** (`sandbox-exec`).
|
||||
|
||||
- Everything is placed in a read-only jail except for a small set of
|
||||
writable roots (`$PWD`, `$TMPDIR`, `~/.codex`, etc.).
|
||||
- Outbound network is _fully blocked_ by default - even if a child process
|
||||
tries to `curl` somewhere it will fail.
|
||||
|
||||
- **Linux** - there is no sandboxing by default.
|
||||
We recommend using Docker for sandboxing, where Codex launches itself inside a **minimal
|
||||
container image** and mounts your repo _read/write_ at the same path. A
|
||||
custom `iptables`/`ipset` firewall script denies all egress except the
|
||||
OpenAI API. This gives you deterministic, reproducible runs without needing
|
||||
root on the host. You can use the [`run_in_container.sh`](../codex-cli/scripts/run_in_container.sh) script to set up the sandbox.
|
||||
|
||||
---
|
||||
|
||||
## System requirements
|
||||
|
||||
| Requirement | Details |
|
||||
| --------------------------- | --------------------------------------------------------------- |
|
||||
| Operating systems | macOS 12+, Ubuntu 20.04+/Debian 10+, or Windows 11 **via WSL2** |
|
||||
| Node.js | **22 or newer** (LTS recommended) |
|
||||
| Git (optional, recommended) | 2.23+ for built-in PR helpers |
|
||||
| RAM | 4-GB minimum (8-GB recommended) |
|
||||
|
||||
> Never run `sudo npm install -g`; fix npm permissions instead.
|
||||
|
||||
---
|
||||
|
||||
## CLI reference
|
||||
|
||||
| Command | Purpose | Example |
|
||||
| ------------------------------------ | ----------------------------------- | ------------------------------------ |
|
||||
| `codex` | Interactive REPL | `codex` |
|
||||
| `codex "..."` | Initial prompt for interactive REPL | `codex "fix lint errors"` |
|
||||
| `codex -q "..."` | Non-interactive "quiet mode" | `codex -q --json "explain utils.ts"` |
|
||||
| `codex completion <bash\|zsh\|fish>` | Print shell completion script | `codex completion bash` |
|
||||
|
||||
Key flags: `--model/-m`, `--approval-mode/-a`, `--quiet/-q`, and `--notify`.
|
||||
|
||||
---
|
||||
|
||||
## Memory & project docs
|
||||
|
||||
You can give Codex extra instructions and guidance using `AGENTS.md` files. Codex looks for `AGENTS.md` files in the following places, and merges them top-down:
|
||||
|
||||
1. `~/.codex/AGENTS.md` - personal global guidance
|
||||
2. `AGENTS.md` at repo root - shared project notes
|
||||
3. `AGENTS.md` in the current working directory - sub-folder/feature specifics
|
||||
|
||||
Disable loading of these files with `--no-project-doc` or the environment variable `CODEX_DISABLE_PROJECT_DOC=1`.
|
||||
|
||||
---
|
||||
|
||||
## Non-interactive / CI mode
|
||||
|
||||
Run Codex head-less in pipelines. Example GitHub Action step:
|
||||
|
||||
```yaml
|
||||
- name: Update changelog via Codex
|
||||
run: |
|
||||
npm install -g @openai/codex
|
||||
export OPENAI_API_KEY="${{ secrets.OPENAI_KEY }}"
|
||||
codex -a auto-edit --quiet "update CHANGELOG for next release"
|
||||
```
|
||||
|
||||
Set `CODEX_QUIET_MODE=1` to silence interactive UI noise.
|
||||
|
||||
## Tracing / verbose logging
|
||||
|
||||
Setting the environment variable `DEBUG=true` prints full API request and response details:
|
||||
|
||||
```shell
|
||||
DEBUG=true codex
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Recipes
|
||||
|
||||
Below are a few bite-size examples you can copy-paste. Replace the text in quotes with your own task. See the [prompting guide](https://github.com/openai/codex/blob/main/codex-cli/examples/prompting_guide.md) for more tips and usage patterns.
|
||||
|
||||
| ✨ | What you type | What happens |
|
||||
| --- | ------------------------------------------------------------------------------- | -------------------------------------------------------------------------- |
|
||||
| 1 | `codex "Refactor the Dashboard component to React Hooks"` | Codex rewrites the class component, runs `npm test`, and shows the diff. |
|
||||
| 2 | `codex "Generate SQL migrations for adding a users table"` | Infers your ORM, creates migration files, and runs them in a sandboxed DB. |
|
||||
| 3 | `codex "Write unit tests for utils/date.ts"` | Generates tests, executes them, and iterates until they pass. |
|
||||
| 4 | `codex "Bulk-rename *.jpeg -> *.jpg with git mv"` | Safely renames files and updates imports/usages. |
|
||||
| 5 | `codex "Explain what this regex does: ^(?=.*[A-Z]).{8,}$"` | Outputs a step-by-step human explanation. |
|
||||
| 6 | `codex "Carefully review this repo, and propose 3 high impact well-scoped PRs"` | Suggests impactful PRs in the current codebase. |
|
||||
| 7 | `codex "Look for vulnerabilities and create a security review report"` | Finds and explains security bugs. |
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
<details open>
|
||||
<summary><strong>From npm (Recommended)</strong></summary>
|
||||
|
||||
```bash
|
||||
npm install -g @openai/codex
|
||||
# or
|
||||
yarn global add @openai/codex
|
||||
# or
|
||||
bun install -g @openai/codex
|
||||
# or
|
||||
pnpm add -g @openai/codex
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>Build from source</strong></summary>
|
||||
|
||||
```bash
|
||||
# Clone the repository and navigate to the CLI package
|
||||
git clone https://github.com/openai/codex.git
|
||||
cd codex/codex-cli
|
||||
|
||||
# Enable corepack
|
||||
corepack enable
|
||||
|
||||
# Install dependencies and build
|
||||
pnpm install
|
||||
pnpm build
|
||||
|
||||
# Linux-only: download prebuilt sandboxing binaries (requires gh and zstd).
|
||||
./scripts/install_native_deps.sh
|
||||
|
||||
# Get the usage and the options
|
||||
node ./dist/cli.js --help
|
||||
|
||||
# Run the locally-built CLI directly
|
||||
node ./dist/cli.js
|
||||
|
||||
# Or link the command globally for convenience
|
||||
pnpm link
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## Configuration guide
|
||||
|
||||
Codex configuration files can be placed in the `~/.codex/` directory, supporting both YAML and JSON formats.
|
||||
|
||||
### Basic configuration parameters
|
||||
|
||||
| Parameter | Type | Default | Description | Available Options |
|
||||
| ------------------- | ------- | ---------- | -------------------------------- | ---------------------------------------------------------------------------------------------- |
|
||||
| `model` | string | `o4-mini` | AI model to use | Any model name supporting OpenAI API |
|
||||
| `approvalMode` | string | `suggest` | AI assistant's permission mode | `suggest` (suggestions only)<br>`auto-edit` (automatic edits)<br>`full-auto` (fully automatic) |
|
||||
| `fullAutoErrorMode` | string | `ask-user` | Error handling in full-auto mode | `ask-user` (prompt for user input)<br>`ignore-and-continue` (ignore and proceed) |
|
||||
| `notify` | boolean | `true` | Enable desktop notifications | `true`/`false` |
|
||||
|
||||
### Custom AI provider configuration
|
||||
|
||||
In the `providers` object, you can configure multiple AI service providers. Each provider requires the following parameters:
|
||||
|
||||
| Parameter | Type | Description | Example |
|
||||
| --------- | ------ | --------------------------------------- | ----------------------------- |
|
||||
| `name` | string | Display name of the provider | `"OpenAI"` |
|
||||
| `baseURL` | string | API service URL | `"https://api.openai.com/v1"` |
|
||||
| `envKey` | string | Environment variable name (for API key) | `"OPENAI_API_KEY"` |
|
||||
|
||||
### History configuration
|
||||
|
||||
In the `history` object, you can configure conversation history settings:
|
||||
|
||||
| Parameter | Type | Description | Example Value |
|
||||
| ------------------- | ------- | ------------------------------------------------------ | ------------- |
|
||||
| `maxSize` | number | Maximum number of history entries to save | `1000` |
|
||||
| `saveHistory` | boolean | Whether to save history | `true` |
|
||||
| `sensitivePatterns` | array | Patterns of sensitive information to filter in history | `[]` |
|
||||
|
||||
### Configuration examples
|
||||
|
||||
1. YAML format (save as `~/.codex/config.yaml`):
|
||||
|
||||
```yaml
|
||||
model: o4-mini
|
||||
approvalMode: suggest
|
||||
fullAutoErrorMode: ask-user
|
||||
notify: true
|
||||
```
|
||||
|
||||
2. JSON format (save as `~/.codex/config.json`):
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "o4-mini",
|
||||
"approvalMode": "suggest",
|
||||
"fullAutoErrorMode": "ask-user",
|
||||
"notify": true
|
||||
}
|
||||
```
|
||||
|
||||
### Full configuration example
|
||||
|
||||
Below is a comprehensive example of `config.json` with multiple custom providers:
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "o4-mini",
|
||||
"provider": "openai",
|
||||
"providers": {
|
||||
"openai": {
|
||||
"name": "OpenAI",
|
||||
"baseURL": "https://api.openai.com/v1",
|
||||
"envKey": "OPENAI_API_KEY"
|
||||
},
|
||||
"azure": {
|
||||
"name": "AzureOpenAI",
|
||||
"baseURL": "https://YOUR_PROJECT_NAME.openai.azure.com/openai",
|
||||
"envKey": "AZURE_OPENAI_API_KEY"
|
||||
},
|
||||
"openrouter": {
|
||||
"name": "OpenRouter",
|
||||
"baseURL": "https://openrouter.ai/api/v1",
|
||||
"envKey": "OPENROUTER_API_KEY"
|
||||
},
|
||||
"gemini": {
|
||||
"name": "Gemini",
|
||||
"baseURL": "https://generativelanguage.googleapis.com/v1beta/openai",
|
||||
"envKey": "GEMINI_API_KEY"
|
||||
},
|
||||
"ollama": {
|
||||
"name": "Ollama",
|
||||
"baseURL": "http://localhost:11434/v1",
|
||||
"envKey": "OLLAMA_API_KEY"
|
||||
},
|
||||
"mistral": {
|
||||
"name": "Mistral",
|
||||
"baseURL": "https://api.mistral.ai/v1",
|
||||
"envKey": "MISTRAL_API_KEY"
|
||||
},
|
||||
"deepseek": {
|
||||
"name": "DeepSeek",
|
||||
"baseURL": "https://api.deepseek.com",
|
||||
"envKey": "DEEPSEEK_API_KEY"
|
||||
},
|
||||
"xai": {
|
||||
"name": "xAI",
|
||||
"baseURL": "https://api.x.ai/v1",
|
||||
"envKey": "XAI_API_KEY"
|
||||
},
|
||||
"groq": {
|
||||
"name": "Groq",
|
||||
"baseURL": "https://api.groq.com/openai/v1",
|
||||
"envKey": "GROQ_API_KEY"
|
||||
},
|
||||
"arceeai": {
|
||||
"name": "ArceeAI",
|
||||
"baseURL": "https://conductor.arcee.ai/v1",
|
||||
"envKey": "ARCEEAI_API_KEY"
|
||||
}
|
||||
},
|
||||
"history": {
|
||||
"maxSize": 1000,
|
||||
"saveHistory": true,
|
||||
"sensitivePatterns": []
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Custom instructions
|
||||
|
||||
You can create a `~/.codex/AGENTS.md` file to define custom guidance for the agent:
|
||||
|
||||
```markdown
|
||||
- Always respond with emojis
|
||||
- Only use git commands when explicitly requested
|
||||
```
|
||||
|
||||
### Environment variables setup
|
||||
|
||||
For each AI provider, you need to set the corresponding API key in your environment variables. For example:
|
||||
|
||||
```bash
|
||||
# OpenAI
|
||||
export OPENAI_API_KEY="your-api-key-here"
|
||||
|
||||
# Azure OpenAI
|
||||
export AZURE_OPENAI_API_KEY="your-azure-api-key-here"
|
||||
export AZURE_OPENAI_API_VERSION="2025-04-01-preview" (Optional)
|
||||
|
||||
# OpenRouter
|
||||
export OPENROUTER_API_KEY="your-openrouter-key-here"
|
||||
|
||||
# Similarly for other providers
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FAQ
|
||||
|
||||
<details>
|
||||
<summary>OpenAI released a model called Codex in 2021 - is this related?</summary>
|
||||
|
||||
In 2021, OpenAI released Codex, an AI system designed to generate code from natural language prompts. That original Codex model was deprecated as of March 2023 and is separate from the CLI tool.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>Which models are supported?</summary>
|
||||
|
||||
Any model available with [Responses API](https://platform.openai.com/docs/api-reference/responses). The default is `o4-mini`, but pass `--model gpt-4.1` or set `model: gpt-4.1` in your config file to override.
|
||||
|
||||
</details>
|
||||
<details>
|
||||
<summary>Why does <code>o3</code> or <code>o4-mini</code> not work for me?</summary>
|
||||
|
||||
It's possible that your [API account needs to be verified](https://help.openai.com/en/articles/10910291-api-organization-verification) in order to start streaming responses and seeing chain of thought summaries from the API. If you're still running into issues, please let us know!
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>How do I stop Codex from editing my files?</summary>
|
||||
|
||||
Codex runs model-generated commands in a sandbox. If a proposed command or file change doesn't look right, you can simply type **n** to deny the command or give the model feedback.
|
||||
|
||||
</details>
|
||||
<details>
|
||||
<summary>Does it work on Windows?</summary>
|
||||
|
||||
Not directly. It requires [Windows Subsystem for Linux (WSL2)](https://learn.microsoft.com/en-us/windows/wsl/install) - Codex has been tested on macOS and Linux with Node 22.
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## Zero data retention (ZDR) usage
|
||||
|
||||
Codex CLI **does** support OpenAI organizations with [Zero Data Retention (ZDR)](https://platform.openai.com/docs/guides/your-data#zero-data-retention) enabled. If your OpenAI organization has Zero Data Retention enabled and you still encounter errors such as:
|
||||
|
||||
```
|
||||
OpenAI rejected the request. Error details: Status: 400, Code: unsupported_parameter, Type: invalid_request_error, Message: 400 Previous response cannot be used for this organization due to Zero Data Retention.
|
||||
```
|
||||
|
||||
You may need to upgrade to a more recent version with: `npm i -g @openai/codex@latest`
|
||||
|
||||
---
|
||||
|
||||
## Codex open source fund
|
||||
|
||||
We're excited to launch a **$1 million initiative** supporting open source projects that use Codex CLI and other OpenAI models.
|
||||
|
||||
- Grants are awarded up to **$25,000** API credits.
|
||||
- Applications are reviewed **on a rolling basis**.
|
||||
|
||||
**Interested? [Apply here](https://openai.com/form/codex-open-source-fund/).**
|
||||
|
||||
---
|
||||
|
||||
## Contributing
|
||||
|
||||
This project is under active development and the code will likely change pretty significantly. We'll update this message once that's complete!
|
||||
|
||||
More broadly we welcome contributions - whether you are opening your very first pull request or you're a seasoned maintainer. At the same time we care about reliability and long-term maintainability, so the bar for merging code is intentionally **high**. The guidelines below spell out what "high-quality" means in practice and should make the whole process transparent and friendly.
|
||||
|
||||
### Development workflow
|
||||
|
||||
- Create a _topic branch_ from `main` - e.g. `feat/interactive-prompt`.
|
||||
- Keep your changes focused. Multiple unrelated fixes should be opened as separate PRs.
|
||||
- Use `pnpm test:watch` during development for super-fast feedback.
|
||||
- We use **Vitest** for unit tests, **ESLint** + **Prettier** for style, and **TypeScript** for type-checking.
|
||||
- Before pushing, run the full test/type/lint suite:
|
||||
|
||||
### Git hooks with Husky
|
||||
|
||||
This project uses [Husky](https://typicode.github.io/husky/) to enforce code quality checks:
|
||||
|
||||
- **Pre-commit hook**: Automatically runs lint-staged to format and lint files before committing
|
||||
- **Pre-push hook**: Runs tests and type checking before pushing to the remote
|
||||
|
||||
These hooks help maintain code quality and prevent pushing code with failing tests. For more details, see [HUSKY.md](./HUSKY.md).
|
||||
|
||||
```bash
|
||||
pnpm test && pnpm run lint && pnpm run typecheck
|
||||
```
|
||||
|
||||
- If you have **not** yet signed the Contributor License Agreement (CLA), add a PR comment containing the exact text
|
||||
|
||||
```text
|
||||
I have read the CLA Document and I hereby sign the CLA
|
||||
```
|
||||
|
||||
The CLA-Assistant bot will turn the PR status green once all authors have signed.
|
||||
|
||||
```bash
|
||||
# Watch mode (tests rerun on change)
|
||||
pnpm test:watch
|
||||
|
||||
# Type-check without emitting files
|
||||
pnpm typecheck
|
||||
|
||||
# Automatically fix lint + prettier issues
|
||||
pnpm lint:fix
|
||||
pnpm format:fix
|
||||
```
|
||||
|
||||
### Debugging
|
||||
|
||||
To debug the CLI with a visual debugger, do the following in the `codex-cli` folder:
|
||||
|
||||
- Run `pnpm run build` to build the CLI, which will generate `cli.js.map` alongside `cli.js` in the `dist` folder.
|
||||
- Run the CLI with `node --inspect-brk ./dist/cli.js` The program then waits until a debugger is attached before proceeding. Options:
|
||||
- In VS Code, choose **Debug: Attach to Node Process** from the command palette and choose the option in the dropdown with debug port `9229` (likely the first option)
|
||||
- Go to <chrome://inspect> in Chrome and find **localhost:9229** and click **trace**
|
||||
|
||||
### Writing high-impact code changes
|
||||
|
||||
1. **Start with an issue.** Open a new one or comment on an existing discussion so we can agree on the solution before code is written.
|
||||
2. **Add or update tests.** Every new feature or bug-fix should come with test coverage that fails before your change and passes afterwards. 100% coverage is not required, but aim for meaningful assertions.
|
||||
3. **Document behaviour.** If your change affects user-facing behaviour, update the README, inline help (`codex --help`), or relevant example projects.
|
||||
4. **Keep commits atomic.** Each commit should compile and the tests should pass. This makes reviews and potential rollbacks easier.
|
||||
|
||||
### Opening a pull request
|
||||
|
||||
- Fill in the PR template (or include similar information) - **What? Why? How?**
|
||||
- Run **all** checks locally (`npm test && npm run lint && npm run typecheck`). CI failures that could have been caught locally slow down the process.
|
||||
- Make sure your branch is up-to-date with `main` and that you have resolved merge conflicts.
|
||||
- Mark the PR as **Ready for review** only when you believe it is in a merge-able state.
|
||||
|
||||
### Review process
|
||||
|
||||
1. One maintainer will be assigned as a primary reviewer.
|
||||
2. We may ask for changes - please do not take this personally. We value the work, we just also value consistency and long-term maintainability.
|
||||
3. When there is consensus that the PR meets the bar, a maintainer will squash-and-merge.
|
||||
|
||||
### Community values
|
||||
|
||||
- **Be kind and inclusive.** Treat others with respect; we follow the [Contributor Covenant](https://www.contributor-covenant.org/).
|
||||
- **Assume good intent.** Written communication is hard - err on the side of generosity.
|
||||
- **Teach & learn.** If you spot something confusing, open an issue or PR with improvements.
|
||||
|
||||
### Getting help
|
||||
|
||||
If you run into problems setting up the project, would like feedback on an idea, or just want to say _hi_ - please open a Discussion or jump into the relevant issue. We are happy to help.
|
||||
|
||||
Together we can make Codex CLI an incredible tool. **Happy hacking!** :rocket:
|
||||
|
||||
### Contributor license agreement (CLA)
|
||||
|
||||
All contributors **must** accept the CLA. The process is lightweight:
|
||||
|
||||
1. Open your pull request.
|
||||
2. Paste the following comment (or reply `recheck` if you've signed before):
|
||||
|
||||
```text
|
||||
I have read the CLA Document and I hereby sign the CLA
|
||||
```
|
||||
|
||||
3. The CLA-Assistant bot records your signature in the repo and marks the status check as passed.
|
||||
|
||||
No special Git commands, email attachments, or commit footers required.
|
||||
|
||||
#### Quick fixes
|
||||
|
||||
| Scenario | Command |
|
||||
| ----------------- | ------------------------------------------------ |
|
||||
| Amend last commit | `git commit --amend -s --no-edit && git push -f` |
|
||||
|
||||
The **DCO check** blocks merges until every commit in the PR carries the footer (with squash this is just the one).
|
||||
|
||||
### Releasing `codex`
|
||||
|
||||
To publish a new version of the CLI you first need to stage the npm package. A
|
||||
helper script in `codex-cli/scripts/` does all the heavy lifting. Inside the
|
||||
`codex-cli` folder run:
|
||||
|
||||
```bash
|
||||
# Classic, JS implementation that includes small, native binaries for Linux sandboxing.
|
||||
pnpm stage-release
|
||||
|
||||
# Optionally specify the temp directory to reuse between runs.
|
||||
RELEASE_DIR=$(mktemp -d)
|
||||
pnpm stage-release --tmp "$RELEASE_DIR"
|
||||
|
||||
# "Fat" package that additionally bundles the native Rust CLI binaries for
|
||||
# Linux. End-users can then opt-in at runtime by setting CODEX_RUST=1.
|
||||
pnpm stage-release --native
|
||||
```
|
||||
|
||||
Go to the folder where the release is staged and verify that it works as intended. If so, run the following from the temp folder:
|
||||
|
||||
```
|
||||
cd "$RELEASE_DIR"
|
||||
npm publish
|
||||
```
|
||||
|
||||
### Alternative build options
|
||||
|
||||
#### Nix flake development
|
||||
|
||||
Prerequisite: Nix >= 2.4 with flakes enabled (`experimental-features = nix-command flakes` in `~/.config/nix/nix.conf`).
|
||||
|
||||
Enter a Nix development shell:
|
||||
|
||||
```bash
|
||||
# Use either one of the commands according to which implementation you want to work with
|
||||
nix develop .#codex-cli # For entering codex-cli specific shell
|
||||
nix develop .#codex-rs # For entering codex-rs specific shell
|
||||
```
|
||||
|
||||
This shell includes Node.js, installs dependencies, builds the CLI, and provides a `codex` command alias.
|
||||
|
||||
Build and run the CLI directly:
|
||||
|
||||
```bash
|
||||
# Use either one of the commands according to which implementation you want to work with
|
||||
nix build .#codex-cli # For building codex-cli
|
||||
nix build .#codex-rs # For building codex-rs
|
||||
./result/bin/codex --help
|
||||
```
|
||||
|
||||
Run the CLI via the flake app:
|
||||
|
||||
```bash
|
||||
# Use either one of the commands according to which implementation you want to work with
|
||||
nix run .#codex-cli # For running codex-cli
|
||||
nix run .#codex-rs # For running codex-rs
|
||||
```
|
||||
|
||||
Use direnv with flakes
|
||||
|
||||
If you have direnv installed, you can use the following `.envrc` to automatically enter the Nix shell when you `cd` into the project directory:
|
||||
|
||||
```bash
|
||||
cd codex-rs
|
||||
echo "use flake ../flake.nix#codex-cli" >> .envrc && direnv allow
|
||||
cd codex-cli
|
||||
echo "use flake ../flake.nix#codex-rs" >> .envrc && direnv allow
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Security & responsible AI
|
||||
|
||||
Have you discovered a vulnerability or have concerns about model output? Please e-mail **security@openai.com** and we will respond promptly.
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This repository is licensed under the [Apache-2.0 License](LICENSE).
|
||||
@@ -15,7 +15,6 @@
|
||||
* current platform / architecture, an error is thrown.
|
||||
*/
|
||||
|
||||
import { spawnSync } from "child_process";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import { fileURLToPath, pathToFileURL } from "url";
|
||||
@@ -35,12 +34,13 @@ const wantsNative = fs.existsSync(path.join(__dirname, "use-native")) ||
|
||||
: false);
|
||||
|
||||
// Try native binary if requested.
|
||||
if (wantsNative) {
|
||||
if (wantsNative && process.platform !== 'win32') {
|
||||
const { platform, arch } = process;
|
||||
|
||||
let targetTriple = null;
|
||||
switch (platform) {
|
||||
case "linux":
|
||||
case "android":
|
||||
switch (arch) {
|
||||
case "x64":
|
||||
targetTriple = "x86_64-unknown-linux-musl";
|
||||
@@ -73,22 +73,77 @@ if (wantsNative) {
|
||||
}
|
||||
|
||||
const binaryPath = path.join(__dirname, "..", "bin", `codex-${targetTriple}`);
|
||||
const result = spawnSync(binaryPath, process.argv.slice(2), {
|
||||
|
||||
// Use an asynchronous spawn instead of spawnSync so that Node is able to
|
||||
// respond to signals (e.g. Ctrl-C / SIGINT) while the native binary is
|
||||
// executing. This allows us to forward those signals to the child process
|
||||
// and guarantees that when either the child terminates or the parent
|
||||
// receives a fatal signal, both processes exit in a predictable manner.
|
||||
const { spawn } = await import("child_process");
|
||||
|
||||
const child = spawn(binaryPath, process.argv.slice(2), {
|
||||
stdio: "inherit",
|
||||
env: { ...process.env, CODEX_MANAGED_BY_NPM: "1" },
|
||||
});
|
||||
|
||||
const exitCode = typeof result.status === "number" ? result.status : 1;
|
||||
process.exit(exitCode);
|
||||
}
|
||||
child.on("error", (err) => {
|
||||
// Typically triggered when the binary is missing or not executable.
|
||||
// Re-throwing here will terminate the parent with a non-zero exit code
|
||||
// while still printing a helpful stack trace.
|
||||
// eslint-disable-next-line no-console
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
});
|
||||
|
||||
// Fallback: execute the original JavaScript CLI.
|
||||
// Forward common termination signals to the child so that it shuts down
|
||||
// gracefully. In the handler we temporarily disable the default behavior of
|
||||
// exiting immediately; once the child has been signaled we simply wait for
|
||||
// its exit event which will in turn terminate the parent (see below).
|
||||
const forwardSignal = (signal) => {
|
||||
if (child.killed) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
child.kill(signal);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
};
|
||||
|
||||
// Resolve the path to the compiled CLI bundle
|
||||
const cliPath = path.resolve(__dirname, "../dist/cli.js");
|
||||
const cliUrl = pathToFileURL(cliPath).href;
|
||||
["SIGINT", "SIGTERM", "SIGHUP"].forEach((sig) => {
|
||||
process.on(sig, () => forwardSignal(sig));
|
||||
});
|
||||
|
||||
// Load and execute the CLI
|
||||
(async () => {
|
||||
// When the child exits, mirror its termination reason in the parent so that
|
||||
// shell scripts and other tooling observe the correct exit status.
|
||||
// Wrap the lifetime of the child process in a Promise so that we can await
|
||||
// its termination in a structured way. The Promise resolves with an object
|
||||
// describing how the child exited: either via exit code or due to a signal.
|
||||
const childResult = await new Promise((resolve) => {
|
||||
child.on("exit", (code, signal) => {
|
||||
if (signal) {
|
||||
resolve({ type: "signal", signal });
|
||||
} else {
|
||||
resolve({ type: "code", exitCode: code ?? 1 });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
if (childResult.type === "signal") {
|
||||
// Re-emit the same signal so that the parent terminates with the expected
|
||||
// semantics (this also sets the correct exit code of 128 + n).
|
||||
process.kill(process.pid, childResult.signal);
|
||||
} else {
|
||||
process.exit(childResult.exitCode);
|
||||
}
|
||||
} else {
|
||||
// Fallback: execute the original JavaScript CLI.
|
||||
|
||||
// Resolve the path to the compiled CLI bundle
|
||||
const cliPath = path.resolve(__dirname, "../dist/cli.js");
|
||||
const cliUrl = pathToFileURL(cliPath).href;
|
||||
|
||||
// Load and execute the CLI
|
||||
try {
|
||||
await import(cliUrl);
|
||||
} catch (err) {
|
||||
@@ -96,4 +151,4 @@ const cliUrl = pathToFileURL(cliPath).href;
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
}
|
||||
})();
|
||||
}
|
||||
|
||||
@@ -84,6 +84,6 @@
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/openai/codex"
|
||||
"url": "git+https://github.com/openai/codex.git"
|
||||
}
|
||||
}
|
||||
|
||||
9
codex-cli/scripts/README.md
Normal file
9
codex-cli/scripts/README.md
Normal file
@@ -0,0 +1,9 @@
|
||||
# npm releases
|
||||
|
||||
Run the following:
|
||||
|
||||
To build the 0.2.x or later version of the npm module, which runs the Rust version of the CLI, build it as follows:
|
||||
|
||||
```bash
|
||||
./codex-cli/scripts/stage_rust_release.py --release-version 0.6.0
|
||||
```
|
||||
@@ -8,7 +8,7 @@
|
||||
# the native implementation when users set CODEX_RUST=1.
|
||||
#
|
||||
# Usage
|
||||
# install_native_deps.sh [RELEASE_ROOT] [--full-native]
|
||||
# install_native_deps.sh [--full-native] [--workflow-url URL] [CODEX_CLI_ROOT]
|
||||
#
|
||||
# The optional RELEASE_ROOT is the path that contains package.json. Omitting
|
||||
# it installs the binaries into the repository's own bin/ folder to support
|
||||
@@ -20,32 +20,43 @@ set -euo pipefail
|
||||
# Parse arguments
|
||||
# ------------------
|
||||
|
||||
DEST_DIR=""
|
||||
CODEX_CLI_ROOT=""
|
||||
INCLUDE_RUST=0
|
||||
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
# Until we start publishing stable GitHub releases, we have to grab the binaries
|
||||
# from the GitHub Action that created them. Update the URL below to point to the
|
||||
# appropriate workflow run:
|
||||
WORKFLOW_URL="https://github.com/openai/codex/actions/runs/15981617627"
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--full-native)
|
||||
INCLUDE_RUST=1
|
||||
;;
|
||||
--workflow-url)
|
||||
shift || { echo "--workflow-url requires an argument"; exit 1; }
|
||||
if [ -n "$1" ]; then
|
||||
WORKFLOW_URL="$1"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
if [[ -z "$DEST_DIR" ]]; then
|
||||
DEST_DIR="$arg"
|
||||
if [[ -z "$CODEX_CLI_ROOT" ]]; then
|
||||
CODEX_CLI_ROOT="$1"
|
||||
else
|
||||
echo "Unexpected argument: $arg" >&2
|
||||
echo "Unexpected argument: $1" >&2
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Determine where the binaries should be installed.
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
if [[ $# -gt 0 ]]; then
|
||||
if [ -n "$CODEX_CLI_ROOT" ]; then
|
||||
# The caller supplied a release root directory.
|
||||
CODEX_CLI_ROOT="$1"
|
||||
BIN_DIR="$CODEX_CLI_ROOT/bin"
|
||||
else
|
||||
# No argument; fall back to the repo’s own bin directory.
|
||||
@@ -62,10 +73,6 @@ mkdir -p "$BIN_DIR"
|
||||
# Download and decompress the artifacts from the GitHub Actions workflow.
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
# Until we start publishing stable GitHub releases, we have to grab the binaries
|
||||
# from the GitHub Action that created them. Update the URL below to point to the
|
||||
# appropriate workflow run:
|
||||
WORKFLOW_URL="https://github.com/openai/codex/actions/runs/15483730027"
|
||||
WORKFLOW_ID="${WORKFLOW_URL##*/}"
|
||||
|
||||
ARTIFACTS_DIR="$(mktemp -d)"
|
||||
|
||||
@@ -4,10 +4,7 @@
|
||||
# -----------------------------------------------------------------------------
|
||||
# Stages an npm release for @openai/codex.
|
||||
#
|
||||
# The script used to accept a single optional positional argument that indicated
|
||||
# the temporary directory in which to stage the package. We now support a
|
||||
# flag-based interface so that we can extend the command with further options
|
||||
# without breaking the call-site contract.
|
||||
# Usage:
|
||||
#
|
||||
# --tmp <dir> : Use <dir> instead of a freshly created temp directory.
|
||||
# --native : Bundle the pre-built Rust CLI binaries for Linux alongside
|
||||
@@ -30,11 +27,12 @@ set -euo pipefail
|
||||
|
||||
usage() {
|
||||
cat <<EOF
|
||||
Usage: $(basename "$0") [--tmp DIR] [--native]
|
||||
Usage: $(basename "$0") [--tmp DIR] [--native] [--version VERSION]
|
||||
|
||||
Options
|
||||
--tmp DIR Use DIR to stage the release (defaults to a fresh mktemp dir)
|
||||
--native Bundle Rust binaries for Linux (fat package)
|
||||
--version Specify the version to release (defaults to a timestamp-based version)
|
||||
-h, --help Show this help
|
||||
|
||||
Legacy positional argument: the first non-flag argument is still interpreted
|
||||
@@ -45,6 +43,9 @@ EOF
|
||||
|
||||
TMPDIR=""
|
||||
INCLUDE_NATIVE=0
|
||||
# Default to a timestamp-based version (keep same scheme as before)
|
||||
VERSION="$(printf '0.1.%d' "$(date +%y%m%d%H%M)")"
|
||||
WORKFLOW_URL=""
|
||||
|
||||
# Manual flag parser - Bash getopts does not handle GNU long options well.
|
||||
while [[ $# -gt 0 ]]; do
|
||||
@@ -59,6 +60,14 @@ while [[ $# -gt 0 ]]; do
|
||||
--native)
|
||||
INCLUDE_NATIVE=1
|
||||
;;
|
||||
--version)
|
||||
shift || { echo "--version requires an argument"; usage 1; }
|
||||
VERSION="$1"
|
||||
;;
|
||||
--workflow-url)
|
||||
shift || { echo "--workflow-url requires an argument"; exit 1; }
|
||||
WORKFLOW_URL="$1"
|
||||
;;
|
||||
-h|--help)
|
||||
usage 0
|
||||
;;
|
||||
@@ -108,9 +117,6 @@ cp -r dist "$TMPDIR/dist"
|
||||
cp -r src "$TMPDIR/src" # keep source for TS sourcemaps
|
||||
cp ../README.md "$TMPDIR" || true # README is one level up - ignore if missing
|
||||
|
||||
# Derive a timestamp-based version (keep same scheme as before)
|
||||
VERSION="$(printf '0.1.%d' "$(date +%y%m%d%H%M)")"
|
||||
|
||||
# Modify package.json - bump version and optionally add the native directory to
|
||||
# the files array so that the binaries are published to npm.
|
||||
|
||||
@@ -121,7 +127,7 @@ jq --arg version "$VERSION" \
|
||||
# 2. Native runtime deps (sandbox plus optional Rust binaries)
|
||||
|
||||
if [[ "$INCLUDE_NATIVE" -eq 1 ]]; then
|
||||
./scripts/install_native_deps.sh "$TMPDIR" --full-native
|
||||
./scripts/install_native_deps.sh --full-native --workflow-url "$WORKFLOW_URL" "$TMPDIR"
|
||||
touch "${TMPDIR}/bin/use-native"
|
||||
else
|
||||
./scripts/install_native_deps.sh "$TMPDIR"
|
||||
@@ -132,7 +138,8 @@ popd >/dev/null
|
||||
echo "Staged version $VERSION for release in $TMPDIR"
|
||||
|
||||
if [[ "$INCLUDE_NATIVE" -eq 1 ]]; then
|
||||
echo "Test Rust:"
|
||||
echo "Verify the CLI:"
|
||||
echo " node ${TMPDIR}/bin/codex.js --version"
|
||||
echo " node ${TMPDIR}/bin/codex.js --help"
|
||||
else
|
||||
echo "Test Node:"
|
||||
|
||||
62
codex-cli/scripts/stage_rust_release.py
Executable file
62
codex-cli/scripts/stage_rust_release.py
Executable file
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="""Stage a release for the npm module.
|
||||
|
||||
Run this after the GitHub Release has been created and use
|
||||
`--release-version` to specify the version to release.
|
||||
"""
|
||||
)
|
||||
parser.add_argument(
|
||||
"--release-version", required=True, help="Version to release, e.g., 0.3.0"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
version = args.release_version
|
||||
|
||||
gh_run = subprocess.run(
|
||||
[
|
||||
"gh",
|
||||
"run",
|
||||
"list",
|
||||
"--branch",
|
||||
f"rust-v{version}",
|
||||
"--json",
|
||||
"workflowName,url,headSha",
|
||||
"--jq",
|
||||
'first(.[] | select(.workflowName == "rust-release"))',
|
||||
],
|
||||
stdout=subprocess.PIPE,
|
||||
check=True,
|
||||
)
|
||||
gh_run.check_returncode()
|
||||
workflow = json.loads(gh_run.stdout)
|
||||
sha = workflow["headSha"]
|
||||
|
||||
print(f"should `git checkout {sha}`")
|
||||
|
||||
current_dir = Path(__file__).parent.resolve()
|
||||
stage_release = subprocess.run(
|
||||
[
|
||||
current_dir / "stage_release.sh",
|
||||
"--version",
|
||||
version,
|
||||
"--workflow-url",
|
||||
workflow["url"],
|
||||
"--native",
|
||||
]
|
||||
)
|
||||
stage_release.check_returncode()
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -370,11 +370,26 @@ export function isSafeCommand(
|
||||
reason: "View file with line numbers",
|
||||
group: "Reading files",
|
||||
};
|
||||
case "rg":
|
||||
case "rg": {
|
||||
// Certain ripgrep options execute external commands or invoke other
|
||||
// processes, so we must reject them.
|
||||
const isUnsafe = command.some(
|
||||
(arg: string) =>
|
||||
UNSAFE_OPTIONS_FOR_RIPGREP_WITHOUT_ARGS.has(arg) ||
|
||||
[...UNSAFE_OPTIONS_FOR_RIPGREP_WITH_ARGS].some(
|
||||
(opt) => arg === opt || arg.startsWith(`${opt}=`),
|
||||
),
|
||||
);
|
||||
|
||||
if (isUnsafe) {
|
||||
break;
|
||||
}
|
||||
|
||||
return {
|
||||
reason: "Ripgrep search",
|
||||
group: "Searching",
|
||||
};
|
||||
}
|
||||
case "find": {
|
||||
// Certain options to `find` allow executing arbitrary processes, so we
|
||||
// cannot auto-approve them.
|
||||
@@ -495,6 +510,22 @@ const UNSAFE_OPTIONS_FOR_FIND_COMMAND: ReadonlySet<string> = new Set([
|
||||
"-fprintf",
|
||||
]);
|
||||
|
||||
// Ripgrep options that are considered unsafe because they may execute
|
||||
// arbitrary commands or spawn auxiliary processes.
|
||||
const UNSAFE_OPTIONS_FOR_RIPGREP_WITH_ARGS: ReadonlySet<string> = new Set([
|
||||
// Executes an arbitrary command for each matching file.
|
||||
"--pre",
|
||||
// Allows custom hostname command which could leak environment details.
|
||||
"--hostname-bin",
|
||||
]);
|
||||
|
||||
const UNSAFE_OPTIONS_FOR_RIPGREP_WITHOUT_ARGS: ReadonlySet<string> = new Set([
|
||||
// Enables searching inside archives which triggers external decompression
|
||||
// utilities – reject out of an abundance of caution.
|
||||
"--search-zip",
|
||||
"-z",
|
||||
]);
|
||||
|
||||
// ---------------- Helper utilities for complex shell expressions -----------------
|
||||
|
||||
// A conservative allow-list of bash operators that do not, on their own, cause
|
||||
|
||||
@@ -45,6 +45,7 @@ import { createInputItem } from "./utils/input-utils";
|
||||
import { initLogger } from "./utils/logger/log";
|
||||
import { isModelSupportedForResponses } from "./utils/model-utils.js";
|
||||
import { parseToolCall } from "./utils/parsers";
|
||||
import { providers } from "./utils/providers";
|
||||
import { onExit, setInkRenderer } from "./utils/terminal";
|
||||
import chalk from "chalk";
|
||||
import { spawnSync } from "child_process";
|
||||
@@ -327,26 +328,44 @@ try {
|
||||
// ignore errors
|
||||
}
|
||||
|
||||
if (cli.flags.login) {
|
||||
apiKey = await fetchApiKey(client.issuer, client.client_id);
|
||||
try {
|
||||
const home = os.homedir();
|
||||
const authDir = path.join(home, ".codex");
|
||||
const authFile = path.join(authDir, "auth.json");
|
||||
if (fs.existsSync(authFile)) {
|
||||
const data = JSON.parse(fs.readFileSync(authFile, "utf-8"));
|
||||
savedTokens = data.tokens;
|
||||
// Get provider-specific API key if not OpenAI
|
||||
if (provider.toLowerCase() !== "openai") {
|
||||
const providerInfo = providers[provider.toLowerCase()];
|
||||
if (providerInfo) {
|
||||
const providerApiKey = process.env[providerInfo.envKey];
|
||||
if (providerApiKey) {
|
||||
apiKey = providerApiKey;
|
||||
}
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
} else if (!apiKey) {
|
||||
apiKey = await fetchApiKey(client.issuer, client.client_id);
|
||||
}
|
||||
|
||||
// Only proceed with OpenAI auth flow if:
|
||||
// 1. Provider is OpenAI and no API key is set, or
|
||||
// 2. Login flag is explicitly set
|
||||
if (provider.toLowerCase() === "openai" && !apiKey) {
|
||||
if (cli.flags.login) {
|
||||
apiKey = await fetchApiKey(client.issuer, client.client_id);
|
||||
try {
|
||||
const home = os.homedir();
|
||||
const authDir = path.join(home, ".codex");
|
||||
const authFile = path.join(authDir, "auth.json");
|
||||
if (fs.existsSync(authFile)) {
|
||||
const data = JSON.parse(fs.readFileSync(authFile, "utf-8"));
|
||||
savedTokens = data.tokens;
|
||||
}
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
} else {
|
||||
apiKey = await fetchApiKey(client.issuer, client.client_id);
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure the API key is available as an environment variable for legacy code
|
||||
process.env["OPENAI_API_KEY"] = apiKey;
|
||||
|
||||
if (cli.flags.free) {
|
||||
// Only attempt credit redemption for OpenAI provider
|
||||
if (cli.flags.free && provider.toLowerCase() === "openai") {
|
||||
// eslint-disable-next-line no-console
|
||||
console.log(`${chalk.bold("codex --free")} attempting to redeem credits...`);
|
||||
if (!savedTokens?.refresh_token) {
|
||||
@@ -379,13 +398,18 @@ if (!apiKey && !NO_API_KEY_REQUIRED.has(provider.toLowerCase())) {
|
||||
? `You can create a key here: ${chalk.bold(
|
||||
chalk.underline("https://platform.openai.com/account/api-keys"),
|
||||
)}\n`
|
||||
: provider.toLowerCase() === "gemini"
|
||||
: provider.toLowerCase() === "azure"
|
||||
? `You can create a ${chalk.bold(
|
||||
`${provider.toUpperCase()}_API_KEY`,
|
||||
)} ` + `in the ${chalk.bold(`Google AI Studio`)}.\n`
|
||||
: `You can create a ${chalk.bold(
|
||||
`${provider.toUpperCase()}_API_KEY`,
|
||||
)} ` + `in the ${chalk.bold(`${provider}`)} dashboard.\n`
|
||||
`${provider.toUpperCase()}_OPENAI_API_KEY`,
|
||||
)} ` +
|
||||
`in Azure AI Foundry portal at ${chalk.bold(chalk.underline("https://ai.azure.com"))}.\n`
|
||||
: provider.toLowerCase() === "gemini"
|
||||
? `You can create a ${chalk.bold(
|
||||
`${provider.toUpperCase()}_API_KEY`,
|
||||
)} ` + `in the ${chalk.bold(`Google AI Studio`)}.\n`
|
||||
: `You can create a ${chalk.bold(
|
||||
`${provider.toUpperCase()}_API_KEY`,
|
||||
)} ` + `in the ${chalk.bold(`${provider}`)} dashboard.\n`
|
||||
}`,
|
||||
);
|
||||
process.exit(1);
|
||||
|
||||
@@ -800,7 +800,8 @@ export class AgentLoop {
|
||||
|
||||
const responseCall =
|
||||
!this.config.provider ||
|
||||
this.config.provider?.toLowerCase() === "openai"
|
||||
this.config.provider?.toLowerCase() === "openai" ||
|
||||
this.config.provider?.toLowerCase() === "azure"
|
||||
? (params: ResponseCreateParams) =>
|
||||
this.oai.responses.create(params)
|
||||
: (params: ResponseCreateParams) =>
|
||||
@@ -1188,7 +1189,8 @@ export class AgentLoop {
|
||||
|
||||
const responseCall =
|
||||
!this.config.provider ||
|
||||
this.config.provider?.toLowerCase() === "openai"
|
||||
this.config.provider?.toLowerCase() === "openai" ||
|
||||
this.config.provider?.toLowerCase() === "azure"
|
||||
? (params: ResponseCreateParams) =>
|
||||
this.oai.responses.create(params)
|
||||
: (params: ResponseCreateParams) =>
|
||||
|
||||
@@ -147,4 +147,8 @@ const READ_ONLY_SEATBELT_POLICY = `
|
||||
(sysctl-name "kern.version")
|
||||
(sysctl-name "sysctl.proc_cputype")
|
||||
(sysctl-name-prefix "hw.perflevel")
|
||||
)`.trim();
|
||||
)
|
||||
|
||||
; Added on top of Chrome profile
|
||||
; Needed for python multiprocessing on MacOS for the SemLock
|
||||
(allow ipc-posix-sem)`.trim();
|
||||
|
||||
@@ -69,7 +69,7 @@ export const OPENAI_BASE_URL = process.env["OPENAI_BASE_URL"] || "";
|
||||
export let OPENAI_API_KEY = process.env["OPENAI_API_KEY"] || "";
|
||||
|
||||
export const AZURE_OPENAI_API_VERSION =
|
||||
process.env["AZURE_OPENAI_API_VERSION"] || "2025-03-01-preview";
|
||||
process.env["AZURE_OPENAI_API_VERSION"] || "2025-04-01-preview";
|
||||
|
||||
export const DEFAULT_REASONING_EFFORT = "high";
|
||||
export const OPENAI_ORGANIZATION = process.env["OPENAI_ORGANIZATION"] || "";
|
||||
|
||||
107
codex-cli/tests/agent-azure-responses-endpoint.test.ts
Normal file
107
codex-cli/tests/agent-azure-responses-endpoint.test.ts
Normal file
@@ -0,0 +1,107 @@
|
||||
/**
|
||||
* tests/agent-azure-responses-endpoint.test.ts
|
||||
*
|
||||
* Verifies that AgentLoop calls the `/responses` endpoint when provider is set to Azure.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
|
||||
// Fake stream that yields a completed response event
|
||||
class FakeStream {
|
||||
async *[Symbol.asyncIterator]() {
|
||||
yield {
|
||||
type: "response.completed",
|
||||
response: { id: "azure_resp", status: "completed", output: [] },
|
||||
} as any;
|
||||
}
|
||||
}
|
||||
|
||||
let lastCreateParams: any = null;
|
||||
|
||||
vi.mock("openai", () => {
|
||||
class FakeDefaultClient {
|
||||
public responses = {
|
||||
create: async (params: any) => {
|
||||
lastCreateParams = params;
|
||||
return new FakeStream();
|
||||
},
|
||||
};
|
||||
}
|
||||
class FakeAzureClient {
|
||||
public responses = {
|
||||
create: async (params: any) => {
|
||||
lastCreateParams = params;
|
||||
return new FakeStream();
|
||||
},
|
||||
};
|
||||
}
|
||||
class APIConnectionTimeoutError extends Error {}
|
||||
return {
|
||||
__esModule: true,
|
||||
default: FakeDefaultClient,
|
||||
AzureOpenAI: FakeAzureClient,
|
||||
APIConnectionTimeoutError,
|
||||
};
|
||||
});
|
||||
|
||||
// Stub approvals to bypass command approval logic
|
||||
vi.mock("../src/approvals.js", () => ({
|
||||
__esModule: true,
|
||||
alwaysApprovedCommands: new Set<string>(),
|
||||
canAutoApprove: () => ({ type: "auto-approve", runInSandbox: false }),
|
||||
isSafeCommand: () => null,
|
||||
}));
|
||||
|
||||
// Stub format-command to avoid formatting side effects
|
||||
vi.mock("../src/format-command.js", () => ({
|
||||
__esModule: true,
|
||||
formatCommandForDisplay: (cmd: Array<string>) => cmd.join(" "),
|
||||
}));
|
||||
|
||||
// Stub internal logging to keep output clean
|
||||
vi.mock("../src/utils/agent/log.js", () => ({
|
||||
__esModule: true,
|
||||
log: () => {},
|
||||
isLoggingEnabled: () => false,
|
||||
}));
|
||||
|
||||
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
|
||||
|
||||
describe("AgentLoop Azure provider responses endpoint", () => {
|
||||
beforeEach(() => {
|
||||
lastCreateParams = null;
|
||||
});
|
||||
|
||||
it("calls the /responses endpoint when provider is azure", async () => {
|
||||
const cfg: any = {
|
||||
model: "test-model",
|
||||
provider: "azure",
|
||||
instructions: "",
|
||||
disableResponseStorage: false,
|
||||
notify: false,
|
||||
};
|
||||
const loop = new AgentLoop({
|
||||
additionalWritableRoots: [],
|
||||
model: cfg.model,
|
||||
config: cfg,
|
||||
instructions: cfg.instructions,
|
||||
approvalPolicy: { mode: "suggest" } as any,
|
||||
onItem: () => {},
|
||||
onLoading: () => {},
|
||||
getCommandConfirmation: async () => ({ review: "yes" }) as any,
|
||||
onLastResponseId: () => {},
|
||||
});
|
||||
|
||||
await loop.run([
|
||||
{
|
||||
type: "message",
|
||||
role: "user",
|
||||
content: [{ type: "input_text", text: "hello" }],
|
||||
},
|
||||
]);
|
||||
|
||||
expect(lastCreateParams).not.toBeNull();
|
||||
expect(lastCreateParams.model).toBe(cfg.model);
|
||||
expect(Array.isArray(lastCreateParams.input)).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -44,6 +44,14 @@ describe("canAutoApprove()", () => {
|
||||
group: "Navigating",
|
||||
runInSandbox: false,
|
||||
});
|
||||
|
||||
// Ripgrep safe invocation.
|
||||
expect(check(["rg", "TODO"])).toEqual({
|
||||
type: "auto-approve",
|
||||
reason: "Ripgrep search",
|
||||
group: "Searching",
|
||||
runInSandbox: false,
|
||||
});
|
||||
});
|
||||
|
||||
test("simple safe commands within a `bash -lc` call", () => {
|
||||
@@ -67,6 +75,24 @@ describe("canAutoApprove()", () => {
|
||||
});
|
||||
});
|
||||
|
||||
test("ripgrep unsafe flags", () => {
|
||||
// Flags that do not take arguments
|
||||
expect(check(["rg", "--search-zip", "TODO"])).toEqual({ type: "ask-user" });
|
||||
expect(check(["rg", "-z", "TODO"])).toEqual({ type: "ask-user" });
|
||||
|
||||
// Flags that take arguments (provided separately)
|
||||
expect(check(["rg", "--pre", "cat", "TODO"])).toEqual({ type: "ask-user" });
|
||||
expect(check(["rg", "--hostname-bin", "hostname", "TODO"])).toEqual({
|
||||
type: "ask-user",
|
||||
});
|
||||
|
||||
// Flags that take arguments in = form
|
||||
expect(check(["rg", "--pre=cat", "TODO"])).toEqual({ type: "ask-user" });
|
||||
expect(check(["rg", "--hostname-bin=hostname", "TODO"])).toEqual({
|
||||
type: "ask-user",
|
||||
});
|
||||
});
|
||||
|
||||
test("bash -lc commands with unsafe redirects", () => {
|
||||
expect(check(["bash", "-lc", "echo hello > file.txt"])).toEqual({
|
||||
type: "ask-user",
|
||||
|
||||
1644
codex-rs/Cargo.lock
generated
1644
codex-rs/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,20 +1,23 @@
|
||||
[workspace]
|
||||
resolver = "2"
|
||||
members = [
|
||||
"ansi-escape",
|
||||
"apply-patch",
|
||||
"arg0",
|
||||
"cli",
|
||||
"common",
|
||||
"core",
|
||||
"exec",
|
||||
"execpolicy",
|
||||
"file-search",
|
||||
"linux-sandbox",
|
||||
"login",
|
||||
"mcp-client",
|
||||
"mcp-server",
|
||||
"mcp-types",
|
||||
"ollama",
|
||||
"tui",
|
||||
]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "0.0.0"
|
||||
@@ -36,3 +39,10 @@ lto = "fat"
|
||||
# Because we bundle some of these executables with the TypeScript CLI, we
|
||||
# remove everything to make the binary as small as possible.
|
||||
strip = "symbols"
|
||||
|
||||
# See https://github.com/openai/codex/issues/1411 for details.
|
||||
codegen-units = 1
|
||||
|
||||
[patch.crates-io]
|
||||
# ratatui = { path = "../../ratatui" }
|
||||
ratatui = { git = "https://github.com/nornagon/ratatui", branch = "nornagon-v0.29.0-patch" }
|
||||
|
||||
@@ -39,25 +39,52 @@ You can enable notifications by configuring a script that is run whenever the ag
|
||||
|
||||
To run Codex non-interactively, run `codex exec PROMPT` (you can also pass the prompt via `stdin`) and Codex will work on your task until it decides that it is done and exits. Output is printed to the terminal directly. You can set the `RUST_LOG` environment variable to see more about what's going on.
|
||||
|
||||
### Use `@` for file search
|
||||
|
||||
Typing `@` triggers a fuzzy-filename search over the workspace root. Use up/down to select among the results and Tab or Enter to replace the `@` with the selected path. You can use Esc to cancel the search.
|
||||
|
||||
### `--cd`/`-C` flag
|
||||
|
||||
Sometimes it is not convenient to `cd` to the directory you want Codex to use as the "working root" before running Codex. Fortunately, `codex` supports a `--cd` option so you can specify whatever folder you want. You can confirm that Codex is honoring `--cd` by double-checking the **workdir** it reports in the TUI at the start of a new session.
|
||||
|
||||
### Shell completions
|
||||
|
||||
Generate shell completion scripts via:
|
||||
|
||||
```shell
|
||||
codex completion bash
|
||||
codex completion zsh
|
||||
codex completion fish
|
||||
```
|
||||
|
||||
### Experimenting with the Codex Sandbox
|
||||
|
||||
To test to see what happens when a command is run under the sandbox provided by Codex, we provide the following subcommands in Codex CLI:
|
||||
|
||||
```
|
||||
# macOS
|
||||
codex debug seatbelt [-s SANDBOX_PERMISSION]... [COMMAND]...
|
||||
codex debug seatbelt [--full-auto] [COMMAND]...
|
||||
|
||||
# Linux
|
||||
codex debug landlock [-s SANDBOX_PERMISSION]... [COMMAND]...
|
||||
codex debug landlock [--full-auto] [COMMAND]...
|
||||
```
|
||||
|
||||
You can experiment with different values of `-s` to see what permissions the `COMMAND` needs to execute successfully.
|
||||
### Selecting a sandbox policy via `--sandbox`
|
||||
|
||||
Note that the exact API for the `-s` flag is currently in flux. See https://github.com/openai/codex/issues/1248 for details.
|
||||
The Rust CLI exposes a dedicated `--sandbox` (`-s`) flag that lets you pick the sandbox policy **without** having to reach for the generic `-c/--config` option:
|
||||
|
||||
```shell
|
||||
# Run Codex with the default, read-only sandbox
|
||||
codex --sandbox read-only
|
||||
|
||||
# Allow the agent to write within the current workspace while still blocking network access
|
||||
codex --sandbox workspace-write
|
||||
|
||||
# Danger! Disable sandboxing entirely (only do this if you are already running in a container or other isolated env)
|
||||
codex --sandbox danger-full-access
|
||||
```
|
||||
|
||||
The same setting can be persisted in `~/.codex/config.toml` via the top-level `sandbox_mode = "MODE"` key, e.g. `sandbox_mode = "workspace-write"`.
|
||||
|
||||
## Code Organization
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
edition = "2024"
|
||||
name = "codex-ansi-escape"
|
||||
version = { workspace = true }
|
||||
edition = "2024"
|
||||
|
||||
[lib]
|
||||
name = "codex_ansi_escape"
|
||||
@@ -10,7 +10,7 @@ path = "src/lib.rs"
|
||||
[dependencies]
|
||||
ansi-to-tui = "7.0.0"
|
||||
ratatui = { version = "0.29.0", features = [
|
||||
"unstable-widget-ref",
|
||||
"unstable-rendered-line-info",
|
||||
"unstable-widget-ref",
|
||||
] }
|
||||
tracing = { version = "0.1.41", features = ["log"] }
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
edition = "2024"
|
||||
name = "codex-apply-patch"
|
||||
version = { workspace = true }
|
||||
edition = "2024"
|
||||
|
||||
[lib]
|
||||
name = "codex_apply_patch"
|
||||
@@ -12,11 +12,10 @@ workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
serde_json = "1.0.110"
|
||||
similar = "2.7.0"
|
||||
thiserror = "2.0.12"
|
||||
tree-sitter = "0.25.3"
|
||||
tree-sitter-bash = "0.23.3"
|
||||
tree-sitter = "0.25.8"
|
||||
tree-sitter-bash = "0.25.0"
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = "1.4.1"
|
||||
|
||||
@@ -42,6 +42,15 @@ impl From<std::io::Error> for ApplyPatchError {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&std::io::Error> for ApplyPatchError {
|
||||
fn from(err: &std::io::Error) -> Self {
|
||||
ApplyPatchError::IoError(IoError {
|
||||
context: "I/O error".to_string(),
|
||||
source: std::io::Error::new(err.kind(), err.to_string()),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
#[error("{context}: {source}")]
|
||||
pub struct IoError {
|
||||
@@ -58,16 +67,24 @@ impl PartialEq for IoError {
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum MaybeApplyPatch {
|
||||
Body(Vec<Hunk>),
|
||||
Body(ApplyPatchArgs),
|
||||
ShellParseError(ExtractHeredocError),
|
||||
PatchParseError(ParseError),
|
||||
NotApplyPatch,
|
||||
}
|
||||
|
||||
/// Both the raw PATCH argument to `apply_patch` as well as the PATCH argument
|
||||
/// parsed into hunks.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct ApplyPatchArgs {
|
||||
pub patch: String,
|
||||
pub hunks: Vec<Hunk>,
|
||||
}
|
||||
|
||||
pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
|
||||
match argv {
|
||||
[cmd, body] if cmd == "apply_patch" => match parse_patch(body) {
|
||||
Ok(hunks) => MaybeApplyPatch::Body(hunks),
|
||||
Ok(source) => MaybeApplyPatch::Body(source),
|
||||
Err(e) => MaybeApplyPatch::PatchParseError(e),
|
||||
},
|
||||
[bash, flag, script]
|
||||
@@ -77,7 +94,7 @@ pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
|
||||
{
|
||||
match extract_heredoc_body_from_apply_patch_command(script) {
|
||||
Ok(body) => match parse_patch(&body) {
|
||||
Ok(hunks) => MaybeApplyPatch::Body(hunks),
|
||||
Ok(source) => MaybeApplyPatch::Body(source),
|
||||
Err(e) => MaybeApplyPatch::PatchParseError(e),
|
||||
},
|
||||
Err(e) => MaybeApplyPatch::ShellParseError(e),
|
||||
@@ -116,11 +133,19 @@ pub enum MaybeApplyPatchVerified {
|
||||
NotApplyPatch,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
/// ApplyPatchAction is the result of parsing an `apply_patch` command. By
|
||||
/// construction, all paths should be absolute paths.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct ApplyPatchAction {
|
||||
changes: HashMap<PathBuf, ApplyPatchFileChange>,
|
||||
|
||||
/// The raw patch argument that can be used with `apply_patch` as an exec
|
||||
/// call. i.e., if the original arg was parsed in "lenient" mode with a
|
||||
/// heredoc, this should be the value without the heredoc wrapper.
|
||||
pub patch: String,
|
||||
|
||||
/// The working directory that was used to resolve relative paths in the patch.
|
||||
pub cwd: PathBuf,
|
||||
}
|
||||
|
||||
impl ApplyPatchAction {
|
||||
@@ -140,8 +165,28 @@ impl ApplyPatchAction {
|
||||
panic!("path must be absolute");
|
||||
}
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
let filename = path
|
||||
.file_name()
|
||||
.expect("path should not be empty")
|
||||
.to_string_lossy();
|
||||
let patch = format!(
|
||||
r#"*** Begin Patch
|
||||
*** Update File: {filename}
|
||||
@@
|
||||
+ {content}
|
||||
*** End Patch"#,
|
||||
);
|
||||
let changes = HashMap::from([(path.to_path_buf(), ApplyPatchFileChange::Add { content })]);
|
||||
Self { changes }
|
||||
#[allow(clippy::expect_used)]
|
||||
Self {
|
||||
changes,
|
||||
cwd: path
|
||||
.parent()
|
||||
.expect("path should have parent")
|
||||
.to_path_buf(),
|
||||
patch,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -149,7 +194,7 @@ impl ApplyPatchAction {
|
||||
/// patch.
|
||||
pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApplyPatchVerified {
|
||||
match maybe_parse_apply_patch(argv) {
|
||||
MaybeApplyPatch::Body(hunks) => {
|
||||
MaybeApplyPatch::Body(ApplyPatchArgs { patch, hunks }) => {
|
||||
let mut changes = HashMap::new();
|
||||
for hunk in hunks {
|
||||
let path = hunk.resolve_path(cwd);
|
||||
@@ -183,7 +228,11 @@ pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApp
|
||||
}
|
||||
}
|
||||
}
|
||||
MaybeApplyPatchVerified::Body(ApplyPatchAction { changes })
|
||||
MaybeApplyPatchVerified::Body(ApplyPatchAction {
|
||||
changes,
|
||||
patch,
|
||||
cwd: cwd.to_path_buf(),
|
||||
})
|
||||
}
|
||||
MaybeApplyPatch::ShellParseError(e) => MaybeApplyPatchVerified::ShellParseError(e),
|
||||
MaybeApplyPatch::PatchParseError(e) => MaybeApplyPatchVerified::CorrectnessError(e.into()),
|
||||
@@ -264,7 +313,7 @@ pub fn apply_patch(
|
||||
stderr: &mut impl std::io::Write,
|
||||
) -> Result<(), ApplyPatchError> {
|
||||
let hunks = match parse_patch(patch) {
|
||||
Ok(hunks) => hunks,
|
||||
Ok(source) => source.hunks,
|
||||
Err(e) => {
|
||||
match &e {
|
||||
InvalidPatchError(message) => {
|
||||
@@ -326,13 +375,21 @@ pub fn apply_hunks(
|
||||
match apply_hunks_to_files(hunks) {
|
||||
Ok(affected) => {
|
||||
print_summary(&affected, stdout).map_err(ApplyPatchError::from)?;
|
||||
Ok(())
|
||||
}
|
||||
Err(err) => {
|
||||
writeln!(stderr, "{err:?}").map_err(ApplyPatchError::from)?;
|
||||
let msg = err.to_string();
|
||||
writeln!(stderr, "{msg}").map_err(ApplyPatchError::from)?;
|
||||
if let Some(io) = err.downcast_ref::<std::io::Error>() {
|
||||
Err(ApplyPatchError::from(io))
|
||||
} else {
|
||||
Err(ApplyPatchError::IoError(IoError {
|
||||
context: msg,
|
||||
source: std::io::Error::other(err),
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Applies each parsed patch hunk to the filesystem.
|
||||
@@ -633,7 +690,7 @@ mod tests {
|
||||
|
||||
/// Helper to construct a patch with the given body.
|
||||
fn wrap_patch(body: &str) -> String {
|
||||
format!("*** Begin Patch\n{}\n*** End Patch", body)
|
||||
format!("*** Begin Patch\n{body}\n*** End Patch")
|
||||
}
|
||||
|
||||
fn strs_to_strings(strs: &[&str]) -> Vec<String> {
|
||||
@@ -652,7 +709,7 @@ mod tests {
|
||||
]);
|
||||
|
||||
match maybe_parse_apply_patch(&args) {
|
||||
MaybeApplyPatch::Body(hunks) => {
|
||||
MaybeApplyPatch::Body(ApplyPatchArgs { hunks, patch: _ }) => {
|
||||
assert_eq!(
|
||||
hunks,
|
||||
vec![Hunk::AddFile {
|
||||
@@ -661,7 +718,7 @@ mod tests {
|
||||
}]
|
||||
);
|
||||
}
|
||||
result => panic!("expected MaybeApplyPatch::Body got {:?}", result),
|
||||
result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -679,7 +736,7 @@ PATCH"#,
|
||||
]);
|
||||
|
||||
match maybe_parse_apply_patch(&args) {
|
||||
MaybeApplyPatch::Body(hunks) => {
|
||||
MaybeApplyPatch::Body(ApplyPatchArgs { hunks, patch: _ }) => {
|
||||
assert_eq!(
|
||||
hunks,
|
||||
vec![Hunk::AddFile {
|
||||
@@ -688,7 +745,7 @@ PATCH"#,
|
||||
}]
|
||||
);
|
||||
}
|
||||
result => panic!("expected MaybeApplyPatch::Body got {:?}", result),
|
||||
result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -954,7 +1011,7 @@ PATCH"#,
|
||||
));
|
||||
let patch = parse_patch(&patch).unwrap();
|
||||
|
||||
let update_file_chunks = match patch.as_slice() {
|
||||
let update_file_chunks = match patch.hunks.as_slice() {
|
||||
[Hunk::UpdateFile { chunks, .. }] => chunks,
|
||||
_ => panic!("Expected a single UpdateFile hunk"),
|
||||
};
|
||||
@@ -992,7 +1049,7 @@ PATCH"#,
|
||||
));
|
||||
|
||||
let patch = parse_patch(&patch).unwrap();
|
||||
let chunks = match patch.as_slice() {
|
||||
let chunks = match patch.hunks.as_slice() {
|
||||
[Hunk::UpdateFile { chunks, .. }] => chunks,
|
||||
_ => panic!("Expected a single UpdateFile hunk"),
|
||||
};
|
||||
@@ -1029,7 +1086,7 @@ PATCH"#,
|
||||
));
|
||||
|
||||
let patch = parse_patch(&patch).unwrap();
|
||||
let chunks = match patch.as_slice() {
|
||||
let chunks = match patch.hunks.as_slice() {
|
||||
[Hunk::UpdateFile { chunks, .. }] => chunks,
|
||||
_ => panic!("Expected a single UpdateFile hunk"),
|
||||
};
|
||||
@@ -1064,7 +1121,7 @@ PATCH"#,
|
||||
));
|
||||
|
||||
let patch = parse_patch(&patch).unwrap();
|
||||
let chunks = match patch.as_slice() {
|
||||
let chunks = match patch.hunks.as_slice() {
|
||||
[Hunk::UpdateFile { chunks, .. }] => chunks,
|
||||
_ => panic!("Expected a single UpdateFile hunk"),
|
||||
};
|
||||
@@ -1110,7 +1167,7 @@ PATCH"#,
|
||||
|
||||
// Extract chunks then build the unified diff.
|
||||
let parsed = parse_patch(&patch).unwrap();
|
||||
let chunks = match parsed.as_slice() {
|
||||
let chunks = match parsed.hunks.as_slice() {
|
||||
[Hunk::UpdateFile { chunks, .. }] => chunks,
|
||||
_ => panic!("Expected a single UpdateFile hunk"),
|
||||
};
|
||||
@@ -1193,7 +1250,29 @@ g
|
||||
new_content: "updated session directory content\n".to_string(),
|
||||
},
|
||||
)]),
|
||||
patch: argv[1].clone(),
|
||||
cwd: session_dir.path().to_path_buf(),
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_fails_on_write_error() {
|
||||
let dir = tempdir().unwrap();
|
||||
let path = dir.path().join("readonly.txt");
|
||||
fs::write(&path, "before\n").unwrap();
|
||||
let mut perms = fs::metadata(&path).unwrap().permissions();
|
||||
perms.set_readonly(true);
|
||||
fs::set_permissions(&path, perms).unwrap();
|
||||
|
||||
let patch = wrap_patch(&format!(
|
||||
"*** Update File: {}\n@@\n-before\n+after\n*** End Patch",
|
||||
path.display()
|
||||
));
|
||||
|
||||
let mut stdout = Vec::new();
|
||||
let mut stderr = Vec::new();
|
||||
let result = apply_patch(&patch, &mut stdout, &mut stderr);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
//!
|
||||
//! The parser below is a little more lenient than the explicit spec and allows for
|
||||
//! leading/trailing whitespace around patch markers.
|
||||
use crate::ApplyPatchArgs;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
@@ -102,7 +103,7 @@ pub struct UpdateFileChunk {
|
||||
pub is_end_of_file: bool,
|
||||
}
|
||||
|
||||
pub fn parse_patch(patch: &str) -> Result<Vec<Hunk>, ParseError> {
|
||||
pub fn parse_patch(patch: &str) -> Result<ApplyPatchArgs, ParseError> {
|
||||
let mode = if PARSE_IN_STRICT_MODE {
|
||||
ParseMode::Strict
|
||||
} else {
|
||||
@@ -150,7 +151,7 @@ enum ParseMode {
|
||||
Lenient,
|
||||
}
|
||||
|
||||
fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<Vec<Hunk>, ParseError> {
|
||||
fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, ParseError> {
|
||||
let lines: Vec<&str> = patch.trim().lines().collect();
|
||||
let lines: &[&str] = match check_patch_boundaries_strict(&lines) {
|
||||
Ok(()) => &lines,
|
||||
@@ -173,7 +174,8 @@ fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<Vec<Hunk>, ParseErro
|
||||
line_number += hunk_lines;
|
||||
remaining_lines = &remaining_lines[hunk_lines..]
|
||||
}
|
||||
Ok(hunks)
|
||||
let patch = lines.join("\n");
|
||||
Ok(ApplyPatchArgs { hunks, patch })
|
||||
}
|
||||
|
||||
/// Checks the start and end lines of the patch text for `apply_patch`,
|
||||
@@ -425,6 +427,7 @@ fn parse_update_file_chunk(
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(clippy::unwrap_used)]
|
||||
fn test_parse_patch() {
|
||||
assert_eq!(
|
||||
parse_patch_text("bad", ParseMode::Strict),
|
||||
@@ -455,8 +458,10 @@ fn test_parse_patch() {
|
||||
"*** Begin Patch\n\
|
||||
*** End Patch",
|
||||
ParseMode::Strict
|
||||
),
|
||||
Ok(Vec::new())
|
||||
)
|
||||
.unwrap()
|
||||
.hunks,
|
||||
Vec::new()
|
||||
);
|
||||
assert_eq!(
|
||||
parse_patch_text(
|
||||
@@ -472,8 +477,10 @@ fn test_parse_patch() {
|
||||
+ return 123\n\
|
||||
*** End Patch",
|
||||
ParseMode::Strict
|
||||
),
|
||||
Ok(vec![
|
||||
)
|
||||
.unwrap()
|
||||
.hunks,
|
||||
vec![
|
||||
AddFile {
|
||||
path: PathBuf::from("path/add.py"),
|
||||
contents: "abc\ndef\n".to_string()
|
||||
@@ -491,7 +498,7 @@ fn test_parse_patch() {
|
||||
is_end_of_file: false
|
||||
}]
|
||||
}
|
||||
])
|
||||
]
|
||||
);
|
||||
// Update hunk followed by another hunk (Add File).
|
||||
assert_eq!(
|
||||
@@ -504,8 +511,10 @@ fn test_parse_patch() {
|
||||
+content\n\
|
||||
*** End Patch",
|
||||
ParseMode::Strict
|
||||
),
|
||||
Ok(vec![
|
||||
)
|
||||
.unwrap()
|
||||
.hunks,
|
||||
vec![
|
||||
UpdateFile {
|
||||
path: PathBuf::from("file.py"),
|
||||
move_path: None,
|
||||
@@ -520,7 +529,7 @@ fn test_parse_patch() {
|
||||
path: PathBuf::from("other.py"),
|
||||
contents: "content\n".to_string()
|
||||
}
|
||||
])
|
||||
]
|
||||
);
|
||||
|
||||
// Update hunk without an explicit @@ header for the first chunk should parse.
|
||||
@@ -533,8 +542,10 @@ fn test_parse_patch() {
|
||||
+bar
|
||||
*** End Patch"#,
|
||||
ParseMode::Strict
|
||||
),
|
||||
Ok(vec![UpdateFile {
|
||||
)
|
||||
.unwrap()
|
||||
.hunks,
|
||||
vec![UpdateFile {
|
||||
path: PathBuf::from("file2.py"),
|
||||
move_path: None,
|
||||
chunks: vec![UpdateFileChunk {
|
||||
@@ -543,7 +554,7 @@ fn test_parse_patch() {
|
||||
new_lines: vec!["import foo".to_string(), "bar".to_string()],
|
||||
is_end_of_file: false,
|
||||
}],
|
||||
}])
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
@@ -574,7 +585,10 @@ fn test_parse_patch_lenient() {
|
||||
);
|
||||
assert_eq!(
|
||||
parse_patch_text(&patch_text_in_heredoc, ParseMode::Lenient),
|
||||
Ok(expected_patch.clone())
|
||||
Ok(ApplyPatchArgs {
|
||||
hunks: expected_patch.clone(),
|
||||
patch: patch_text.to_string()
|
||||
})
|
||||
);
|
||||
|
||||
let patch_text_in_single_quoted_heredoc = format!("<<'EOF'\n{patch_text}\nEOF\n");
|
||||
@@ -584,7 +598,10 @@ fn test_parse_patch_lenient() {
|
||||
);
|
||||
assert_eq!(
|
||||
parse_patch_text(&patch_text_in_single_quoted_heredoc, ParseMode::Lenient),
|
||||
Ok(expected_patch.clone())
|
||||
Ok(ApplyPatchArgs {
|
||||
hunks: expected_patch.clone(),
|
||||
patch: patch_text.to_string()
|
||||
})
|
||||
);
|
||||
|
||||
let patch_text_in_double_quoted_heredoc = format!("<<\"EOF\"\n{patch_text}\nEOF\n");
|
||||
@@ -594,7 +611,10 @@ fn test_parse_patch_lenient() {
|
||||
);
|
||||
assert_eq!(
|
||||
parse_patch_text(&patch_text_in_double_quoted_heredoc, ParseMode::Lenient),
|
||||
Ok(expected_patch.clone())
|
||||
Ok(ApplyPatchArgs {
|
||||
hunks: expected_patch.clone(),
|
||||
patch: patch_text.to_string()
|
||||
})
|
||||
);
|
||||
|
||||
let patch_text_in_mismatched_quotes_heredoc = format!("<<\"EOF'\n{patch_text}\nEOF\n");
|
||||
|
||||
19
codex-rs/arg0/Cargo.toml
Normal file
19
codex-rs/arg0/Cargo.toml
Normal file
@@ -0,0 +1,19 @@
|
||||
[package]
|
||||
edition = "2024"
|
||||
name = "codex-arg0"
|
||||
version = { workspace = true }
|
||||
|
||||
[lib]
|
||||
name = "codex_arg0"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
codex-apply-patch = { path = "../apply-patch" }
|
||||
codex-core = { path = "../core" }
|
||||
codex-linux-sandbox = { path = "../linux-sandbox" }
|
||||
dotenvy = "0.15.7"
|
||||
tokio = { version = "1", features = ["rt-multi-thread"] }
|
||||
91
codex-rs/arg0/src/lib.rs
Normal file
91
codex-rs/arg0/src/lib.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
use std::future::Future;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_core::CODEX_APPLY_PATCH_ARG1;
|
||||
|
||||
/// While we want to deploy the Codex CLI as a single executable for simplicity,
|
||||
/// we also want to expose some of its functionality as distinct CLIs, so we use
|
||||
/// the "arg0 trick" to determine which CLI to dispatch. This effectively allows
|
||||
/// us to simulate deploying multiple executables as a single binary on Mac and
|
||||
/// Linux (but not Windows).
|
||||
///
|
||||
/// When the current executable is invoked through the hard-link or alias named
|
||||
/// `codex-linux-sandbox` we *directly* execute
|
||||
/// [`codex_linux_sandbox::run_main`] (which never returns). Otherwise we:
|
||||
///
|
||||
/// 1. Use [`dotenvy::from_path`] and [`dotenvy::dotenv`] to modify the
|
||||
/// environment before creating any threads.
|
||||
/// 2. Construct a Tokio multi-thread runtime.
|
||||
/// 3. Derive the path to the current executable (so children can re-invoke the
|
||||
/// sandbox) when running on Linux.
|
||||
/// 4. Execute the provided async `main_fn` inside that runtime, forwarding any
|
||||
/// error. Note that `main_fn` receives `codex_linux_sandbox_exe:
|
||||
/// Option<PathBuf>`, as an argument, which is generally needed as part of
|
||||
/// constructing [`codex_core::config::Config`].
|
||||
///
|
||||
/// This function should be used to wrap any `main()` function in binary crates
|
||||
/// in this workspace that depends on these helper CLIs.
|
||||
pub fn arg0_dispatch_or_else<F, Fut>(main_fn: F) -> anyhow::Result<()>
|
||||
where
|
||||
F: FnOnce(Option<PathBuf>) -> Fut,
|
||||
Fut: Future<Output = anyhow::Result<()>>,
|
||||
{
|
||||
// Determine if we were invoked via the special alias.
|
||||
let mut args = std::env::args_os();
|
||||
let argv0 = args.next().unwrap_or_default();
|
||||
let exe_name = Path::new(&argv0)
|
||||
.file_name()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("");
|
||||
|
||||
if exe_name == "codex-linux-sandbox" {
|
||||
// Safety: [`run_main`] never returns.
|
||||
codex_linux_sandbox::run_main();
|
||||
}
|
||||
|
||||
let argv1 = args.next().unwrap_or_default();
|
||||
if argv1 == CODEX_APPLY_PATCH_ARG1 {
|
||||
let patch_arg = args.next().and_then(|s| s.to_str().map(|s| s.to_owned()));
|
||||
let exit_code = match patch_arg {
|
||||
Some(patch_arg) => {
|
||||
let mut stdout = std::io::stdout();
|
||||
let mut stderr = std::io::stderr();
|
||||
match codex_apply_patch::apply_patch(&patch_arg, &mut stdout, &mut stderr) {
|
||||
Ok(()) => 0,
|
||||
Err(_) => 1,
|
||||
}
|
||||
}
|
||||
None => {
|
||||
eprintln!("Error: {CODEX_APPLY_PATCH_ARG1} requires a UTF-8 PATCH argument.");
|
||||
1
|
||||
}
|
||||
};
|
||||
std::process::exit(exit_code);
|
||||
}
|
||||
|
||||
// This modifies the environment, which is not thread-safe, so do this
|
||||
// before creating any threads/the Tokio runtime.
|
||||
load_dotenv();
|
||||
|
||||
// Regular invocation – create a Tokio runtime and execute the provided
|
||||
// async entry-point.
|
||||
let runtime = tokio::runtime::Runtime::new()?;
|
||||
runtime.block_on(async move {
|
||||
let codex_linux_sandbox_exe: Option<PathBuf> = if cfg!(target_os = "linux") {
|
||||
std::env::current_exe().ok()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
main_fn(codex_linux_sandbox_exe).await
|
||||
})
|
||||
}
|
||||
|
||||
/// Load env vars from ~/.codex/.env and `$(pwd)/.env`.
|
||||
fn load_dotenv() {
|
||||
if let Ok(codex_home) = codex_core::config::find_codex_home() {
|
||||
dotenvy::from_path(codex_home.join(".env")).ok();
|
||||
}
|
||||
dotenvy::dotenv().ok();
|
||||
}
|
||||
21
codex-rs/chatgpt/Cargo.toml
Normal file
21
codex-rs/chatgpt/Cargo.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
[package]
|
||||
edition = "2024"
|
||||
name = "codex-chatgpt"
|
||||
version = { workspace = true }
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
codex-common = { path = "../common", features = ["cli"] }
|
||||
codex-core = { path = "../core" }
|
||||
codex-login = { path = "../login" }
|
||||
reqwest = { version = "0.12", features = ["json", "stream"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
5
codex-rs/chatgpt/README.md
Normal file
5
codex-rs/chatgpt/README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# ChatGPT
|
||||
|
||||
This crate pertains to first party ChatGPT APIs and products such as Codex agent.
|
||||
|
||||
This crate should be primarily built and maintained by OpenAI employees. Please reach out to a maintainer before making an external contribution.
|
||||
101
codex-rs/chatgpt/src/apply_command.rs
Normal file
101
codex-rs/chatgpt/src/apply_command.rs
Normal file
@@ -0,0 +1,101 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use clap::Parser;
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
|
||||
use crate::chatgpt_token::init_chatgpt_token_from_auth;
|
||||
use crate::get_task::GetTaskResponse;
|
||||
use crate::get_task::OutputItem;
|
||||
use crate::get_task::PrOutputItem;
|
||||
use crate::get_task::get_task;
|
||||
|
||||
/// Applies the latest diff from a Codex agent task.
|
||||
#[derive(Debug, Parser)]
|
||||
pub struct ApplyCommand {
|
||||
pub task_id: String,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub config_overrides: CliConfigOverrides,
|
||||
}
|
||||
pub async fn run_apply_command(
|
||||
apply_cli: ApplyCommand,
|
||||
cwd: Option<PathBuf>,
|
||||
) -> anyhow::Result<()> {
|
||||
let config = Config::load_with_cli_overrides(
|
||||
apply_cli
|
||||
.config_overrides
|
||||
.parse_overrides()
|
||||
.map_err(anyhow::Error::msg)?,
|
||||
ConfigOverrides::default(),
|
||||
)?;
|
||||
|
||||
init_chatgpt_token_from_auth(&config.codex_home).await?;
|
||||
|
||||
let task_response = get_task(&config, apply_cli.task_id).await?;
|
||||
apply_diff_from_task(task_response, cwd).await
|
||||
}
|
||||
|
||||
pub async fn apply_diff_from_task(
|
||||
task_response: GetTaskResponse,
|
||||
cwd: Option<PathBuf>,
|
||||
) -> anyhow::Result<()> {
|
||||
let diff_turn = match task_response.current_diff_task_turn {
|
||||
Some(turn) => turn,
|
||||
None => anyhow::bail!("No diff turn found"),
|
||||
};
|
||||
let output_diff = diff_turn.output_items.iter().find_map(|item| match item {
|
||||
OutputItem::Pr(PrOutputItem { output_diff }) => Some(output_diff),
|
||||
_ => None,
|
||||
});
|
||||
match output_diff {
|
||||
Some(output_diff) => apply_diff(&output_diff.diff, cwd).await,
|
||||
None => anyhow::bail!("No PR output item found"),
|
||||
}
|
||||
}
|
||||
|
||||
async fn apply_diff(diff: &str, cwd: Option<PathBuf>) -> anyhow::Result<()> {
|
||||
let mut cmd = tokio::process::Command::new("git");
|
||||
if let Some(cwd) = cwd {
|
||||
cmd.current_dir(cwd);
|
||||
}
|
||||
let toplevel_output = cmd
|
||||
.args(vec!["rev-parse", "--show-toplevel"])
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !toplevel_output.status.success() {
|
||||
anyhow::bail!("apply must be run from a git repository.");
|
||||
}
|
||||
|
||||
let repo_root = String::from_utf8(toplevel_output.stdout)?
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
let mut git_apply_cmd = tokio::process::Command::new("git")
|
||||
.args(vec!["apply", "--3way"])
|
||||
.current_dir(&repo_root)
|
||||
.stdin(std::process::Stdio::piped())
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()?;
|
||||
|
||||
if let Some(mut stdin) = git_apply_cmd.stdin.take() {
|
||||
tokio::io::AsyncWriteExt::write_all(&mut stdin, diff.as_bytes()).await?;
|
||||
drop(stdin);
|
||||
}
|
||||
|
||||
let output = git_apply_cmd.wait_with_output().await?;
|
||||
|
||||
if !output.status.success() {
|
||||
anyhow::bail!(
|
||||
"Git apply failed with status {}: {}",
|
||||
output.status,
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
println!("Successfully applied diff");
|
||||
Ok(())
|
||||
}
|
||||
49
codex-rs/chatgpt/src/chatgpt_client.rs
Normal file
49
codex-rs/chatgpt/src/chatgpt_client.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
use codex_core::config::Config;
|
||||
|
||||
use crate::chatgpt_token::get_chatgpt_token_data;
|
||||
use crate::chatgpt_token::init_chatgpt_token_from_auth;
|
||||
|
||||
use anyhow::Context;
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
/// Make a GET request to the ChatGPT backend API.
|
||||
pub(crate) async fn chatgpt_get_request<T: DeserializeOwned>(
|
||||
config: &Config,
|
||||
path: String,
|
||||
) -> anyhow::Result<T> {
|
||||
let chatgpt_base_url = &config.chatgpt_base_url;
|
||||
init_chatgpt_token_from_auth(&config.codex_home).await?;
|
||||
|
||||
// Make direct HTTP request to ChatGPT backend API with the token
|
||||
let client = reqwest::Client::new();
|
||||
let url = format!("{chatgpt_base_url}{path}");
|
||||
|
||||
let token =
|
||||
get_chatgpt_token_data().ok_or_else(|| anyhow::anyhow!("ChatGPT token not available"))?;
|
||||
|
||||
let account_id = token.account_id.ok_or_else(|| {
|
||||
anyhow::anyhow!("ChatGPT account ID not available, please re-run `codex login`")
|
||||
});
|
||||
|
||||
let response = client
|
||||
.get(&url)
|
||||
.bearer_auth(&token.access_token)
|
||||
.header("chatgpt-account-id", account_id?)
|
||||
.header("Content-Type", "application/json")
|
||||
.header("User-Agent", "codex-cli")
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to send request")?;
|
||||
|
||||
if response.status().is_success() {
|
||||
let result: T = response
|
||||
.json()
|
||||
.await
|
||||
.context("Failed to parse JSON response")?;
|
||||
Ok(result)
|
||||
} else {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Request failed with status {}: {}", status, body)
|
||||
}
|
||||
}
|
||||
27
codex-rs/chatgpt/src/chatgpt_token.rs
Normal file
27
codex-rs/chatgpt/src/chatgpt_token.rs
Normal file
@@ -0,0 +1,27 @@
|
||||
use std::path::Path;
|
||||
use std::sync::LazyLock;
|
||||
use std::sync::RwLock;
|
||||
|
||||
use codex_login::TokenData;
|
||||
|
||||
static CHATGPT_TOKEN: LazyLock<RwLock<Option<TokenData>>> = LazyLock::new(|| RwLock::new(None));
|
||||
|
||||
pub fn get_chatgpt_token_data() -> Option<TokenData> {
|
||||
CHATGPT_TOKEN.read().ok()?.clone()
|
||||
}
|
||||
|
||||
pub fn set_chatgpt_token_data(value: TokenData) {
|
||||
if let Ok(mut guard) = CHATGPT_TOKEN.write() {
|
||||
*guard = Some(value);
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize the ChatGPT token from auth.json file
|
||||
pub async fn init_chatgpt_token_from_auth(codex_home: &Path) -> std::io::Result<()> {
|
||||
let auth = codex_login::load_auth(codex_home, true)?;
|
||||
if let Some(auth) = auth {
|
||||
let token_data = auth.get_token_data().await?;
|
||||
set_chatgpt_token_data(token_data);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
40
codex-rs/chatgpt/src/get_task.rs
Normal file
40
codex-rs/chatgpt/src/get_task.rs
Normal file
@@ -0,0 +1,40 @@
|
||||
use codex_core::config::Config;
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::chatgpt_client::chatgpt_get_request;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct GetTaskResponse {
|
||||
pub current_diff_task_turn: Option<AssistantTurn>,
|
||||
}
|
||||
|
||||
// Only relevant fields for our extraction
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct AssistantTurn {
|
||||
pub output_items: Vec<OutputItem>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum OutputItem {
|
||||
#[serde(rename = "pr")]
|
||||
Pr(PrOutputItem),
|
||||
|
||||
#[serde(other)]
|
||||
Other,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct PrOutputItem {
|
||||
pub output_diff: OutputDiff,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct OutputDiff {
|
||||
pub diff: String,
|
||||
}
|
||||
|
||||
pub(crate) async fn get_task(config: &Config, task_id: String) -> anyhow::Result<GetTaskResponse> {
|
||||
let path = format!("/wham/tasks/{task_id}");
|
||||
chatgpt_get_request(config, path).await
|
||||
}
|
||||
4
codex-rs/chatgpt/src/lib.rs
Normal file
4
codex-rs/chatgpt/src/lib.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
pub mod apply_command;
|
||||
mod chatgpt_client;
|
||||
mod chatgpt_token;
|
||||
pub mod get_task;
|
||||
190
codex-rs/chatgpt/tests/apply_command_e2e.rs
Normal file
190
codex-rs/chatgpt/tests/apply_command_e2e.rs
Normal file
@@ -0,0 +1,190 @@
|
||||
#![expect(clippy::expect_used)]
|
||||
|
||||
use codex_chatgpt::apply_command::apply_diff_from_task;
|
||||
use codex_chatgpt::get_task::GetTaskResponse;
|
||||
use std::path::Path;
|
||||
use tempfile::TempDir;
|
||||
use tokio::process::Command;
|
||||
|
||||
/// Creates a temporary git repository with initial commit
|
||||
async fn create_temp_git_repo() -> anyhow::Result<TempDir> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let repo_path = temp_dir.path();
|
||||
let envs = vec![
|
||||
("GIT_CONFIG_GLOBAL", "/dev/null"),
|
||||
("GIT_CONFIG_NOSYSTEM", "1"),
|
||||
];
|
||||
|
||||
let output = Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["init"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !output.status.success() {
|
||||
anyhow::bail!(
|
||||
"Failed to initialize git repo: {}",
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["config", "user.email", "test@example.com"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["config", "user.name", "Test User"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
std::fs::write(repo_path.join("README.md"), "# Test Repo\n")?;
|
||||
|
||||
Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["add", "README.md"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
let output = Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["commit", "-m", "Initial commit"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !output.status.success() {
|
||||
anyhow::bail!(
|
||||
"Failed to create initial commit: {}",
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
Ok(temp_dir)
|
||||
}
|
||||
|
||||
async fn mock_get_task_with_fixture() -> anyhow::Result<GetTaskResponse> {
|
||||
let fixture_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/task_turn_fixture.json");
|
||||
let fixture_content = std::fs::read_to_string(fixture_path)?;
|
||||
let response: GetTaskResponse = serde_json::from_str(&fixture_content)?;
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_apply_command_creates_fibonacci_file() {
|
||||
let temp_repo = create_temp_git_repo()
|
||||
.await
|
||||
.expect("Failed to create temp git repo");
|
||||
let repo_path = temp_repo.path();
|
||||
|
||||
let task_response = mock_get_task_with_fixture()
|
||||
.await
|
||||
.expect("Failed to load fixture");
|
||||
|
||||
apply_diff_from_task(task_response, Some(repo_path.to_path_buf()))
|
||||
.await
|
||||
.expect("Failed to apply diff from task");
|
||||
|
||||
// Assert that fibonacci.js was created in scripts/ directory
|
||||
let fibonacci_path = repo_path.join("scripts/fibonacci.js");
|
||||
assert!(fibonacci_path.exists(), "fibonacci.js was not created");
|
||||
|
||||
// Verify the file contents match expected
|
||||
let contents = std::fs::read_to_string(&fibonacci_path).expect("Failed to read fibonacci.js");
|
||||
assert!(
|
||||
contents.contains("function fibonacci(n)"),
|
||||
"fibonacci.js doesn't contain expected function"
|
||||
);
|
||||
assert!(
|
||||
contents.contains("#!/usr/bin/env node"),
|
||||
"fibonacci.js doesn't have shebang"
|
||||
);
|
||||
assert!(
|
||||
contents.contains("module.exports = fibonacci;"),
|
||||
"fibonacci.js doesn't export function"
|
||||
);
|
||||
|
||||
// Verify file has correct number of lines (31 as specified in fixture)
|
||||
let line_count = contents.lines().count();
|
||||
assert_eq!(
|
||||
line_count, 31,
|
||||
"fibonacci.js should have 31 lines, got {line_count}",
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_apply_command_with_merge_conflicts() {
|
||||
let temp_repo = create_temp_git_repo()
|
||||
.await
|
||||
.expect("Failed to create temp git repo");
|
||||
let repo_path = temp_repo.path();
|
||||
|
||||
// Create conflicting fibonacci.js file first
|
||||
let scripts_dir = repo_path.join("scripts");
|
||||
std::fs::create_dir_all(&scripts_dir).expect("Failed to create scripts directory");
|
||||
|
||||
let conflicting_content = r#"#!/usr/bin/env node
|
||||
|
||||
// This is a different fibonacci implementation
|
||||
function fib(num) {
|
||||
if (num <= 1) return num;
|
||||
return fib(num - 1) + fib(num - 2);
|
||||
}
|
||||
|
||||
console.log("Running fibonacci...");
|
||||
console.log(fib(10));
|
||||
"#;
|
||||
|
||||
let fibonacci_path = scripts_dir.join("fibonacci.js");
|
||||
std::fs::write(&fibonacci_path, conflicting_content).expect("Failed to write conflicting file");
|
||||
|
||||
Command::new("git")
|
||||
.args(["add", "scripts/fibonacci.js"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to add fibonacci.js");
|
||||
|
||||
Command::new("git")
|
||||
.args(["commit", "-m", "Add conflicting fibonacci implementation"])
|
||||
.current_dir(repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to commit conflicting file");
|
||||
|
||||
let original_dir = std::env::current_dir().expect("Failed to get current dir");
|
||||
std::env::set_current_dir(repo_path).expect("Failed to change directory");
|
||||
struct DirGuard(std::path::PathBuf);
|
||||
impl Drop for DirGuard {
|
||||
fn drop(&mut self) {
|
||||
let _ = std::env::set_current_dir(&self.0);
|
||||
}
|
||||
}
|
||||
let _guard = DirGuard(original_dir);
|
||||
|
||||
let task_response = mock_get_task_with_fixture()
|
||||
.await
|
||||
.expect("Failed to load fixture");
|
||||
|
||||
let apply_result = apply_diff_from_task(task_response, Some(repo_path.to_path_buf())).await;
|
||||
|
||||
assert!(
|
||||
apply_result.is_err(),
|
||||
"Expected apply to fail due to merge conflicts"
|
||||
);
|
||||
|
||||
let contents = std::fs::read_to_string(&fibonacci_path).expect("Failed to read fibonacci.js");
|
||||
|
||||
assert!(
|
||||
contents.contains("<<<<<<< HEAD")
|
||||
|| contents.contains("=======")
|
||||
|| contents.contains(">>>>>>> "),
|
||||
"fibonacci.js should contain merge conflict markers, got: {contents}",
|
||||
);
|
||||
}
|
||||
65
codex-rs/chatgpt/tests/task_turn_fixture.json
Normal file
65
codex-rs/chatgpt/tests/task_turn_fixture.json
Normal file
@@ -0,0 +1,65 @@
|
||||
{
|
||||
"current_diff_task_turn": {
|
||||
"output_items": [
|
||||
{
|
||||
"type": "pr",
|
||||
"pr_title": "Add fibonacci script",
|
||||
"pr_message": "## Summary\n- add a basic Fibonacci script under `scripts/`\n\n## Testing\n- `node scripts/fibonacci.js 10`\n- `npm run lint` *(fails: next not found)*",
|
||||
"output_diff": {
|
||||
"type": "output_diff",
|
||||
"repo_id": "/workspace/rddit-vercel",
|
||||
"base_commit_sha": "1a2e9baf2ce2fdd0c126b47b1bcfd512de2a9f7b",
|
||||
"diff": "diff --git a/scripts/fibonacci.js b/scripts/fibonacci.js\nnew file mode 100644\nindex 0000000000000000000000000000000000000000..6c9fdfdbf8669b7968936411050525b995d0a9a6\n--- /dev/null\n+++ b/scripts/fibonacci.js\n@@ -0,0 +1,31 @@\n+#!/usr/bin/env node\n+\n+function fibonacci(n) {\n+ if (n < 0) {\n+ throw new Error(\"n must be non-negative\");\n+ }\n+ let a = 0;\n+ let b = 1;\n+ for (let i = 0; i < n; i++) {\n+ const next = a + b;\n+ a = b;\n+ b = next;\n+ }\n+ return a;\n+}\n+\n+function printUsage() {\n+ console.log(\"Usage: node scripts/fibonacci.js <n>\");\n+}\n+\n+if (require.main === module) {\n+ const arg = process.argv[2];\n+ if (arg === undefined || isNaN(Number(arg))) {\n+ printUsage();\n+ process.exit(1);\n+ }\n+ const n = Number(arg);\n+ console.log(fibonacci(n));\n+}\n+\n+module.exports = fibonacci;\n",
|
||||
"external_storage_diff": {
|
||||
"file_id": "file_00000000114c61f786900f8c2130ace7",
|
||||
"ttl": null
|
||||
},
|
||||
"files_modified": 1,
|
||||
"lines_added": 31,
|
||||
"lines_removed": 0,
|
||||
"commit_message": "Add fibonacci script"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"content_type": "text",
|
||||
"text": "**Summary**\n\n- Created a command-line Fibonacci script that validates input and prints the result when executed with Node"
|
||||
},
|
||||
{
|
||||
"content_type": "repo_file_citation",
|
||||
"path": "scripts/fibonacci.js",
|
||||
"line_range_start": 1,
|
||||
"line_range_end": 31
|
||||
},
|
||||
{
|
||||
"content_type": "text",
|
||||
"text": "\n\n**Testing**\n\n- ❌ `npm run lint` (failed to run `next lint`)"
|
||||
},
|
||||
{
|
||||
"content_type": "terminal_chunk_citation",
|
||||
"terminal_chunk_id": "7dd543",
|
||||
"line_range_start": 1,
|
||||
"line_range_end": 5
|
||||
},
|
||||
{
|
||||
"content_type": "text",
|
||||
"text": "\n- ✅ `node scripts/fibonacci.js 10` produced “55”"
|
||||
},
|
||||
{
|
||||
"content_type": "terminal_chunk_citation",
|
||||
"terminal_chunk_id": "6ee559",
|
||||
"line_range_start": 1,
|
||||
"line_range_end": 3
|
||||
},
|
||||
{
|
||||
"content_type": "text",
|
||||
"text": "\n\nCodex couldn't run certain commands due to environment limitations. Consider configuring a setup script or internet access in your Codex environment to install dependencies."
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
edition = "2024"
|
||||
name = "codex-cli"
|
||||
version = { workspace = true }
|
||||
edition = "2024"
|
||||
|
||||
[[bin]]
|
||||
name = "codex"
|
||||
@@ -17,11 +17,13 @@ workspace = true
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
codex-core = { path = "../core" }
|
||||
clap_complete = "4"
|
||||
codex-arg0 = { path = "../arg0" }
|
||||
codex-chatgpt = { path = "../chatgpt" }
|
||||
codex-common = { path = "../common", features = ["cli"] }
|
||||
codex-core = { path = "../core" }
|
||||
codex-exec = { path = "../exec" }
|
||||
codex-login = { path = "../login" }
|
||||
codex-linux-sandbox = { path = "../linux-sandbox" }
|
||||
codex-mcp-server = { path = "../mcp-server" }
|
||||
codex-tui = { path = "../tui" }
|
||||
serde_json = "1"
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_common::SandboxPermissionOption;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::exec::StdioPolicy;
|
||||
use codex_core::config_types::SandboxMode;
|
||||
use codex_core::exec::spawn_command_under_linux_sandbox;
|
||||
use codex_core::exec::spawn_command_under_seatbelt;
|
||||
use codex_core::exec_env::create_env;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_core::seatbelt::spawn_command_under_seatbelt;
|
||||
use codex_core::spawn::StdioPolicy;
|
||||
|
||||
use crate::LandlockCommand;
|
||||
use crate::SeatbeltCommand;
|
||||
@@ -20,13 +19,11 @@ pub async fn run_command_under_seatbelt(
|
||||
) -> anyhow::Result<()> {
|
||||
let SeatbeltCommand {
|
||||
full_auto,
|
||||
sandbox,
|
||||
config_overrides,
|
||||
command,
|
||||
} = command;
|
||||
run_command_under_sandbox(
|
||||
full_auto,
|
||||
sandbox,
|
||||
command,
|
||||
config_overrides,
|
||||
codex_linux_sandbox_exe,
|
||||
@@ -41,13 +38,11 @@ pub async fn run_command_under_landlock(
|
||||
) -> anyhow::Result<()> {
|
||||
let LandlockCommand {
|
||||
full_auto,
|
||||
sandbox,
|
||||
config_overrides,
|
||||
command,
|
||||
} = command;
|
||||
run_command_under_sandbox(
|
||||
full_auto,
|
||||
sandbox,
|
||||
command,
|
||||
config_overrides,
|
||||
codex_linux_sandbox_exe,
|
||||
@@ -63,20 +58,19 @@ enum SandboxType {
|
||||
|
||||
async fn run_command_under_sandbox(
|
||||
full_auto: bool,
|
||||
sandbox: SandboxPermissionOption,
|
||||
command: Vec<String>,
|
||||
config_overrides: CliConfigOverrides,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
sandbox_type: SandboxType,
|
||||
) -> anyhow::Result<()> {
|
||||
let sandbox_policy = create_sandbox_policy(full_auto, sandbox);
|
||||
let sandbox_mode = create_sandbox_mode(full_auto);
|
||||
let cwd = std::env::current_dir()?;
|
||||
let config = Config::load_with_cli_overrides(
|
||||
config_overrides
|
||||
.parse_overrides()
|
||||
.map_err(anyhow::Error::msg)?,
|
||||
ConfigOverrides {
|
||||
sandbox_policy: Some(sandbox_policy),
|
||||
sandbox_mode: Some(sandbox_mode),
|
||||
codex_linux_sandbox_exe,
|
||||
..Default::default()
|
||||
},
|
||||
@@ -110,13 +104,10 @@ async fn run_command_under_sandbox(
|
||||
handle_exit_status(status);
|
||||
}
|
||||
|
||||
pub fn create_sandbox_policy(full_auto: bool, sandbox: SandboxPermissionOption) -> SandboxPolicy {
|
||||
pub fn create_sandbox_mode(full_auto: bool) -> SandboxMode {
|
||||
if full_auto {
|
||||
SandboxPolicy::new_full_auto_policy()
|
||||
SandboxMode::WorkspaceWrite
|
||||
} else {
|
||||
match sandbox.permissions.map(Into::into) {
|
||||
Some(sandbox_policy) => sandbox_policy,
|
||||
None => SandboxPolicy::new_read_only_policy(),
|
||||
}
|
||||
SandboxMode::ReadOnly
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ pub mod proto;
|
||||
|
||||
use clap::Parser;
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_common::SandboxPermissionOption;
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
pub struct SeatbeltCommand {
|
||||
@@ -13,9 +12,6 @@ pub struct SeatbeltCommand {
|
||||
#[arg(long = "full-auto", default_value_t = false)]
|
||||
pub full_auto: bool,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub sandbox: SandboxPermissionOption,
|
||||
|
||||
#[clap(skip)]
|
||||
pub config_overrides: CliConfigOverrides,
|
||||
|
||||
@@ -30,9 +26,6 @@ pub struct LandlockCommand {
|
||||
#[arg(long = "full-auto", default_value_t = false)]
|
||||
pub full_auto: bool,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub sandbox: SandboxPermissionOption,
|
||||
|
||||
#[clap(skip)]
|
||||
pub config_overrides: CliConfigOverrides,
|
||||
|
||||
|
||||
@@ -1,25 +1,16 @@
|
||||
use std::env;
|
||||
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_login::AuthMode;
|
||||
use codex_login::OPENAI_API_KEY_ENV_VAR;
|
||||
use codex_login::load_auth;
|
||||
use codex_login::login_with_api_key;
|
||||
use codex_login::login_with_chatgpt;
|
||||
|
||||
pub async fn run_login_with_chatgpt(cli_config_overrides: CliConfigOverrides) -> ! {
|
||||
let cli_overrides = match cli_config_overrides.parse_overrides() {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
eprintln!("Error parsing -c overrides: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let config_overrides = ConfigOverrides::default();
|
||||
let config = match Config::load_with_cli_overrides(cli_overrides, config_overrides) {
|
||||
Ok(config) => config,
|
||||
Err(e) => {
|
||||
eprintln!("Error loading configuration: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
let config = load_config_or_exit(cli_config_overrides);
|
||||
|
||||
let capture_output = false;
|
||||
match login_with_chatgpt(&config.codex_home, capture_output).await {
|
||||
@@ -33,3 +24,103 @@ pub async fn run_login_with_chatgpt(cli_config_overrides: CliConfigOverrides) ->
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run_login_with_api_key(
|
||||
cli_config_overrides: CliConfigOverrides,
|
||||
api_key: String,
|
||||
) -> ! {
|
||||
let config = load_config_or_exit(cli_config_overrides);
|
||||
|
||||
match login_with_api_key(&config.codex_home, &api_key) {
|
||||
Ok(_) => {
|
||||
eprintln!("Successfully logged in");
|
||||
std::process::exit(0);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Error logging in: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run_login_status(cli_config_overrides: CliConfigOverrides) -> ! {
|
||||
let config = load_config_or_exit(cli_config_overrides);
|
||||
|
||||
match load_auth(&config.codex_home, true) {
|
||||
Ok(Some(auth)) => match auth.mode {
|
||||
AuthMode::ApiKey => {
|
||||
if let Some(api_key) = auth.api_key.as_deref() {
|
||||
eprintln!("Logged in using an API key - {}", safe_format_key(api_key));
|
||||
|
||||
if let Ok(env_api_key) = env::var(OPENAI_API_KEY_ENV_VAR) {
|
||||
if env_api_key == api_key {
|
||||
eprintln!(
|
||||
" API loaded from OPENAI_API_KEY environment variable or .env file"
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eprintln!("Logged in using an API key");
|
||||
}
|
||||
std::process::exit(0);
|
||||
}
|
||||
AuthMode::ChatGPT => {
|
||||
eprintln!("Logged in using ChatGPT");
|
||||
std::process::exit(0);
|
||||
}
|
||||
},
|
||||
Ok(None) => {
|
||||
eprintln!("Not logged in");
|
||||
std::process::exit(1);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Error checking login status: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn load_config_or_exit(cli_config_overrides: CliConfigOverrides) -> Config {
|
||||
let cli_overrides = match cli_config_overrides.parse_overrides() {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
eprintln!("Error parsing -c overrides: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let config_overrides = ConfigOverrides::default();
|
||||
match Config::load_with_cli_overrides(cli_overrides, config_overrides) {
|
||||
Ok(config) => config,
|
||||
Err(e) => {
|
||||
eprintln!("Error loading configuration: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn safe_format_key(key: &str) -> String {
|
||||
if key.len() <= 13 {
|
||||
return "***".to_string();
|
||||
}
|
||||
let prefix = &key[..8];
|
||||
let suffix = &key[key.len() - 5..];
|
||||
format!("{prefix}***{suffix}")
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::safe_format_key;
|
||||
|
||||
#[test]
|
||||
fn formats_long_key() {
|
||||
let key = "sk-proj-1234567890ABCDE";
|
||||
assert_eq!(safe_format_key(key), "sk-proj-***ABCDE");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn short_key_returns_stars() {
|
||||
let key = "sk-proj-12345";
|
||||
assert_eq!(safe_format_key(key), "***");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,14 @@
|
||||
use clap::CommandFactory;
|
||||
use clap::Parser;
|
||||
use clap_complete::Shell;
|
||||
use clap_complete::generate;
|
||||
use codex_arg0::arg0_dispatch_or_else;
|
||||
use codex_chatgpt::apply_command::ApplyCommand;
|
||||
use codex_chatgpt::apply_command::run_apply_command;
|
||||
use codex_cli::LandlockCommand;
|
||||
use codex_cli::SeatbeltCommand;
|
||||
use codex_cli::login::run_login_status;
|
||||
use codex_cli::login::run_login_with_api_key;
|
||||
use codex_cli::login::run_login_with_chatgpt;
|
||||
use codex_cli::proto;
|
||||
use codex_common::CliConfigOverrides;
|
||||
@@ -37,7 +45,7 @@ enum Subcommand {
|
||||
#[clap(visible_alias = "e")]
|
||||
Exec(ExecCli),
|
||||
|
||||
/// Login with ChatGPT.
|
||||
/// Manage login.
|
||||
Login(LoginCommand),
|
||||
|
||||
/// Experimental: run Codex as an MCP server.
|
||||
@@ -47,8 +55,22 @@ enum Subcommand {
|
||||
#[clap(visible_alias = "p")]
|
||||
Proto(ProtoCli),
|
||||
|
||||
/// Generate shell completion scripts.
|
||||
Completion(CompletionCommand),
|
||||
|
||||
/// Internal debugging commands.
|
||||
Debug(DebugArgs),
|
||||
|
||||
/// Apply the latest diff produced by Codex agent as a `git apply` to your local working tree.
|
||||
#[clap(visible_alias = "a")]
|
||||
Apply(ApplyCommand),
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
struct CompletionCommand {
|
||||
/// Shell to generate completions for
|
||||
#[clap(value_enum, default_value_t = Shell::Bash)]
|
||||
shell: Shell,
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
@@ -70,10 +92,22 @@ enum DebugCommand {
|
||||
struct LoginCommand {
|
||||
#[clap(skip)]
|
||||
config_overrides: CliConfigOverrides,
|
||||
|
||||
#[arg(long = "api-key", value_name = "API_KEY")]
|
||||
api_key: Option<String>,
|
||||
|
||||
#[command(subcommand)]
|
||||
action: Option<LoginSubcommand>,
|
||||
}
|
||||
|
||||
#[derive(Debug, clap::Subcommand)]
|
||||
enum LoginSubcommand {
|
||||
/// Show login status.
|
||||
Status,
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
codex_linux_sandbox::run_with_sandbox(|codex_linux_sandbox_exe| async move {
|
||||
arg0_dispatch_or_else(|codex_linux_sandbox_exe| async move {
|
||||
cli_main(codex_linux_sandbox_exe).await?;
|
||||
Ok(())
|
||||
})
|
||||
@@ -86,7 +120,10 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
|
||||
None => {
|
||||
let mut tui_cli = cli.interactive;
|
||||
prepend_config_flags(&mut tui_cli.config_overrides, cli.config_overrides);
|
||||
codex_tui::run_main(tui_cli, codex_linux_sandbox_exe)?;
|
||||
let usage = codex_tui::run_main(tui_cli, codex_linux_sandbox_exe).await?;
|
||||
if !usage.is_zero() {
|
||||
println!("{}", codex_core::protocol::FinalOutput::from(usage));
|
||||
}
|
||||
}
|
||||
Some(Subcommand::Exec(mut exec_cli)) => {
|
||||
prepend_config_flags(&mut exec_cli.config_overrides, cli.config_overrides);
|
||||
@@ -97,12 +134,26 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
|
||||
}
|
||||
Some(Subcommand::Login(mut login_cli)) => {
|
||||
prepend_config_flags(&mut login_cli.config_overrides, cli.config_overrides);
|
||||
run_login_with_chatgpt(login_cli.config_overrides).await;
|
||||
match login_cli.action {
|
||||
Some(LoginSubcommand::Status) => {
|
||||
run_login_status(login_cli.config_overrides).await;
|
||||
}
|
||||
None => {
|
||||
if let Some(api_key) = login_cli.api_key {
|
||||
run_login_with_api_key(login_cli.config_overrides, api_key).await;
|
||||
} else {
|
||||
run_login_with_chatgpt(login_cli.config_overrides).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(Subcommand::Proto(mut proto_cli)) => {
|
||||
prepend_config_flags(&mut proto_cli.config_overrides, cli.config_overrides);
|
||||
proto::run_main(proto_cli).await?;
|
||||
}
|
||||
Some(Subcommand::Completion(completion_cli)) => {
|
||||
print_completion(completion_cli);
|
||||
}
|
||||
Some(Subcommand::Debug(debug_args)) => match debug_args.cmd {
|
||||
DebugCommand::Seatbelt(mut seatbelt_cli) => {
|
||||
prepend_config_flags(&mut seatbelt_cli.config_overrides, cli.config_overrides);
|
||||
@@ -121,6 +172,10 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
|
||||
.await?;
|
||||
}
|
||||
},
|
||||
Some(Subcommand::Apply(mut apply_cli)) => {
|
||||
prepend_config_flags(&mut apply_cli.config_overrides, cli.config_overrides);
|
||||
run_apply_command(apply_cli, None).await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -136,3 +191,9 @@ fn prepend_config_flags(
|
||||
.raw_overrides
|
||||
.splice(0..0, cli_config_overrides.raw_overrides);
|
||||
}
|
||||
|
||||
fn print_completion(cmd: CompletionCommand) {
|
||||
let mut app = MultitoolCli::command();
|
||||
let name = "codex";
|
||||
generate(cmd.shell, &mut app, name, &mut std::io::stdout());
|
||||
}
|
||||
|
||||
@@ -4,10 +4,12 @@ use std::sync::Arc;
|
||||
use clap::Parser;
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_core::Codex;
|
||||
use codex_core::CodexSpawnOk;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::protocol::Submission;
|
||||
use codex_core::util::notify_on_sigint;
|
||||
use codex_login::load_auth;
|
||||
use tokio::io::AsyncBufReadExt;
|
||||
use tokio::io::BufReader;
|
||||
use tracing::error;
|
||||
@@ -34,8 +36,9 @@ pub async fn run_main(opts: ProtoCli) -> anyhow::Result<()> {
|
||||
.map_err(anyhow::Error::msg)?;
|
||||
|
||||
let config = Config::load_with_cli_overrides(overrides_vec, ConfigOverrides::default())?;
|
||||
let auth = load_auth(&config.codex_home, true)?;
|
||||
let ctrl_c = notify_on_sigint();
|
||||
let (codex, _init_id) = Codex::spawn(config, ctrl_c.clone()).await?;
|
||||
let CodexSpawnOk { codex, .. } = Codex::spawn(config, auth, ctrl_c.clone()).await?;
|
||||
let codex = Arc::new(codex);
|
||||
|
||||
// Task that reads JSON lines from stdin and forwards to Submission Queue
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
edition = "2024"
|
||||
name = "codex-common"
|
||||
version = { workspace = true }
|
||||
edition = "2024"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
@@ -9,10 +9,11 @@ workspace = true
|
||||
[dependencies]
|
||||
clap = { version = "4", features = ["derive", "wrap_help"], optional = true }
|
||||
codex-core = { path = "../core" }
|
||||
toml = { version = "0.8", optional = true }
|
||||
serde = { version = "1", optional = true }
|
||||
toml = { version = "0.9", optional = true }
|
||||
|
||||
[features]
|
||||
# Separate feature so that `clap` is not a mandatory dependency.
|
||||
cli = ["clap", "toml", "serde"]
|
||||
cli = ["clap", "serde", "toml"]
|
||||
elapsed = []
|
||||
sandbox_summary = []
|
||||
|
||||
@@ -1,26 +1,25 @@
|
||||
//! Standard type to use with the `--approval-mode` CLI option.
|
||||
//! Available when the `cli` feature is enabled for the crate.
|
||||
|
||||
use clap::ArgAction;
|
||||
use clap::Parser;
|
||||
use clap::ValueEnum;
|
||||
|
||||
use codex_core::config::parse_sandbox_permission_with_base_path;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::SandboxPermission;
|
||||
|
||||
#[derive(Clone, Copy, Debug, ValueEnum)]
|
||||
#[value(rename_all = "kebab-case")]
|
||||
pub enum ApprovalModeCliArg {
|
||||
/// Only run "trusted" commands (e.g. ls, cat, sed) without asking for user
|
||||
/// approval. Will escalate to the user if the model proposes a command that
|
||||
/// is not in the "trusted" set.
|
||||
Untrusted,
|
||||
|
||||
/// Run all commands without asking for user approval.
|
||||
/// Only asks for approval if a command fails to execute, in which case it
|
||||
/// will escalate to the user to ask for un-sandboxed execution.
|
||||
OnFailure,
|
||||
|
||||
/// Only run "known safe" commands (e.g. ls, cat, sed) without
|
||||
/// asking for user approval. Will escalate to the user if the model
|
||||
/// proposes a command that is not allow-listed.
|
||||
UnlessAllowListed,
|
||||
/// The model decides when to ask the user for approval.
|
||||
OnRequest,
|
||||
|
||||
/// Never ask for user approval
|
||||
/// Execution failures are immediately returned to the model.
|
||||
@@ -30,44 +29,10 @@ pub enum ApprovalModeCliArg {
|
||||
impl From<ApprovalModeCliArg> for AskForApproval {
|
||||
fn from(value: ApprovalModeCliArg) -> Self {
|
||||
match value {
|
||||
ApprovalModeCliArg::Untrusted => AskForApproval::UnlessTrusted,
|
||||
ApprovalModeCliArg::OnFailure => AskForApproval::OnFailure,
|
||||
ApprovalModeCliArg::UnlessAllowListed => AskForApproval::UnlessAllowListed,
|
||||
ApprovalModeCliArg::OnRequest => AskForApproval::OnRequest,
|
||||
ApprovalModeCliArg::Never => AskForApproval::Never,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
pub struct SandboxPermissionOption {
|
||||
/// Specify this flag multiple times to specify the full set of permissions
|
||||
/// to grant to Codex.
|
||||
///
|
||||
/// ```shell
|
||||
/// codex -s disk-full-read-access \
|
||||
/// -s disk-write-cwd \
|
||||
/// -s disk-write-platform-user-temp-folder \
|
||||
/// -s disk-write-platform-global-temp-folder
|
||||
/// ```
|
||||
///
|
||||
/// Note disk-write-folder takes a value:
|
||||
///
|
||||
/// ```shell
|
||||
/// -s disk-write-folder=$HOME/.pyenv/shims
|
||||
/// ```
|
||||
///
|
||||
/// These permissions are quite broad and should be used with caution:
|
||||
///
|
||||
/// ```shell
|
||||
/// -s disk-full-write-access
|
||||
/// -s network-full-access
|
||||
/// ```
|
||||
#[arg(long = "sandbox-permission", short = 's', action = ArgAction::Append, value_parser = parse_sandbox_permission)]
|
||||
pub permissions: Option<Vec<SandboxPermission>>,
|
||||
}
|
||||
|
||||
/// Custom value-parser so we can keep the CLI surface small *and*
|
||||
/// still handle the parameterised `disk-write-folder` case.
|
||||
fn parse_sandbox_permission(raw: &str) -> std::io::Result<SandboxPermission> {
|
||||
let base_path = std::env::current_dir()?;
|
||||
parse_sandbox_permission_with_base_path(raw, base_path)
|
||||
}
|
||||
|
||||
@@ -64,7 +64,11 @@ impl CliConfigOverrides {
|
||||
// `-c model=o3` without the quotes.
|
||||
let value: Value = match parse_toml_value(value_str) {
|
||||
Ok(v) => v,
|
||||
Err(_) => Value::String(value_str.to_string()),
|
||||
Err(_) => {
|
||||
// Strip leading/trailing quotes if present
|
||||
let trimmed = value_str.trim().trim_matches(|c| c == '"' || c == '\'');
|
||||
Value::String(trimmed.to_string())
|
||||
}
|
||||
};
|
||||
|
||||
Ok((key.to_string(), value))
|
||||
|
||||
29
codex-rs/common/src/config_summary.rs
Normal file
29
codex-rs/common/src/config_summary.rs
Normal file
@@ -0,0 +1,29 @@
|
||||
use codex_core::WireApi;
|
||||
use codex_core::config::Config;
|
||||
|
||||
use crate::sandbox_summary::summarize_sandbox_policy;
|
||||
|
||||
/// Build a list of key/value pairs summarizing the effective configuration.
|
||||
pub fn create_config_summary_entries(config: &Config) -> Vec<(&'static str, String)> {
|
||||
let mut entries = vec![
|
||||
("workdir", config.cwd.display().to_string()),
|
||||
("model", config.model.clone()),
|
||||
("provider", config.model_provider_id.clone()),
|
||||
("approval", config.approval_policy.to_string()),
|
||||
("sandbox", summarize_sandbox_policy(&config.sandbox_policy)),
|
||||
];
|
||||
if config.model_provider.wire_api == WireApi::Responses
|
||||
&& config.model_family.supports_reasoning_summaries
|
||||
{
|
||||
entries.push((
|
||||
"reasoning effort",
|
||||
config.model_reasoning_effort.to_string(),
|
||||
));
|
||||
entries.push((
|
||||
"reasoning summaries",
|
||||
config.model_reasoning_summary.to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
entries
|
||||
}
|
||||
@@ -20,7 +20,7 @@ pub fn format_duration(duration: Duration) -> String {
|
||||
|
||||
fn format_elapsed_millis(millis: i64) -> String {
|
||||
if millis < 1000 {
|
||||
format!("{}ms", millis)
|
||||
format!("{millis}ms")
|
||||
} else if millis < 60_000 {
|
||||
format!("{:.2}s", millis as f64 / 1000.0)
|
||||
} else {
|
||||
|
||||
@@ -6,11 +6,24 @@ pub mod elapsed;
|
||||
|
||||
#[cfg(feature = "cli")]
|
||||
pub use approval_mode_cli_arg::ApprovalModeCliArg;
|
||||
|
||||
#[cfg(feature = "cli")]
|
||||
pub use approval_mode_cli_arg::SandboxPermissionOption;
|
||||
mod sandbox_mode_cli_arg;
|
||||
|
||||
#[cfg(feature = "cli")]
|
||||
pub use sandbox_mode_cli_arg::SandboxModeCliArg;
|
||||
|
||||
#[cfg(any(feature = "cli", test))]
|
||||
mod config_override;
|
||||
|
||||
#[cfg(feature = "cli")]
|
||||
pub use config_override::CliConfigOverrides;
|
||||
|
||||
mod sandbox_summary;
|
||||
|
||||
#[cfg(feature = "sandbox_summary")]
|
||||
pub use sandbox_summary::summarize_sandbox_policy;
|
||||
|
||||
mod config_summary;
|
||||
|
||||
pub use config_summary::create_config_summary_entries;
|
||||
|
||||
28
codex-rs/common/src/sandbox_mode_cli_arg.rs
Normal file
28
codex-rs/common/src/sandbox_mode_cli_arg.rs
Normal file
@@ -0,0 +1,28 @@
|
||||
//! Standard type to use with the `--sandbox` (`-s`) CLI option.
|
||||
//!
|
||||
//! This mirrors the variants of [`codex_core::protocol::SandboxPolicy`], but
|
||||
//! without any of the associated data so it can be expressed as a simple flag
|
||||
//! on the command-line. Users that need to tweak the advanced options for
|
||||
//! `workspace-write` can continue to do so via `-c` overrides or their
|
||||
//! `config.toml`.
|
||||
|
||||
use clap::ValueEnum;
|
||||
use codex_core::config_types::SandboxMode;
|
||||
|
||||
#[derive(Clone, Copy, Debug, ValueEnum)]
|
||||
#[value(rename_all = "kebab-case")]
|
||||
pub enum SandboxModeCliArg {
|
||||
ReadOnly,
|
||||
WorkspaceWrite,
|
||||
DangerFullAccess,
|
||||
}
|
||||
|
||||
impl From<SandboxModeCliArg> for SandboxMode {
|
||||
fn from(value: SandboxModeCliArg) -> Self {
|
||||
match value {
|
||||
SandboxModeCliArg::ReadOnly => SandboxMode::ReadOnly,
|
||||
SandboxModeCliArg::WorkspaceWrite => SandboxMode::WorkspaceWrite,
|
||||
SandboxModeCliArg::DangerFullAccess => SandboxMode::DangerFullAccess,
|
||||
}
|
||||
}
|
||||
}
|
||||
32
codex-rs/common/src/sandbox_summary.rs
Normal file
32
codex-rs/common/src/sandbox_summary.rs
Normal file
@@ -0,0 +1,32 @@
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
|
||||
pub fn summarize_sandbox_policy(sandbox_policy: &SandboxPolicy) -> String {
|
||||
match sandbox_policy {
|
||||
SandboxPolicy::DangerFullAccess => "danger-full-access".to_string(),
|
||||
SandboxPolicy::ReadOnly => "read-only".to_string(),
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots,
|
||||
network_access,
|
||||
include_default_writable_roots,
|
||||
} => {
|
||||
let mut summary = "workspace-write".to_string();
|
||||
if !writable_roots.is_empty() {
|
||||
summary.push_str(&format!(
|
||||
" [{}]",
|
||||
writable_roots
|
||||
.iter()
|
||||
.map(|p| p.to_string_lossy())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
));
|
||||
}
|
||||
if !*include_default_writable_roots {
|
||||
summary.push_str(" (exact writable roots)");
|
||||
}
|
||||
if *network_access {
|
||||
summary.push_str(" (network access enabled)");
|
||||
}
|
||||
summary
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -20,41 +20,11 @@ The model that Codex should use.
|
||||
model = "o3" # overrides the default of "codex-mini-latest"
|
||||
```
|
||||
|
||||
## model_provider
|
||||
|
||||
Codex comes bundled with a number of "model providers" predefined. This config value is a string that indicates which provider to use. You can also define your own providers via `model_providers`.
|
||||
|
||||
For example, if you are running ollama with Mistral locally, then you would need to add the following to your config:
|
||||
|
||||
```toml
|
||||
model = "mistral"
|
||||
model_provider = "ollama"
|
||||
```
|
||||
|
||||
because the following definition for `ollama` is included in Codex:
|
||||
|
||||
```toml
|
||||
[model_providers.ollama]
|
||||
name = "Ollama"
|
||||
base_url = "http://localhost:11434/v1"
|
||||
wire_api = "chat"
|
||||
```
|
||||
|
||||
This option defaults to `"openai"` and the corresponding provider is defined as follows:
|
||||
|
||||
```toml
|
||||
[model_providers.openai]
|
||||
name = "OpenAI"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
env_key = "OPENAI_API_KEY"
|
||||
wire_api = "responses"
|
||||
```
|
||||
|
||||
## model_providers
|
||||
|
||||
This option lets you override and amend the default set of model providers bundled with Codex. This value is a map where the key is the value to use with `model_provider` to select the correspodning provider.
|
||||
This option lets you override and amend the default set of model providers bundled with Codex. This value is a map where the key is the value to use with `model_provider` to select the corresponding provider.
|
||||
|
||||
For example, if you wanted to add a provider that uses the OpenAI 4o model via the chat completions API, then you
|
||||
For example, if you wanted to add a provider that uses the OpenAI 4o model via the chat completions API, then you could add the following configuration:
|
||||
|
||||
```toml
|
||||
# Recall that in TOML, root keys must be listed before tables.
|
||||
@@ -71,8 +41,97 @@ base_url = "https://api.openai.com/v1"
|
||||
# using Codex with this provider. The value of the environment variable must be
|
||||
# non-empty and will be used in the `Bearer TOKEN` HTTP header for the POST request.
|
||||
env_key = "OPENAI_API_KEY"
|
||||
# valid values for wire_api are "chat" and "responses".
|
||||
# Valid values for wire_api are "chat" and "responses". Defaults to "chat" if omitted.
|
||||
wire_api = "chat"
|
||||
# If necessary, extra query params that need to be added to the URL.
|
||||
# See the Azure example below.
|
||||
query_params = {}
|
||||
```
|
||||
|
||||
Note this makes it possible to use Codex CLI with non-OpenAI models, so long as they use a wire API that is compatible with the OpenAI chat completions API. For example, you could define the following provider to use Codex CLI with Ollama running locally:
|
||||
|
||||
```toml
|
||||
[model_providers.ollama]
|
||||
name = "Ollama"
|
||||
base_url = "http://localhost:11434/v1"
|
||||
```
|
||||
|
||||
Or a third-party provider (using a distinct environment variable for the API key):
|
||||
|
||||
```toml
|
||||
[model_providers.mistral]
|
||||
name = "Mistral"
|
||||
base_url = "https://api.mistral.ai/v1"
|
||||
env_key = "MISTRAL_API_KEY"
|
||||
```
|
||||
|
||||
Note that Azure requires `api-version` to be passed as a query parameter, so be sure to specify it as part of `query_params` when defining the Azure provider:
|
||||
|
||||
```toml
|
||||
[model_providers.azure]
|
||||
name = "Azure"
|
||||
# Make sure you set the appropriate subdomain for this URL.
|
||||
base_url = "https://YOUR_PROJECT_NAME.openai.azure.com/openai"
|
||||
env_key = "AZURE_OPENAI_API_KEY" # Or "OPENAI_API_KEY", whichever you use.
|
||||
query_params = { api-version = "2025-04-01-preview" }
|
||||
```
|
||||
|
||||
It is also possible to configure a provider to include extra HTTP headers with a request. These can be hardcoded values (`http_headers`) or values read from environment variables (`env_http_headers`):
|
||||
|
||||
```toml
|
||||
[model_providers.example]
|
||||
# name, base_url, ...
|
||||
|
||||
# This will add the HTTP header `X-Example-Header` with value `example-value`
|
||||
# to each request to the model provider.
|
||||
http_headers = { "X-Example-Header" = "example-value" }
|
||||
|
||||
# This will add the HTTP header `X-Example-Features` with the value of the
|
||||
# `EXAMPLE_FEATURES` environment variable to each request to the model provider
|
||||
# _if_ the environment variable is set and its value is non-empty.
|
||||
env_http_headers = { "X-Example-Features": "EXAMPLE_FEATURES" }
|
||||
```
|
||||
|
||||
### Per-provider network tuning
|
||||
|
||||
The following optional settings control retry behaviour and streaming idle timeouts **per model provider**. They must be specified inside the corresponding `[model_providers.<id>]` block in `config.toml`. (Older releases accepted top‑level keys; those are now ignored.)
|
||||
|
||||
Example:
|
||||
|
||||
```toml
|
||||
[model_providers.openai]
|
||||
name = "OpenAI"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
env_key = "OPENAI_API_KEY"
|
||||
# network tuning overrides (all optional; falls back to built‑in defaults)
|
||||
request_max_retries = 4 # retry failed HTTP requests
|
||||
stream_max_retries = 10 # retry dropped SSE streams
|
||||
stream_idle_timeout_ms = 300000 # 5m idle timeout
|
||||
```
|
||||
|
||||
#### request_max_retries
|
||||
|
||||
How many times Codex will retry a failed HTTP request to the model provider. Defaults to `4`.
|
||||
|
||||
#### stream_max_retries
|
||||
|
||||
Number of times Codex will attempt to reconnect when a streaming response is interrupted. Defaults to `10`.
|
||||
|
||||
#### stream_idle_timeout_ms
|
||||
|
||||
How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to `300_000` (5 minutes).
|
||||
|
||||
## model_provider
|
||||
|
||||
Identifies which provider to use from the `model_providers` map. Defaults to `"openai"`. You can override the `base_url` for the built-in `openai` provider via the `OPENAI_BASE_URL` environment variable.
|
||||
|
||||
Note that if you override `model_provider`, then you likely want to override
|
||||
`model`, as well. For example, if you are running ollama with Mistral locally,
|
||||
then you would need to add the following to your config in addition to the new entry in the `model_providers` map:
|
||||
|
||||
```toml
|
||||
model_provider = "ollama"
|
||||
model = "mistral"
|
||||
```
|
||||
|
||||
## approval_policy
|
||||
@@ -80,16 +139,29 @@ wire_api = "chat"
|
||||
Determines when the user should be prompted to approve whether Codex can execute a command:
|
||||
|
||||
```toml
|
||||
# This is analogous to --suggest in the TypeScript Codex CLI
|
||||
approval_policy = "unless-allow-listed"
|
||||
# Codex has hardcoded logic that defines a set of "trusted" commands.
|
||||
# Setting the approval_policy to `untrusted` means that Codex will prompt the
|
||||
# user before running a command not in the "trusted" set.
|
||||
#
|
||||
# See https://github.com/openai/codex/issues/1260 for the plan to enable
|
||||
# end-users to define their own trusted commands.
|
||||
approval_policy = "untrusted"
|
||||
```
|
||||
|
||||
If you want to be notified whenever a command fails, use "on-failure":
|
||||
```toml
|
||||
# If the command fails when run in the sandbox, Codex asks for permission to
|
||||
# retry the command outside the sandbox.
|
||||
approval_policy = "on-failure"
|
||||
```
|
||||
|
||||
If you want the model to run until it decides that it needs to ask you for escalated permissions, use "on-request":
|
||||
```toml
|
||||
# The model decides when to escalate
|
||||
approval_policy = "on-request"
|
||||
```
|
||||
|
||||
Alternatively, you can have the model run until it is done, and never ask to run a command with escalated permissions:
|
||||
```toml
|
||||
# User is never prompted: if the command fails, Codex will automatically try
|
||||
# something out. Note the `exec` subcommand always uses this mode.
|
||||
@@ -106,7 +178,6 @@ Here is an example of a `config.toml` that defines multiple profiles:
|
||||
```toml
|
||||
model = "o3"
|
||||
approval_policy = "unless-allow-listed"
|
||||
sandbox_permissions = ["disk-full-read-access"]
|
||||
disable_response_storage = false
|
||||
|
||||
# Setting `profile` is equivalent to specifying `--profile o3` on the command
|
||||
@@ -123,6 +194,8 @@ wire_api = "chat"
|
||||
model = "o3"
|
||||
model_provider = "openai"
|
||||
approval_policy = "never"
|
||||
model_reasoning_effort = "high"
|
||||
model_reasoning_summary = "detailed"
|
||||
|
||||
[profiles.gpt3]
|
||||
model = "gpt-3.5-turbo"
|
||||
@@ -170,31 +243,57 @@ To disable reasoning summaries, set `model_reasoning_summary` to `"none"` in you
|
||||
model_reasoning_summary = "none" # disable reasoning summaries
|
||||
```
|
||||
|
||||
## sandbox_permissions
|
||||
## model_supports_reasoning_summaries
|
||||
|
||||
List of permissions to grant to the sandbox that Codex uses to execute untrusted commands:
|
||||
By default, `reasoning` is only set on requests to OpenAI models that are known to support them. To force `reasoning` to set on requests to the current model, you can force this behavior by setting the following in `config.toml`:
|
||||
|
||||
```toml
|
||||
# This is comparable to --full-auto in the TypeScript Codex CLI, though
|
||||
# specifying `disk-write-platform-global-temp-folder` adds /tmp as a writable
|
||||
# folder in addition to $TMPDIR.
|
||||
sandbox_permissions = [
|
||||
"disk-full-read-access",
|
||||
"disk-write-platform-user-temp-folder",
|
||||
"disk-write-platform-global-temp-folder",
|
||||
"disk-write-cwd",
|
||||
]
|
||||
model_supports_reasoning_summaries = true
|
||||
```
|
||||
|
||||
To add additional writable folders, use `disk-write-folder`, which takes a parameter (this can be specified multiple times):
|
||||
## sandbox_mode
|
||||
|
||||
Codex executes model-generated shell commands inside an OS-level sandbox.
|
||||
|
||||
In most cases you can pick the desired behaviour with a single option:
|
||||
|
||||
```toml
|
||||
sandbox_permissions = [
|
||||
# ...
|
||||
"disk-write-folder=/Users/mbolin/.pyenv/shims",
|
||||
]
|
||||
# same as `--sandbox read-only`
|
||||
sandbox_mode = "read-only"
|
||||
```
|
||||
|
||||
The default policy is `read-only`, which means commands can read any file on
|
||||
disk, but attempts to write a file or access the network will be blocked.
|
||||
|
||||
A more relaxed policy is `workspace-write`. When specified, the current working directory for the Codex task will be writable (as well as `$TMPDIR` on macOS). Note that the CLI defaults to using the directory where it was spawned as `cwd`, though this can be overridden using `--cwd/-C`.
|
||||
|
||||
On macOS (and soon Linux), all writable roots (including `cwd`) that contain a `.git/` folder _as an immediate child_ will configure the `.git/` folder to be read-only while the rest of the Git repository will be writable. This means that commands like `git commit` will fail, by default (as it entails writing to `.git/`), and will require Codex to ask for permission.
|
||||
|
||||
```toml
|
||||
# same as `--sandbox workspace-write`
|
||||
sandbox_mode = "workspace-write"
|
||||
|
||||
# Extra settings that only apply when `sandbox = "workspace-write"`.
|
||||
[sandbox_workspace_write]
|
||||
# By default, only the cwd for the Codex session will be writable (and $TMPDIR
|
||||
# on macOS), but you can specify additional writable folders in this array.
|
||||
writable_roots = ["/tmp"]
|
||||
# Allow the command being run inside the sandbox to make outbound network
|
||||
# requests. Disabled by default.
|
||||
network_access = false
|
||||
```
|
||||
|
||||
To disable sandboxing altogether, specify `danger-full-access` like so:
|
||||
|
||||
```toml
|
||||
# same as `--sandbox danger-full-access`
|
||||
sandbox_mode = "danger-full-access"
|
||||
```
|
||||
|
||||
This is reasonable to use if Codex is running in an environment that provides its own sandboxing (such as a Docker container) such that further sandboxing is unnecessary.
|
||||
|
||||
Though using this option may also be necessary if you try to use Codex in environments where its native sandboxing mechanisms are unsupported, such as older Linux kernels or on Windows.
|
||||
|
||||
## mcp_servers
|
||||
|
||||
Defines the list of MCP servers that Codex can consult for tool use. Currently, only servers that are launched by executing a program that communicate over stdio are supported. For servers that use the SSE transport, consider an adapter like [mcp-proxy](https://github.com/sparfenyuk/mcp-proxy).
|
||||
@@ -384,7 +483,7 @@ Currently, `"vscode"` is the default, though Codex does not verify VS Code is in
|
||||
|
||||
## hide_agent_reasoning
|
||||
|
||||
Codex intermittently emits "reasoning" events that show the model’s internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.
|
||||
Codex intermittently emits "reasoning" events that show the model's internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.
|
||||
|
||||
Setting `hide_agent_reasoning` to `true` suppresses these events in **both** the TUI as well as the headless `exec` sub-command:
|
||||
|
||||
@@ -392,6 +491,29 @@ Setting `hide_agent_reasoning` to `true` suppresses these events in **both** the
|
||||
hide_agent_reasoning = true # defaults to false
|
||||
```
|
||||
|
||||
## show_raw_agent_reasoning
|
||||
|
||||
Surfaces the model’s raw chain-of-thought ("raw reasoning content") when available.
|
||||
|
||||
Notes:
|
||||
- Only takes effect if the selected model/provider actually emits raw reasoning content. Many models do not. When unsupported, this option has no visible effect.
|
||||
- Raw reasoning may include intermediate thoughts or sensitive context. Enable only if acceptable for your workflow.
|
||||
|
||||
Example:
|
||||
```toml
|
||||
show_raw_agent_reasoning = true # defaults to false
|
||||
```
|
||||
|
||||
## model_context_window
|
||||
|
||||
The size of the context window for the model, in tokens.
|
||||
|
||||
In general, Codex knows the context window for the most common OpenAI models, but if you are using a new model with an old version of the Codex CLI, then you can use `model_context_window` to tell Codex what value to use to determine how much context is left during a conversation.
|
||||
|
||||
## model_max_output_tokens
|
||||
|
||||
This is analogous to `model_context_window`, but for the maximum number of output tokens for the model.
|
||||
|
||||
## project_doc_max_bytes
|
||||
|
||||
Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
|
||||
@@ -402,14 +524,5 @@ Options that are specific to the TUI.
|
||||
|
||||
```toml
|
||||
[tui]
|
||||
# This will make it so that Codex does not try to process mouse events, which
|
||||
# means your Terminal's native drag-to-text to text selection and copy/paste
|
||||
# should work. The tradeoff is that Codex will not receive any mouse events, so
|
||||
# it will not be possible to use the mouse to scroll conversation history.
|
||||
#
|
||||
# Note that most terminals support holding down a modifier key when using the
|
||||
# mouse to support text selection. For example, even if Codex mouse capture is
|
||||
# enabled (i.e., this is set to `false`), you can still hold down alt while
|
||||
# dragging the mouse to select text.
|
||||
disable_mouse_capture = true # defaults to `false`
|
||||
# More to come here
|
||||
```
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
edition = "2024"
|
||||
name = "codex-core"
|
||||
version = { workspace = true }
|
||||
edition = "2024"
|
||||
|
||||
[lib]
|
||||
name = "codex_core"
|
||||
@@ -13,8 +13,9 @@ workspace = true
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
async-channel = "2.3.1"
|
||||
base64 = "0.21"
|
||||
base64 = "0.22"
|
||||
bytes = "1.10.1"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
codex-apply-patch = { path = "../apply-patch" }
|
||||
codex-login = { path = "../login" }
|
||||
codex-mcp-client = { path = "../mcp-client" }
|
||||
@@ -22,18 +23,19 @@ dirs = "6"
|
||||
env-flags = "0.1.1"
|
||||
eventsource-stream = "0.2.3"
|
||||
fs2 = "0.4.3"
|
||||
fs-err = "3.1.0"
|
||||
futures = "0.3"
|
||||
libc = "0.2.174"
|
||||
mcp-types = { path = "../mcp-types" }
|
||||
mime_guess = "2.0"
|
||||
patch = "0.7"
|
||||
path-absolutize = "3.1.1"
|
||||
rand = "0.9"
|
||||
reqwest = { version = "0.12", features = ["json", "stream"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
strum = "0.27.1"
|
||||
strum_macros = "0.27.1"
|
||||
serde_bytes = "0.11"
|
||||
sha1 = "0.10.6"
|
||||
shlex = "1.3.0"
|
||||
similar = "2.7.0"
|
||||
strum_macros = "0.27.2"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3", features = ["formatting", "local-offset", "macros"] }
|
||||
tokio = { version = "1", features = [
|
||||
@@ -44,13 +46,15 @@ tokio = { version = "1", features = [
|
||||
"signal",
|
||||
] }
|
||||
tokio-util = "0.7.14"
|
||||
toml = "0.8.20"
|
||||
toml = "0.9.4"
|
||||
tracing = { version = "0.1.41", features = ["log"] }
|
||||
tree-sitter = "0.25.3"
|
||||
tree-sitter-bash = "0.23.3"
|
||||
tree-sitter = "0.25.8"
|
||||
tree-sitter-bash = "0.25.0"
|
||||
uuid = { version = "1", features = ["serde", "v4"] }
|
||||
whoami = "1.6.0"
|
||||
wildmatch = "2.4.0"
|
||||
|
||||
|
||||
[target.'cfg(target_os = "linux")'.dependencies]
|
||||
landlock = "0.4.1"
|
||||
seccompiler = "0.5.0"
|
||||
@@ -65,8 +69,11 @@ openssl-sys = { version = "*", features = ["vendored"] }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_cmd = "2"
|
||||
core_test_support = { path = "tests/common" }
|
||||
maplit = "1.0.2"
|
||||
predicates = "3"
|
||||
pretty_assertions = "1.4.1"
|
||||
tempfile = "3"
|
||||
tokio-test = "0.4"
|
||||
walkdir = "2.5.0"
|
||||
wiremock = "0.6"
|
||||
|
||||
@@ -2,9 +2,18 @@
|
||||
|
||||
This crate implements the business logic for Codex. It is designed to be used by the various Codex UIs written in Rust.
|
||||
|
||||
Though for non-Rust UIs, we are also working to define a _protocol_ for talking to Codex. See:
|
||||
## Dependencies
|
||||
|
||||
- [Specification](../docs/protocol_v1.md)
|
||||
- [Rust types](./src/protocol.rs)
|
||||
Note that `codex-core` makes some assumptions about certain helper utilities being available in the environment. Currently, this
|
||||
|
||||
You can use the `proto` subcommand using the executable in the [`cli` crate](../cli) to speak the protocol using newline-delimited-JSON over stdin/stdout.
|
||||
### macOS
|
||||
|
||||
Expects `/usr/bin/sandbox-exec` to be present.
|
||||
|
||||
### Linux
|
||||
|
||||
Expects the binary containing `codex-core` to run the equivalent of `codex debug landlock` when `arg0` is `codex-linux-sandbox`. See the `codex-arg0` crate for details.
|
||||
|
||||
### All Platforms
|
||||
|
||||
Expects the binary containing `codex-core` to simulate the virtual `apply_patch` CLI when `arg1` is `--codex-run-as-apply-patch`. See the `codex-arg0` crate for details.
|
||||
|
||||
@@ -1,16 +1,31 @@
|
||||
Please resolve the user's task by editing and testing the code files in your current code execution session.
|
||||
You are a deployed coding agent.
|
||||
Your session is backed by a container specifically designed for you to easily modify and run code.
|
||||
The repo(s) are already cloned in your working directory, and you must fully solve the problem for your answer to be considered correct.
|
||||
You are operating as and within the Codex CLI, an open-source, terminal-based agentic coding assistant built by OpenAI. It wraps OpenAI models to enable natural language interaction with a local codebase. You are expected to be precise, safe, and helpful.
|
||||
|
||||
Your capabilities:
|
||||
- Receive user prompts, project context, and files.
|
||||
- Stream responses and emit function calls (e.g., shell commands, code edits).
|
||||
- Run commands, like apply_patch, and manage user approvals based on policy.
|
||||
- Work inside a workspace with sandboxing instructions specified by the policy described in (## Sandbox environment and approval instructions)
|
||||
|
||||
Within this context, Codex refers to the open-source agentic coding interface (not the old Codex language model built by OpenAI).
|
||||
|
||||
## General guidelines
|
||||
As a deployed coding agent, please continue working on the user's task until their query is resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the task is solved. If you are not sure about file content or codebase structure pertaining to the user's request, use your tools to read files and gather the relevant information. Do NOT guess or make up an answer.
|
||||
|
||||
After a user sends their first message, you should immediately provide a brief message acknowledging their request to set the tone and expectation of future work to be done (no more than 8-10 words). This should be done before performing work like exploring the codebase, writing or reading files, or other tool calls needed to complete the task. Use a natural, collaborative tone similar to how a teammate would receive a task during a pair programming session.
|
||||
|
||||
Please resolve the user's task by editing the code files in your current code execution session. Your session allows for you to modify and run code. The repo(s) are already cloned in your working directory, and you must fully solve the problem for your answer to be considered correct.
|
||||
|
||||
### Task execution
|
||||
You MUST adhere to the following criteria when executing the task:
|
||||
|
||||
- Working on the repo(s) in the current environment is allowed, even if they are proprietary.
|
||||
- Analyzing code for vulnerabilities is allowed.
|
||||
- Showing user code and tool call details is allowed.
|
||||
- User instructions may overwrite the _CODING GUIDELINES_ section in this developer message.
|
||||
- `user_instructions` are not part of the user's request, but guidance for how to complete the task.
|
||||
- Do not cite `user_instructions` back to the user unless a specific piece is relevant.
|
||||
- Do not use \`ls -R\`, \`find\`, or \`grep\` - these are slow in large repos. Use \`rg\` and \`rg --files\`.
|
||||
- Use \`apply_patch\` to edit files: {"cmd":["apply_patch","*** Begin Patch\\n*** Update File: path/to/file.py\\n@@ def example():\\n- pass\\n+ return 123\\n*** End Patch"]}
|
||||
- Use the \`apply_patch\` shell command to edit files: {"command":["apply_patch","*** Begin Patch\\n*** Update File: path/to/file.py\\n@@ def example():\\n- pass\\n+ return 123\\n*** End Patch"]}
|
||||
- If completing the user's task requires writing or modifying files:
|
||||
- Your code and final answer should follow these _CODING GUIDELINES_:
|
||||
- Fix the problem at the root cause rather than applying surface-level patches, when possible.
|
||||
@@ -33,23 +48,22 @@ You MUST adhere to the following criteria when executing the task:
|
||||
- If completing the user's task DOES NOT require writing or modifying files (e.g., the user asks a question about the code base):
|
||||
- Respond in a friendly tune as a remote teammate, who is knowledgeable, capable and eager to help with coding.
|
||||
- When your task involves writing or modifying files:
|
||||
- Do NOT tell the user to "save the file" or "copy the code into a file" if you already created or modified the file using \`apply_patch\`. Instead, reference the file as already saved.
|
||||
- Do NOT tell the user to "save the file" or "copy the code into a file" if you already created or modified the file using the `apply_patch` shell command. Instead, reference the file as already saved.
|
||||
- Do NOT show the full contents of large files you have already written, unless the user explicitly asks for them.
|
||||
|
||||
§ `apply-patch` Specification
|
||||
## Using the shell command `apply_patch` to edit files
|
||||
`apply_patch` is a shell command for editing files. Your patch language is a stripped‑down, file‑oriented diff format designed to be easy to parse and safe to apply. You can think of it as a high‑level envelope:
|
||||
|
||||
Your patch language is a stripped‑down, file‑oriented diff format designed to be easy to parse and safe to apply. You can think of it as a high‑level envelope:
|
||||
|
||||
**_ Begin Patch
|
||||
*** Begin Patch
|
||||
[ one or more file sections ]
|
||||
_** End Patch
|
||||
*** End Patch
|
||||
|
||||
Within that envelope, you get a sequence of file operations.
|
||||
You MUST include a header to specify the action you are taking.
|
||||
Each operation starts with one of three headers:
|
||||
|
||||
**_ Add File: <path> - create a new file. Every following line is a + line (the initial contents).
|
||||
_** Delete File: <path> - remove an existing file. Nothing follows.
|
||||
*** Add File: <path> - create a new file. Every following line is a + line (the initial contents).
|
||||
*** Delete File: <path> - remove an existing file. Nothing follows.
|
||||
\*\*\* Update File: <path> - patch an existing file in place (optionally with a rename).
|
||||
|
||||
May be immediately followed by \*\*\* Move to: <new path> if you want to rename the file.
|
||||
@@ -63,36 +77,60 @@ Within a hunk each line starts with:
|
||||
At the end of a truncated hunk you can emit \*\*\* End of File.
|
||||
|
||||
Patch := Begin { FileOp } End
|
||||
Begin := "**_ Begin Patch" NEWLINE
|
||||
End := "_** End Patch" NEWLINE
|
||||
Begin := "*** Begin Patch" NEWLINE
|
||||
End := "*** End Patch" NEWLINE
|
||||
FileOp := AddFile | DeleteFile | UpdateFile
|
||||
AddFile := "**_ Add File: " path NEWLINE { "+" line NEWLINE }
|
||||
DeleteFile := "_** Delete File: " path NEWLINE
|
||||
UpdateFile := "**_ Update File: " path NEWLINE [ MoveTo ] { Hunk }
|
||||
MoveTo := "_** Move to: " newPath NEWLINE
|
||||
AddFile := "*** Add File: " path NEWLINE { "+" line NEWLINE }
|
||||
DeleteFile := "*** Delete File: " path NEWLINE
|
||||
UpdateFile := "*** Update File: " path NEWLINE [ MoveTo ] { Hunk }
|
||||
MoveTo := "*** Move to: " newPath NEWLINE
|
||||
Hunk := "@@" [ header ] NEWLINE { HunkLine } [ "*** End of File" NEWLINE ]
|
||||
HunkLine := (" " | "-" | "+") text NEWLINE
|
||||
|
||||
A full patch can combine several operations:
|
||||
|
||||
**_ Begin Patch
|
||||
_** Add File: hello.txt
|
||||
*** Begin Patch
|
||||
*** Add File: hello.txt
|
||||
+Hello world
|
||||
**_ Update File: src/app.py
|
||||
_** Move to: src/main.py
|
||||
*** Update File: src/app.py
|
||||
*** Move to: src/main.py
|
||||
@@ def greet():
|
||||
-print("Hi")
|
||||
+print("Hello, world!")
|
||||
**_ Delete File: obsolete.txt
|
||||
_** End Patch
|
||||
*** Delete File: obsolete.txt
|
||||
*** End Patch
|
||||
|
||||
It is important to remember:
|
||||
|
||||
- You must include a header with your intended action (Add/Delete/Update)
|
||||
- You must prefix new lines with `+` even when creating a new file
|
||||
- You must follow this schema exactly when providing a patch
|
||||
|
||||
You can invoke apply_patch like:
|
||||
You can invoke apply_patch with the following shell command:
|
||||
|
||||
```
|
||||
shell {"command":["apply_patch","*** Begin Patch\n*** Add File: hello.txt\n+Hello, world!\n*** End Patch\n"]}
|
||||
```
|
||||
|
||||
## Sandbox environment and approval instructions
|
||||
|
||||
You are running in a sandboxed workspace backed by version control. The sandbox might be configured by the user to restrict certain behaviors, like accessing the internet or writing to files outside the current directory.
|
||||
|
||||
Commands that are blocked by sandbox settings will be automatically sent to the user for approval. The result of the request will be returned (i.e. the command result, or the request denial).
|
||||
The user also has an opportunity to approve the same command for the rest of the session.
|
||||
|
||||
Guidance on running within the sandbox:
|
||||
- When running commands that will likely require approval, attempt to use simple, precise commands, to reduce frequency of approval requests.
|
||||
- When approval is denied or a command fails due to a permission error, do not retry the exact command in a different way. Move on and continue trying to address the user's request.
|
||||
|
||||
|
||||
## Tools available
|
||||
### Plan updates
|
||||
|
||||
A tool named `update_plan` is available. Use it to keep an up‑to‑date, step‑by‑step plan for the task so you can follow your progress. When making your plans, keep in mind that you are a deployed coding agent - `update_plan` calls should not involve doing anything that you aren't capable of doing. For example, `update_plan` calls should NEVER contain tasks to merge your own pull requests. Only stop to ask the user if you genuinely need their feedback on a change.
|
||||
|
||||
- At the start of any nontrivial task, call `update_plan` with an initial plan: a short list of 1‑sentence steps with a `status` for each step (`pending`, `in_progress`, or `completed`). There should always be exactly one `in_progress` step until everything is done.
|
||||
- Whenever you finish a step, call `update_plan` again, marking the finished step as `completed` and the next step as `in_progress`.
|
||||
- If your plan needs to change, call `update_plan` with the revised steps and include an `explanation` describing the change.
|
||||
- When all steps are complete, make a final `update_plan` call with all steps marked `completed`.
|
||||
|
||||
|
||||
157
codex-rs/core/src/apply_patch.rs
Normal file
157
codex-rs/core/src/apply_patch.rs
Normal file
@@ -0,0 +1,157 @@
|
||||
use crate::codex::Session;
|
||||
use crate::models::FunctionCallOutputPayload;
|
||||
use crate::models::ResponseInputItem;
|
||||
use crate::protocol::FileChange;
|
||||
use crate::protocol::ReviewDecision;
|
||||
use crate::safety::SafetyCheck;
|
||||
use crate::safety::assess_patch_safety;
|
||||
use codex_apply_patch::ApplyPatchAction;
|
||||
use codex_apply_patch::ApplyPatchFileChange;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub const CODEX_APPLY_PATCH_ARG1: &str = "--codex-run-as-apply-patch";
|
||||
|
||||
pub(crate) enum InternalApplyPatchInvocation {
|
||||
/// The `apply_patch` call was handled programmatically, without any sort
|
||||
/// of sandbox, because the user explicitly approved it. This is the
|
||||
/// result to use with the `shell` function call that contained `apply_patch`.
|
||||
Output(ResponseInputItem),
|
||||
|
||||
/// The `apply_patch` call was approved, either automatically because it
|
||||
/// appears that it should be allowed based on the user's sandbox policy
|
||||
/// *or* because the user explicitly approved it. In either case, we use
|
||||
/// exec with [`CODEX_APPLY_PATCH_ARG1`] to realize the `apply_patch` call,
|
||||
/// but [`ApplyPatchExec::auto_approved`] is used to determine the sandbox
|
||||
/// used with the `exec()`.
|
||||
DelegateToExec(ApplyPatchExec),
|
||||
}
|
||||
|
||||
pub(crate) struct ApplyPatchExec {
|
||||
pub(crate) action: ApplyPatchAction,
|
||||
pub(crate) user_explicitly_approved_this_action: bool,
|
||||
}
|
||||
|
||||
impl From<ResponseInputItem> for InternalApplyPatchInvocation {
|
||||
fn from(item: ResponseInputItem) -> Self {
|
||||
InternalApplyPatchInvocation::Output(item)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn apply_patch(
|
||||
sess: &Session,
|
||||
sub_id: &str,
|
||||
call_id: &str,
|
||||
action: ApplyPatchAction,
|
||||
) -> InternalApplyPatchInvocation {
|
||||
let writable_roots_snapshot = {
|
||||
#[allow(clippy::unwrap_used)]
|
||||
let guard = sess.writable_roots.lock().unwrap();
|
||||
guard.clone()
|
||||
};
|
||||
|
||||
match assess_patch_safety(
|
||||
&action,
|
||||
sess.approval_policy,
|
||||
&writable_roots_snapshot,
|
||||
&sess.cwd,
|
||||
) {
|
||||
SafetyCheck::AutoApprove { .. } => {
|
||||
InternalApplyPatchInvocation::DelegateToExec(ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action: false,
|
||||
})
|
||||
}
|
||||
SafetyCheck::AskUser => {
|
||||
// Compute a readable summary of path changes to include in the
|
||||
// approval request so the user can make an informed decision.
|
||||
//
|
||||
// Note that it might be worth expanding this approval request to
|
||||
// give the user the option to expand the set of writable roots so
|
||||
// that similar patches can be auto-approved in the future during
|
||||
// this session.
|
||||
let rx_approve = sess
|
||||
.request_patch_approval(sub_id.to_owned(), call_id.to_owned(), &action, None, None)
|
||||
.await;
|
||||
match rx_approve.await.unwrap_or_default() {
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
|
||||
InternalApplyPatchInvocation::DelegateToExec(ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action: true,
|
||||
})
|
||||
}
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
ResponseInputItem::FunctionCallOutput {
|
||||
call_id: call_id.to_owned(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "patch rejected by user".to_string(),
|
||||
success: Some(false),
|
||||
},
|
||||
}
|
||||
.into()
|
||||
}
|
||||
}
|
||||
}
|
||||
SafetyCheck::Reject { reason } => ResponseInputItem::FunctionCallOutput {
|
||||
call_id: call_id.to_owned(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: format!("patch rejected: {reason}"),
|
||||
success: Some(false),
|
||||
},
|
||||
}
|
||||
.into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn convert_apply_patch_to_protocol(
|
||||
action: &ApplyPatchAction,
|
||||
) -> HashMap<PathBuf, FileChange> {
|
||||
let changes = action.changes();
|
||||
let mut result = HashMap::with_capacity(changes.len());
|
||||
for (path, change) in changes {
|
||||
let protocol_change = match change {
|
||||
ApplyPatchFileChange::Add { content } => FileChange::Add {
|
||||
content: content.clone(),
|
||||
},
|
||||
ApplyPatchFileChange::Delete => FileChange::Delete,
|
||||
ApplyPatchFileChange::Update {
|
||||
unified_diff,
|
||||
move_path,
|
||||
new_content: _new_content,
|
||||
} => FileChange::Update {
|
||||
unified_diff: unified_diff.clone(),
|
||||
move_path: move_path.clone(),
|
||||
},
|
||||
};
|
||||
result.insert(path.clone(), protocol_change);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub(crate) fn get_writable_roots(cwd: &Path) -> Vec<PathBuf> {
|
||||
let mut writable_roots = Vec::new();
|
||||
if cfg!(target_os = "macos") {
|
||||
// On macOS, $TMPDIR is private to the user.
|
||||
writable_roots.push(std::env::temp_dir());
|
||||
|
||||
// Allow pyenv to update its shims directory. Without this, any tool
|
||||
// that happens to be managed by `pyenv` will fail with an error like:
|
||||
//
|
||||
// pyenv: cannot rehash: $HOME/.pyenv/shims isn't writable
|
||||
//
|
||||
// which is emitted every time `pyenv` tries to run `rehash` (for
|
||||
// example, after installing a new Python package that drops an entry
|
||||
// point). Although the sandbox is intentionally read‑only by default,
|
||||
// writing to the user's local `pyenv` directory is safe because it
|
||||
// is already user‑writable and scoped to the current user account.
|
||||
if let Ok(home_dir) = std::env::var("HOME") {
|
||||
let pyenv_dir = PathBuf::from(home_dir).join(".pyenv");
|
||||
writable_roots.push(pyenv_dir);
|
||||
}
|
||||
}
|
||||
|
||||
writable_roots.push(cwd.to_path_buf());
|
||||
|
||||
writable_roots
|
||||
}
|
||||
219
codex-rs/core/src/bash.rs
Normal file
219
codex-rs/core/src/bash.rs
Normal file
@@ -0,0 +1,219 @@
|
||||
use tree_sitter::Parser;
|
||||
use tree_sitter::Tree;
|
||||
use tree_sitter_bash::LANGUAGE as BASH;
|
||||
|
||||
/// Parse the provided bash source using tree-sitter-bash, returning a Tree on
|
||||
/// success or None if parsing failed.
|
||||
pub fn try_parse_bash(bash_lc_arg: &str) -> Option<Tree> {
|
||||
let lang = BASH.into();
|
||||
let mut parser = Parser::new();
|
||||
#[expect(clippy::expect_used)]
|
||||
parser.set_language(&lang).expect("load bash grammar");
|
||||
let old_tree: Option<&Tree> = None;
|
||||
parser.parse(bash_lc_arg, old_tree)
|
||||
}
|
||||
|
||||
/// Parse a script which may contain multiple simple commands joined only by
|
||||
/// the safe logical/pipe/sequencing operators: `&&`, `||`, `;`, `|`.
|
||||
///
|
||||
/// Returns `Some(Vec<command_words>)` if every command is a plain word‑only
|
||||
/// command and the parse tree does not contain disallowed constructs
|
||||
/// (parentheses, redirections, substitutions, control flow, etc.). Otherwise
|
||||
/// returns `None`.
|
||||
pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<Vec<Vec<String>>> {
|
||||
if tree.root_node().has_error() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// List of allowed (named) node kinds for a "word only commands sequence".
|
||||
// If we encounter a named node that is not in this list we reject.
|
||||
const ALLOWED_KINDS: &[&str] = &[
|
||||
// top level containers
|
||||
"program",
|
||||
"list",
|
||||
"pipeline",
|
||||
// commands & words
|
||||
"command",
|
||||
"command_name",
|
||||
"word",
|
||||
"string",
|
||||
"string_content",
|
||||
"raw_string",
|
||||
"number",
|
||||
];
|
||||
// Allow only safe punctuation / operator tokens; anything else causes reject.
|
||||
const ALLOWED_PUNCT_TOKENS: &[&str] = &["&&", "||", ";", "|", "\"", "'"];
|
||||
|
||||
let root = tree.root_node();
|
||||
let mut cursor = root.walk();
|
||||
let mut stack = vec![root];
|
||||
let mut command_nodes = Vec::new();
|
||||
while let Some(node) = stack.pop() {
|
||||
let kind = node.kind();
|
||||
if node.is_named() {
|
||||
if !ALLOWED_KINDS.contains(&kind) {
|
||||
return None;
|
||||
}
|
||||
if kind == "command" {
|
||||
command_nodes.push(node);
|
||||
}
|
||||
} else {
|
||||
// Reject any punctuation / operator tokens that are not explicitly allowed.
|
||||
if kind.chars().any(|c| "&;|".contains(c)) && !ALLOWED_PUNCT_TOKENS.contains(&kind) {
|
||||
return None;
|
||||
}
|
||||
if !(ALLOWED_PUNCT_TOKENS.contains(&kind) || kind.trim().is_empty()) {
|
||||
// If it's a quote token or operator it's allowed above; we also allow whitespace tokens.
|
||||
// Any other punctuation like parentheses, braces, redirects, backticks, etc are rejected.
|
||||
return None;
|
||||
}
|
||||
}
|
||||
for child in node.children(&mut cursor) {
|
||||
stack.push(child);
|
||||
}
|
||||
}
|
||||
|
||||
let mut commands = Vec::new();
|
||||
for node in command_nodes {
|
||||
if let Some(words) = parse_plain_command_from_node(node, src) {
|
||||
commands.push(words);
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
Some(commands)
|
||||
}
|
||||
|
||||
fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Vec<String>> {
|
||||
if cmd.kind() != "command" {
|
||||
return None;
|
||||
}
|
||||
let mut words = Vec::new();
|
||||
let mut cursor = cmd.walk();
|
||||
for child in cmd.named_children(&mut cursor) {
|
||||
match child.kind() {
|
||||
"command_name" => {
|
||||
let word_node = child.named_child(0)?;
|
||||
if word_node.kind() != "word" {
|
||||
return None;
|
||||
}
|
||||
words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
}
|
||||
"word" | "number" => {
|
||||
words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
}
|
||||
"string" => {
|
||||
if child.child_count() == 3
|
||||
&& child.child(0)?.kind() == "\""
|
||||
&& child.child(1)?.kind() == "string_content"
|
||||
&& child.child(2)?.kind() == "\""
|
||||
{
|
||||
words.push(child.child(1)?.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
"raw_string" => {
|
||||
let raw_string = child.utf8_text(src.as_bytes()).ok()?;
|
||||
let stripped = raw_string
|
||||
.strip_prefix('\'')
|
||||
.and_then(|s| s.strip_suffix('\''));
|
||||
if let Some(s) = stripped {
|
||||
words.push(s.to_owned());
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(words)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::unwrap_used)]
|
||||
use super::*;
|
||||
|
||||
fn parse_seq(src: &str) -> Option<Vec<Vec<String>>> {
|
||||
let tree = try_parse_bash(src)?;
|
||||
try_parse_word_only_commands_sequence(&tree, src)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn accepts_single_simple_command() {
|
||||
let cmds = parse_seq("ls -1").unwrap();
|
||||
assert_eq!(cmds, vec![vec!["ls".to_string(), "-1".to_string()]]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn accepts_multiple_commands_with_allowed_operators() {
|
||||
let src = "ls && pwd; echo 'hi there' | wc -l";
|
||||
let cmds = parse_seq(src).unwrap();
|
||||
let expected: Vec<Vec<String>> = vec![
|
||||
vec!["wc".to_string(), "-l".to_string()],
|
||||
vec!["echo".to_string(), "hi there".to_string()],
|
||||
vec!["pwd".to_string()],
|
||||
vec!["ls".to_string()],
|
||||
];
|
||||
assert_eq!(cmds, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extracts_double_and_single_quoted_strings() {
|
||||
let cmds = parse_seq("echo \"hello world\"").unwrap();
|
||||
assert_eq!(
|
||||
cmds,
|
||||
vec![vec!["echo".to_string(), "hello world".to_string()]]
|
||||
);
|
||||
|
||||
let cmds2 = parse_seq("echo 'hi there'").unwrap();
|
||||
assert_eq!(
|
||||
cmds2,
|
||||
vec![vec!["echo".to_string(), "hi there".to_string()]]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn accepts_numbers_as_words() {
|
||||
let cmds = parse_seq("echo 123 456").unwrap();
|
||||
assert_eq!(
|
||||
cmds,
|
||||
vec![vec![
|
||||
"echo".to_string(),
|
||||
"123".to_string(),
|
||||
"456".to_string()
|
||||
]]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_parentheses_and_subshells() {
|
||||
assert!(parse_seq("(ls)").is_none());
|
||||
assert!(parse_seq("ls || (pwd && echo hi)").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_redirections_and_unsupported_operators() {
|
||||
assert!(parse_seq("ls > out.txt").is_none());
|
||||
assert!(parse_seq("echo hi & echo bye").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_command_and_process_substitutions_and_expansions() {
|
||||
assert!(parse_seq("echo $(pwd)").is_none());
|
||||
assert!(parse_seq("echo `pwd`").is_none());
|
||||
assert!(parse_seq("echo $HOME").is_none());
|
||||
assert!(parse_seq("echo \"hi $USER\"").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_variable_assignment_prefix() {
|
||||
assert!(parse_seq("FOO=bar ls").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_trailing_operator_parse_error() {
|
||||
assert!(parse_seq("ls &&").is_none());
|
||||
}
|
||||
}
|
||||
@@ -21,9 +21,9 @@ use crate::client_common::ResponseEvent;
|
||||
use crate::client_common::ResponseStream;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result;
|
||||
use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
|
||||
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
use crate::model_family::ModelFamily;
|
||||
use crate::models::ContentItem;
|
||||
use crate::models::ReasoningItemContent;
|
||||
use crate::models::ResponseItem;
|
||||
use crate::openai_tools::create_tools_json_for_chat_completions_api;
|
||||
use crate::util::backoff;
|
||||
@@ -31,19 +31,21 @@ use crate::util::backoff;
|
||||
/// Implementation for the classic Chat Completions API.
|
||||
pub(crate) async fn stream_chat_completions(
|
||||
prompt: &Prompt,
|
||||
model: &str,
|
||||
model_family: &ModelFamily,
|
||||
client: &reqwest::Client,
|
||||
provider: &ModelProviderInfo,
|
||||
) -> Result<ResponseStream> {
|
||||
// Build messages array
|
||||
let mut messages = Vec::<serde_json::Value>::new();
|
||||
|
||||
let full_instructions = prompt.get_full_instructions(model);
|
||||
let full_instructions = prompt.get_full_instructions(model_family);
|
||||
messages.push(json!({"role": "system", "content": full_instructions}));
|
||||
|
||||
for item in &prompt.input {
|
||||
let input = prompt.get_formatted_input();
|
||||
|
||||
for item in &input {
|
||||
match item {
|
||||
ResponseItem::Message { role, content } => {
|
||||
ResponseItem::Message { role, content, .. } => {
|
||||
let mut text = String::new();
|
||||
for c in content {
|
||||
match c {
|
||||
@@ -60,6 +62,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
name,
|
||||
arguments,
|
||||
call_id,
|
||||
..
|
||||
} => {
|
||||
messages.push(json!({
|
||||
"role": "assistant",
|
||||
@@ -106,31 +109,27 @@ pub(crate) async fn stream_chat_completions(
|
||||
}
|
||||
}
|
||||
|
||||
let tools_json = create_tools_json_for_chat_completions_api(prompt, model)?;
|
||||
let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?;
|
||||
let payload = json!({
|
||||
"model": model,
|
||||
"model": model_family.slug,
|
||||
"messages": messages,
|
||||
"stream": true,
|
||||
"tools": tools_json,
|
||||
});
|
||||
|
||||
let base_url = provider.base_url.trim_end_matches('/');
|
||||
let url = format!("{}/chat/completions", base_url);
|
||||
|
||||
debug!(
|
||||
"POST to {url}: {}",
|
||||
"POST to {}: {}",
|
||||
provider.get_full_url(&None),
|
||||
serde_json::to_string_pretty(&payload).unwrap_or_default()
|
||||
);
|
||||
|
||||
let api_key = provider.api_key()?;
|
||||
let mut attempt = 0;
|
||||
let max_retries = provider.request_max_retries();
|
||||
loop {
|
||||
attempt += 1;
|
||||
|
||||
let mut req_builder = client.post(&url);
|
||||
if let Some(api_key) = &api_key {
|
||||
req_builder = req_builder.bearer_auth(api_key.clone());
|
||||
}
|
||||
let req_builder = provider.create_request_builder(client, &None).await?;
|
||||
|
||||
let res = req_builder
|
||||
.header(reqwest::header::ACCEPT, "text/event-stream")
|
||||
.json(&payload)
|
||||
@@ -139,9 +138,13 @@ pub(crate) async fn stream_chat_completions(
|
||||
|
||||
match res {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(16);
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
|
||||
let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
|
||||
tokio::spawn(process_chat_sse(stream, tx_event));
|
||||
tokio::spawn(process_chat_sse(
|
||||
stream,
|
||||
tx_event,
|
||||
provider.stream_idle_timeout(),
|
||||
));
|
||||
return Ok(ResponseStream { rx_event });
|
||||
}
|
||||
Ok(res) => {
|
||||
@@ -151,7 +154,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
return Err(CodexErr::UnexpectedStatus(status, body));
|
||||
}
|
||||
|
||||
if attempt > *OPENAI_REQUEST_MAX_RETRIES {
|
||||
if attempt > max_retries {
|
||||
return Err(CodexErr::RetryLimit(status));
|
||||
}
|
||||
|
||||
@@ -167,7 +170,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
tokio::time::sleep(delay).await;
|
||||
}
|
||||
Err(e) => {
|
||||
if attempt > *OPENAI_REQUEST_MAX_RETRIES {
|
||||
if attempt > max_retries {
|
||||
return Err(e.into());
|
||||
}
|
||||
let delay = backoff(attempt);
|
||||
@@ -180,14 +183,15 @@ pub(crate) async fn stream_chat_completions(
|
||||
/// Lightweight SSE processor for the Chat Completions streaming format. The
|
||||
/// output is mapped onto Codex's internal [`ResponseEvent`] so that the rest
|
||||
/// of the pipeline can stay agnostic of the underlying wire format.
|
||||
async fn process_chat_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
|
||||
where
|
||||
async fn process_chat_sse<S>(
|
||||
stream: S,
|
||||
tx_event: mpsc::Sender<Result<ResponseEvent>>,
|
||||
idle_timeout: Duration,
|
||||
) where
|
||||
S: Stream<Item = Result<Bytes>> + Unpin,
|
||||
{
|
||||
let mut stream = stream.eventsource();
|
||||
|
||||
let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
|
||||
// State to accumulate a function call across streaming chunks.
|
||||
// OpenAI may split the `arguments` string over multiple `delta` events
|
||||
// until the chunk whose `finish_reason` is `tool_calls` is emitted. We
|
||||
@@ -202,6 +206,8 @@ where
|
||||
}
|
||||
|
||||
let mut fn_call_state = FunctionCallState::default();
|
||||
let mut assistant_text = String::new();
|
||||
let mut reasoning_text = String::new();
|
||||
|
||||
loop {
|
||||
let sse = match timeout(idle_timeout, stream.next()).await {
|
||||
@@ -215,6 +221,7 @@ where
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::Completed {
|
||||
response_id: String::new(),
|
||||
token_usage: None,
|
||||
}))
|
||||
.await;
|
||||
return;
|
||||
@@ -229,9 +236,35 @@ where
|
||||
|
||||
// OpenAI Chat streaming sends a literal string "[DONE]" when finished.
|
||||
if sse.data.trim() == "[DONE]" {
|
||||
// Emit any finalized items before closing so downstream consumers receive
|
||||
// terminal events for both assistant content and raw reasoning.
|
||||
if !assistant_text.is_empty() {
|
||||
let item = ResponseItem::Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: std::mem::take(&mut assistant_text),
|
||||
}],
|
||||
id: None,
|
||||
};
|
||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||
}
|
||||
|
||||
if !reasoning_text.is_empty() {
|
||||
let item = ResponseItem::Reasoning {
|
||||
id: String::new(),
|
||||
summary: Vec::new(),
|
||||
content: Some(vec![ReasoningItemContent::ReasoningText {
|
||||
text: std::mem::take(&mut reasoning_text),
|
||||
}]),
|
||||
encrypted_content: None,
|
||||
};
|
||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||
}
|
||||
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::Completed {
|
||||
response_id: String::new(),
|
||||
token_usage: None,
|
||||
}))
|
||||
.await;
|
||||
return;
|
||||
@@ -247,20 +280,47 @@ where
|
||||
let choice_opt = chunk.get("choices").and_then(|c| c.get(0));
|
||||
|
||||
if let Some(choice) = choice_opt {
|
||||
// Handle assistant content tokens.
|
||||
// Handle assistant content tokens as streaming deltas.
|
||||
if let Some(content) = choice
|
||||
.get("delta")
|
||||
.and_then(|d| d.get("content"))
|
||||
.and_then(|c| c.as_str())
|
||||
{
|
||||
let item = ResponseItem::Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: content.to_string(),
|
||||
}],
|
||||
};
|
||||
if !content.is_empty() {
|
||||
assistant_text.push_str(content);
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::OutputTextDelta(content.to_string())))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||
// Forward any reasoning/thinking deltas if present.
|
||||
// Some providers stream `reasoning` as a plain string while others
|
||||
// nest the text under an object (e.g. `{ "reasoning": { "text": "…" } }`).
|
||||
if let Some(reasoning_val) = choice.get("delta").and_then(|d| d.get("reasoning")) {
|
||||
let mut maybe_text = reasoning_val.as_str().map(|s| s.to_string());
|
||||
|
||||
if maybe_text.is_none() && reasoning_val.is_object() {
|
||||
if let Some(s) = reasoning_val
|
||||
.get("text")
|
||||
.and_then(|t| t.as_str())
|
||||
.filter(|s| !s.is_empty())
|
||||
{
|
||||
maybe_text = Some(s.to_string());
|
||||
} else if let Some(s) = reasoning_val
|
||||
.get("content")
|
||||
.and_then(|t| t.as_str())
|
||||
.filter(|s| !s.is_empty())
|
||||
{
|
||||
maybe_text = Some(s.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(reasoning) = maybe_text {
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::ReasoningContentDelta(reasoning)))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle streaming function / tool calls.
|
||||
@@ -297,18 +357,55 @@ where
|
||||
if let Some(finish_reason) = choice.get("finish_reason").and_then(|v| v.as_str()) {
|
||||
match finish_reason {
|
||||
"tool_calls" if fn_call_state.active => {
|
||||
// Build the FunctionCall response item.
|
||||
// First, flush the terminal raw reasoning so UIs can finalize
|
||||
// the reasoning stream before any exec/tool events begin.
|
||||
if !reasoning_text.is_empty() {
|
||||
let item = ResponseItem::Reasoning {
|
||||
id: String::new(),
|
||||
summary: Vec::new(),
|
||||
content: Some(vec![ReasoningItemContent::ReasoningText {
|
||||
text: std::mem::take(&mut reasoning_text),
|
||||
}]),
|
||||
encrypted_content: None,
|
||||
};
|
||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||
}
|
||||
|
||||
// Then emit the FunctionCall response item.
|
||||
let item = ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: fn_call_state.name.clone().unwrap_or_else(|| "".to_string()),
|
||||
arguments: fn_call_state.arguments.clone(),
|
||||
call_id: fn_call_state.call_id.clone().unwrap_or_else(String::new),
|
||||
};
|
||||
|
||||
// Emit it downstream.
|
||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||
}
|
||||
"stop" => {
|
||||
// Regular turn without tool-call.
|
||||
// Regular turn without tool-call. Emit the final assistant message
|
||||
// as a single OutputItemDone so non-delta consumers see the result.
|
||||
if !assistant_text.is_empty() {
|
||||
let item = ResponseItem::Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: std::mem::take(&mut assistant_text),
|
||||
}],
|
||||
id: None,
|
||||
};
|
||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||
}
|
||||
// Also emit a terminal Reasoning item so UIs can finalize raw reasoning.
|
||||
if !reasoning_text.is_empty() {
|
||||
let item = ResponseItem::Reasoning {
|
||||
id: String::new(),
|
||||
summary: Vec::new(),
|
||||
content: Some(vec![ReasoningItemContent::ReasoningText {
|
||||
text: std::mem::take(&mut reasoning_text),
|
||||
}]),
|
||||
encrypted_content: None,
|
||||
};
|
||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
@@ -317,6 +414,7 @@ where
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::Completed {
|
||||
response_id: String::new(),
|
||||
token_usage: None,
|
||||
}))
|
||||
.await;
|
||||
|
||||
@@ -345,10 +443,17 @@ where
|
||||
/// The adapter is intentionally *lossless*: callers who do **not** opt in via
|
||||
/// [`AggregateStreamExt::aggregate()`] keep receiving the original unmodified
|
||||
/// events.
|
||||
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||
enum AggregateMode {
|
||||
AggregatedOnly,
|
||||
Streaming,
|
||||
}
|
||||
pub(crate) struct AggregatedChatStream<S> {
|
||||
inner: S,
|
||||
cumulative: String,
|
||||
pending_completed: Option<ResponseEvent>,
|
||||
cumulative_reasoning: String,
|
||||
pending: std::collections::VecDeque<ResponseEvent>,
|
||||
mode: AggregateMode,
|
||||
}
|
||||
|
||||
impl<S> Stream for AggregatedChatStream<S>
|
||||
@@ -360,8 +465,8 @@ where
|
||||
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
let this = self.get_mut();
|
||||
|
||||
// First, flush any buffered Completed event from the previous call.
|
||||
if let Some(ev) = this.pending_completed.take() {
|
||||
// First, flush any buffered events from the previous call.
|
||||
if let Some(ev) = this.pending.pop_front() {
|
||||
return Poll::Ready(Some(Ok(ev)));
|
||||
}
|
||||
|
||||
@@ -378,42 +483,111 @@ where
|
||||
let is_assistant_delta = matches!(&item, crate::models::ResponseItem::Message { role, .. } if role == "assistant");
|
||||
|
||||
if is_assistant_delta {
|
||||
if let crate::models::ResponseItem::Message { content, .. } = &item {
|
||||
if let Some(text) = content.iter().find_map(|c| match c {
|
||||
crate::models::ContentItem::OutputText { text } => Some(text),
|
||||
_ => None,
|
||||
}) {
|
||||
this.cumulative.push_str(text);
|
||||
// Only use the final assistant message if we have not
|
||||
// seen any deltas; otherwise, deltas already built the
|
||||
// cumulative text and this would duplicate it.
|
||||
if this.cumulative.is_empty() {
|
||||
if let crate::models::ResponseItem::Message { content, .. } = &item {
|
||||
if let Some(text) = content.iter().find_map(|c| match c {
|
||||
crate::models::ContentItem::OutputText { text } => Some(text),
|
||||
_ => None,
|
||||
}) {
|
||||
this.cumulative.push_str(text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Swallow partial assistant chunk; keep polling.
|
||||
// Swallow assistant message here; emit on Completed.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Not an assistant message – forward immediately.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id }))) => {
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
}))) => {
|
||||
// Build any aggregated items in the correct order: Reasoning first, then Message.
|
||||
let mut emitted_any = false;
|
||||
|
||||
if !this.cumulative_reasoning.is_empty()
|
||||
&& matches!(this.mode, AggregateMode::AggregatedOnly)
|
||||
{
|
||||
let aggregated_reasoning = crate::models::ResponseItem::Reasoning {
|
||||
id: String::new(),
|
||||
summary: Vec::new(),
|
||||
content: Some(vec![
|
||||
crate::models::ReasoningItemContent::ReasoningText {
|
||||
text: std::mem::take(&mut this.cumulative_reasoning),
|
||||
},
|
||||
]),
|
||||
encrypted_content: None,
|
||||
};
|
||||
this.pending
|
||||
.push_back(ResponseEvent::OutputItemDone(aggregated_reasoning));
|
||||
emitted_any = true;
|
||||
}
|
||||
|
||||
if !this.cumulative.is_empty() {
|
||||
let aggregated_item = crate::models::ResponseItem::Message {
|
||||
let aggregated_message = crate::models::ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![crate::models::ContentItem::OutputText {
|
||||
text: std::mem::take(&mut this.cumulative),
|
||||
}],
|
||||
};
|
||||
this.pending
|
||||
.push_back(ResponseEvent::OutputItemDone(aggregated_message));
|
||||
emitted_any = true;
|
||||
}
|
||||
|
||||
// Buffer Completed so it is returned *after* the aggregated message.
|
||||
this.pending_completed = Some(ResponseEvent::Completed { response_id });
|
||||
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(
|
||||
aggregated_item,
|
||||
))));
|
||||
// Always emit Completed last when anything was aggregated.
|
||||
if emitted_any {
|
||||
this.pending.push_back(ResponseEvent::Completed {
|
||||
response_id: response_id.clone(),
|
||||
token_usage: token_usage.clone(),
|
||||
});
|
||||
// Return the first pending event now.
|
||||
if let Some(ev) = this.pending.pop_front() {
|
||||
return Poll::Ready(Some(Ok(ev)));
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing aggregated – forward Completed directly.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id })));
|
||||
} // No other `Ok` variants exist at the moment, continue polling.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
})));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Created))) => {
|
||||
// These events are exclusive to the Responses API and
|
||||
// will never appear in a Chat Completions stream.
|
||||
continue;
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta)))) => {
|
||||
// Always accumulate deltas so we can emit a final OutputItemDone at Completed.
|
||||
this.cumulative.push_str(&delta);
|
||||
if matches!(this.mode, AggregateMode::Streaming) {
|
||||
// In streaming mode, also forward the delta immediately.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta))));
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::ReasoningContentDelta(delta)))) => {
|
||||
// Always accumulate reasoning deltas so we can emit a final Reasoning item at Completed.
|
||||
this.cumulative_reasoning.push_str(&delta);
|
||||
if matches!(this.mode, AggregateMode::Streaming) {
|
||||
// In streaming mode, also forward the delta immediately.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::ReasoningContentDelta(delta))));
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta(_)))) => {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -427,7 +601,7 @@ pub(crate) trait AggregateStreamExt: Stream<Item = Result<ResponseEvent>> + Size
|
||||
///
|
||||
/// ```ignore
|
||||
/// OutputItemDone(<full message>)
|
||||
/// Completed { .. }
|
||||
/// Completed
|
||||
/// ```
|
||||
///
|
||||
/// No other `OutputItemDone` events will be seen by the caller.
|
||||
@@ -441,12 +615,24 @@ pub(crate) trait AggregateStreamExt: Stream<Item = Result<ResponseEvent>> + Size
|
||||
/// }
|
||||
/// ```
|
||||
fn aggregate(self) -> AggregatedChatStream<Self> {
|
||||
AggregatedChatStream {
|
||||
inner: self,
|
||||
cumulative: String::new(),
|
||||
pending_completed: None,
|
||||
}
|
||||
AggregatedChatStream::new(self, AggregateMode::AggregatedOnly)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> AggregateStreamExt for T where T: Stream<Item = Result<ResponseEvent>> + Sized {}
|
||||
|
||||
impl<S> AggregatedChatStream<S> {
|
||||
fn new(inner: S, mode: AggregateMode) -> Self {
|
||||
AggregatedChatStream {
|
||||
inner,
|
||||
cumulative: String::new(),
|
||||
cumulative_reasoning: String::new(),
|
||||
pending: std::collections::VecDeque::new(),
|
||||
mode,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn streaming_mode(inner: S) -> Self {
|
||||
Self::new(inner, AggregateMode::Streaming)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,8 @@ use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use bytes::Bytes;
|
||||
use codex_login::AuthMode;
|
||||
use codex_login::CodexAuth;
|
||||
use eventsource_stream::Eventsource;
|
||||
use futures::prelude::*;
|
||||
use reqwest::StatusCode;
|
||||
@@ -15,6 +17,7 @@ use tokio_util::io::ReaderStream;
|
||||
use tracing::debug;
|
||||
use tracing::trace;
|
||||
use tracing::warn;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::chat_completions::AggregateStreamExt;
|
||||
use crate::chat_completions::stream_chat_completions;
|
||||
@@ -23,40 +26,46 @@ use crate::client_common::ResponseEvent;
|
||||
use crate::client_common::ResponseStream;
|
||||
use crate::client_common::ResponsesApiRequest;
|
||||
use crate::client_common::create_reasoning_param_for_request;
|
||||
use crate::config::Config;
|
||||
use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::EnvVarError;
|
||||
use crate::error::Result;
|
||||
use crate::flags::CODEX_RS_SSE_FIXTURE;
|
||||
use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
|
||||
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
use crate::model_provider_info::ModelProviderInfo;
|
||||
use crate::model_provider_info::WireApi;
|
||||
use crate::models::ResponseItem;
|
||||
use crate::openai_tools::create_tools_json_for_responses_api;
|
||||
use crate::protocol::TokenUsage;
|
||||
use crate::util::backoff;
|
||||
use std::sync::Arc;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ModelClient {
|
||||
model: String,
|
||||
config: Arc<Config>,
|
||||
auth: Option<CodexAuth>,
|
||||
client: reqwest::Client,
|
||||
provider: ModelProviderInfo,
|
||||
session_id: Uuid,
|
||||
effort: ReasoningEffortConfig,
|
||||
summary: ReasoningSummaryConfig,
|
||||
}
|
||||
|
||||
impl ModelClient {
|
||||
pub fn new(
|
||||
model: impl ToString,
|
||||
config: Arc<Config>,
|
||||
auth: Option<CodexAuth>,
|
||||
provider: ModelProviderInfo,
|
||||
effort: ReasoningEffortConfig,
|
||||
summary: ReasoningSummaryConfig,
|
||||
session_id: Uuid,
|
||||
) -> Self {
|
||||
Self {
|
||||
model: model.to_string(),
|
||||
config,
|
||||
auth,
|
||||
client: reqwest::Client::new(),
|
||||
provider,
|
||||
session_id,
|
||||
effort,
|
||||
summary,
|
||||
}
|
||||
@@ -70,14 +79,22 @@ impl ModelClient {
|
||||
WireApi::Responses => self.stream_responses(prompt).await,
|
||||
WireApi::Chat => {
|
||||
// Create the raw streaming connection first.
|
||||
let response_stream =
|
||||
stream_chat_completions(prompt, &self.model, &self.client, &self.provider)
|
||||
.await?;
|
||||
let response_stream = stream_chat_completions(
|
||||
prompt,
|
||||
&self.config.model_family,
|
||||
&self.client,
|
||||
&self.provider,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Wrap it with the aggregation adapter so callers see *only*
|
||||
// the final assistant message per turn (matching the
|
||||
// behaviour of the Responses API).
|
||||
let mut aggregated = response_stream.aggregate();
|
||||
let mut aggregated = if self.config.show_raw_agent_reasoning {
|
||||
crate::chat_completions::AggregatedChatStream::streaming_mode(response_stream)
|
||||
} else {
|
||||
response_stream.aggregate()
|
||||
};
|
||||
|
||||
// Bridge the aggregated stream back into a standard
|
||||
// `ResponseStream` by forwarding events through a channel.
|
||||
@@ -103,56 +120,116 @@ impl ModelClient {
|
||||
if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
|
||||
// short circuit for tests
|
||||
warn!(path, "Streaming from fixture");
|
||||
return stream_from_fixture(path).await;
|
||||
return stream_from_fixture(path, self.provider.clone()).await;
|
||||
}
|
||||
|
||||
let full_instructions = prompt.get_full_instructions(&self.model);
|
||||
let tools_json = create_tools_json_for_responses_api(prompt, &self.model)?;
|
||||
let reasoning = create_reasoning_param_for_request(&self.model, self.effort, self.summary);
|
||||
let auth = self.auth.clone();
|
||||
|
||||
let auth_mode = auth.as_ref().map(|a| a.mode);
|
||||
|
||||
if self.config.model_family.requires_chatgpt_auth && auth_mode != Some(AuthMode::ChatGPT) {
|
||||
return Err(CodexErr::UnexpectedStatus(
|
||||
StatusCode::BAD_REQUEST,
|
||||
format!(
|
||||
"{slug} is only supported with ChatGPT auth, run `codex login status` to check your auth status and `codex login` to login with ChatGPT",
|
||||
slug = self.config.model_family.slug
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
let store = prompt.store && auth_mode != Some(AuthMode::ChatGPT);
|
||||
|
||||
let full_instructions = prompt.get_full_instructions(&self.config.model_family);
|
||||
let tools_json = create_tools_json_for_responses_api(&prompt.tools)?;
|
||||
let reasoning = create_reasoning_param_for_request(
|
||||
&self.config.model_family,
|
||||
self.effort,
|
||||
self.summary,
|
||||
);
|
||||
|
||||
// Request encrypted COT if we are not storing responses,
|
||||
// otherwise reasoning items will be referenced by ID
|
||||
let include: Vec<String> = if !store && reasoning.is_some() {
|
||||
vec!["reasoning.encrypted_content".to_string()]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
let input_with_instructions = prompt.get_formatted_input();
|
||||
|
||||
let payload = ResponsesApiRequest {
|
||||
model: &self.model,
|
||||
model: &self.config.model,
|
||||
instructions: &full_instructions,
|
||||
input: &prompt.input,
|
||||
input: &input_with_instructions,
|
||||
tools: &tools_json,
|
||||
tool_choice: "auto",
|
||||
parallel_tool_calls: false,
|
||||
reasoning,
|
||||
previous_response_id: prompt.prev_id.clone(),
|
||||
store: prompt.store,
|
||||
store,
|
||||
stream: true,
|
||||
include,
|
||||
};
|
||||
|
||||
let base_url = self.provider.base_url.clone();
|
||||
let base_url = base_url.trim_end_matches('/');
|
||||
let url = format!("{}/responses", base_url);
|
||||
trace!("POST to {url}: {}", serde_json::to_string(&payload)?);
|
||||
|
||||
let mut attempt = 0;
|
||||
let max_retries = self.provider.request_max_retries();
|
||||
|
||||
trace!(
|
||||
"POST to {}: {}",
|
||||
self.provider.get_full_url(&auth),
|
||||
serde_json::to_string(&payload)?
|
||||
);
|
||||
|
||||
loop {
|
||||
attempt += 1;
|
||||
|
||||
let api_key = self.provider.api_key()?.ok_or_else(|| {
|
||||
CodexErr::EnvVar(EnvVarError {
|
||||
var: self.provider.env_key.clone().unwrap_or_default(),
|
||||
instructions: None,
|
||||
})
|
||||
})?;
|
||||
let res = self
|
||||
.client
|
||||
.post(&url)
|
||||
.bearer_auth(api_key)
|
||||
let mut req_builder = self
|
||||
.provider
|
||||
.create_request_builder(&self.client, &auth)
|
||||
.await?;
|
||||
|
||||
req_builder = req_builder
|
||||
.header("OpenAI-Beta", "responses=experimental")
|
||||
.header("session_id", self.session_id.to_string())
|
||||
.header(reqwest::header::ACCEPT, "text/event-stream")
|
||||
.json(&payload)
|
||||
.send()
|
||||
.await;
|
||||
.json(&payload);
|
||||
|
||||
if let Some(auth) = auth.as_ref()
|
||||
&& auth.mode == AuthMode::ChatGPT
|
||||
&& let Some(account_id) = auth.get_account_id().await
|
||||
{
|
||||
req_builder = req_builder.header("chatgpt-account-id", account_id);
|
||||
}
|
||||
|
||||
let originator = self
|
||||
.config
|
||||
.internal_originator
|
||||
.as_deref()
|
||||
.unwrap_or("codex_cli_rs");
|
||||
req_builder = req_builder.header("originator", originator);
|
||||
|
||||
let res = req_builder.send().await;
|
||||
if let Ok(resp) = &res {
|
||||
trace!(
|
||||
"Response status: {}, request-id: {}",
|
||||
resp.status(),
|
||||
resp.headers()
|
||||
.get("x-request-id")
|
||||
.map(|v| v.to_str().unwrap_or_default())
|
||||
.unwrap_or_default()
|
||||
);
|
||||
}
|
||||
|
||||
match res {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(16);
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
|
||||
|
||||
// spawn task to process SSE
|
||||
let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
|
||||
tokio::spawn(process_sse(stream, tx_event));
|
||||
tokio::spawn(process_sse(
|
||||
stream,
|
||||
tx_event,
|
||||
self.provider.stream_idle_timeout(),
|
||||
));
|
||||
|
||||
return Ok(ResponseStream { rx_event });
|
||||
}
|
||||
@@ -167,11 +244,11 @@ impl ModelClient {
|
||||
// negligible.
|
||||
if !(status == StatusCode::TOO_MANY_REQUESTS || status.is_server_error()) {
|
||||
// Surface the error body to callers. Use `unwrap_or_default` per Clippy.
|
||||
let body = (res.text().await).unwrap_or_default();
|
||||
let body = res.text().await.unwrap_or_default();
|
||||
return Err(CodexErr::UnexpectedStatus(status, body));
|
||||
}
|
||||
|
||||
if attempt > *OPENAI_REQUEST_MAX_RETRIES {
|
||||
if attempt > max_retries {
|
||||
return Err(CodexErr::RetryLimit(status));
|
||||
}
|
||||
|
||||
@@ -188,7 +265,7 @@ impl ModelClient {
|
||||
tokio::time::sleep(delay).await;
|
||||
}
|
||||
Err(e) => {
|
||||
if attempt > *OPENAI_REQUEST_MAX_RETRIES {
|
||||
if attempt > max_retries {
|
||||
return Err(e.into());
|
||||
}
|
||||
let delay = backoff(attempt);
|
||||
@@ -197,6 +274,10 @@ impl ModelClient {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_provider(&self) -> ModelProviderInfo {
|
||||
self.provider.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
@@ -205,23 +286,61 @@ struct SseEvent {
|
||||
kind: String,
|
||||
response: Option<Value>,
|
||||
item: Option<Value>,
|
||||
delta: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCreated {}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCompleted {
|
||||
id: String,
|
||||
usage: Option<ResponseCompletedUsage>,
|
||||
}
|
||||
|
||||
async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
|
||||
where
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCompletedUsage {
|
||||
input_tokens: u64,
|
||||
input_tokens_details: Option<ResponseCompletedInputTokensDetails>,
|
||||
output_tokens: u64,
|
||||
output_tokens_details: Option<ResponseCompletedOutputTokensDetails>,
|
||||
total_tokens: u64,
|
||||
}
|
||||
|
||||
impl From<ResponseCompletedUsage> for TokenUsage {
|
||||
fn from(val: ResponseCompletedUsage) -> Self {
|
||||
TokenUsage {
|
||||
input_tokens: val.input_tokens,
|
||||
cached_input_tokens: val.input_tokens_details.map(|d| d.cached_tokens),
|
||||
output_tokens: val.output_tokens,
|
||||
reasoning_output_tokens: val.output_tokens_details.map(|d| d.reasoning_tokens),
|
||||
total_tokens: val.total_tokens,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCompletedInputTokensDetails {
|
||||
cached_tokens: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ResponseCompletedOutputTokensDetails {
|
||||
reasoning_tokens: u64,
|
||||
}
|
||||
|
||||
async fn process_sse<S>(
|
||||
stream: S,
|
||||
tx_event: mpsc::Sender<Result<ResponseEvent>>,
|
||||
idle_timeout: Duration,
|
||||
) where
|
||||
S: Stream<Item = Result<Bytes>> + Unpin,
|
||||
{
|
||||
let mut stream = stream.eventsource();
|
||||
|
||||
// If the stream stays completely silent for an extended period treat it as disconnected.
|
||||
let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
// The response id returned from the "complete" message.
|
||||
let mut response_id = None;
|
||||
let mut response_completed: Option<ResponseCompleted> = None;
|
||||
|
||||
loop {
|
||||
let sse = match timeout(idle_timeout, stream.next()).await {
|
||||
@@ -233,9 +352,15 @@ where
|
||||
return;
|
||||
}
|
||||
Ok(None) => {
|
||||
match response_id {
|
||||
Some(response_id) => {
|
||||
let event = ResponseEvent::Completed { response_id };
|
||||
match response_completed {
|
||||
Some(ResponseCompleted {
|
||||
id: response_id,
|
||||
usage,
|
||||
}) => {
|
||||
let event = ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage: usage.map(Into::into),
|
||||
};
|
||||
let _ = tx_event.send(Ok(event)).await;
|
||||
}
|
||||
None => {
|
||||
@@ -274,7 +399,7 @@ where
|
||||
// duplicated `output` array embedded in the `response.completed`
|
||||
// payload. That produced two concrete issues:
|
||||
// 1. No real‑time streaming – the user only saw output after the
|
||||
// entire turn had finished, which broke the “typing” UX and
|
||||
// entire turn had finished, which broke the "typing" UX and
|
||||
// made long‑running turns look stalled.
|
||||
// 2. Duplicate `function_call_output` items – both the
|
||||
// individual *and* the completed array were forwarded, which
|
||||
@@ -296,12 +421,54 @@ where
|
||||
return;
|
||||
}
|
||||
}
|
||||
"response.output_text.delta" => {
|
||||
if let Some(delta) = event.delta {
|
||||
let event = ResponseEvent::OutputTextDelta(delta);
|
||||
if tx_event.send(Ok(event)).await.is_err() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
"response.reasoning_summary_text.delta" => {
|
||||
if let Some(delta) = event.delta {
|
||||
let event = ResponseEvent::ReasoningSummaryDelta(delta);
|
||||
if tx_event.send(Ok(event)).await.is_err() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
"response.reasoning_text.delta" => {
|
||||
if let Some(delta) = event.delta {
|
||||
let event = ResponseEvent::ReasoningContentDelta(delta);
|
||||
if tx_event.send(Ok(event)).await.is_err() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
"response.created" => {
|
||||
if event.response.is_some() {
|
||||
let _ = tx_event.send(Ok(ResponseEvent::Created {})).await;
|
||||
}
|
||||
}
|
||||
"response.failed" => {
|
||||
if let Some(resp_val) = event.response {
|
||||
let error = resp_val
|
||||
.get("error")
|
||||
.and_then(|v| v.get("message"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("response.failed event received");
|
||||
|
||||
let _ = tx_event
|
||||
.send(Err(CodexErr::Stream(error.to_string())))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
// Final response completed – includes array of output items & id
|
||||
"response.completed" => {
|
||||
if let Some(resp_val) = event.response {
|
||||
match serde_json::from_value::<ResponseCompleted>(resp_val) {
|
||||
Ok(r) => {
|
||||
response_id = Some(r.id);
|
||||
response_completed = Some(r);
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("failed to parse ResponseCompleted: {e}");
|
||||
@@ -311,14 +478,11 @@ where
|
||||
};
|
||||
}
|
||||
"response.content_part.done"
|
||||
| "response.created"
|
||||
| "response.function_call_arguments.delta"
|
||||
| "response.in_progress"
|
||||
| "response.output_item.added"
|
||||
| "response.output_text.delta"
|
||||
| "response.output_text.done"
|
||||
| "response.reasoning_summary_part.added"
|
||||
| "response.reasoning_summary_text.delta"
|
||||
| "response.reasoning_summary_text.done" => {
|
||||
// Currently, we ignore these events, but we handle them
|
||||
// separately to skip the logging message in the `other` case.
|
||||
@@ -329,8 +493,11 @@ where
|
||||
}
|
||||
|
||||
/// used in tests to stream from a text SSE file
|
||||
async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(16);
|
||||
async fn stream_from_fixture(
|
||||
path: impl AsRef<Path>,
|
||||
provider: ModelProviderInfo,
|
||||
) -> Result<ResponseStream> {
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
|
||||
let f = std::fs::File::open(path.as_ref())?;
|
||||
let lines = std::io::BufReader::new(f).lines();
|
||||
|
||||
@@ -343,6 +510,302 @@ async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {
|
||||
|
||||
let rdr = std::io::Cursor::new(content);
|
||||
let stream = ReaderStream::new(rdr).map_err(CodexErr::Io);
|
||||
tokio::spawn(process_sse(stream, tx_event));
|
||||
tokio::spawn(process_sse(
|
||||
stream,
|
||||
tx_event,
|
||||
provider.stream_idle_timeout(),
|
||||
));
|
||||
Ok(ResponseStream { rx_event })
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
|
||||
use super::*;
|
||||
use serde_json::json;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_test::io::Builder as IoBuilder;
|
||||
use tokio_util::io::ReaderStream;
|
||||
|
||||
// ────────────────────────────
|
||||
// Helpers
|
||||
// ────────────────────────────
|
||||
|
||||
/// Runs the SSE parser on pre-chunked byte slices and returns every event
|
||||
/// (including any final `Err` from a stream-closure check).
|
||||
async fn collect_events(
|
||||
chunks: &[&[u8]],
|
||||
provider: ModelProviderInfo,
|
||||
) -> Vec<Result<ResponseEvent>> {
|
||||
let mut builder = IoBuilder::new();
|
||||
for chunk in chunks {
|
||||
builder.read(chunk);
|
||||
}
|
||||
|
||||
let reader = builder.build();
|
||||
let stream = ReaderStream::new(reader).map_err(CodexErr::Io);
|
||||
let (tx, mut rx) = mpsc::channel::<Result<ResponseEvent>>(16);
|
||||
tokio::spawn(process_sse(stream, tx, provider.stream_idle_timeout()));
|
||||
|
||||
let mut events = Vec::new();
|
||||
while let Some(ev) = rx.recv().await {
|
||||
events.push(ev);
|
||||
}
|
||||
events
|
||||
}
|
||||
|
||||
/// Builds an in-memory SSE stream from JSON fixtures and returns only the
|
||||
/// successfully parsed events (panics on internal channel errors).
|
||||
async fn run_sse(
|
||||
events: Vec<serde_json::Value>,
|
||||
provider: ModelProviderInfo,
|
||||
) -> Vec<ResponseEvent> {
|
||||
let mut body = String::new();
|
||||
for e in events {
|
||||
let kind = e
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.expect("fixture event missing type");
|
||||
if e.as_object().map(|o| o.len() == 1).unwrap_or(false) {
|
||||
body.push_str(&format!("event: {kind}\n\n"));
|
||||
} else {
|
||||
body.push_str(&format!("event: {kind}\ndata: {e}\n\n"));
|
||||
}
|
||||
}
|
||||
|
||||
let (tx, mut rx) = mpsc::channel::<Result<ResponseEvent>>(8);
|
||||
let stream = ReaderStream::new(std::io::Cursor::new(body)).map_err(CodexErr::Io);
|
||||
tokio::spawn(process_sse(stream, tx, provider.stream_idle_timeout()));
|
||||
|
||||
let mut out = Vec::new();
|
||||
while let Some(ev) = rx.recv().await {
|
||||
out.push(ev.expect("channel closed"));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
// ────────────────────────────
|
||||
// Tests from `implement-test-for-responses-api-sse-parser`
|
||||
// ────────────────────────────
|
||||
|
||||
#[tokio::test]
|
||||
async fn parses_items_and_completed() {
|
||||
let item1 = json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": "Hello"}]
|
||||
}
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let item2 = json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": "World"}]
|
||||
}
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let completed = json!({
|
||||
"type": "response.completed",
|
||||
"response": { "id": "resp1" }
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let sse1 = format!("event: response.output_item.done\ndata: {item1}\n\n");
|
||||
let sse2 = format!("event: response.output_item.done\ndata: {item2}\n\n");
|
||||
let sse3 = format!("event: response.completed\ndata: {completed}\n\n");
|
||||
|
||||
let provider = ModelProviderInfo {
|
||||
name: "test".to_string(),
|
||||
base_url: Some("https://test.com".to_string()),
|
||||
env_key: Some("TEST_API_KEY".to_string()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
let events = collect_events(
|
||||
&[sse1.as_bytes(), sse2.as_bytes(), sse3.as_bytes()],
|
||||
provider,
|
||||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(events.len(), 3);
|
||||
|
||||
matches!(
|
||||
&events[0],
|
||||
Ok(ResponseEvent::OutputItemDone(ResponseItem::Message { role, .. }))
|
||||
if role == "assistant"
|
||||
);
|
||||
|
||||
matches!(
|
||||
&events[1],
|
||||
Ok(ResponseEvent::OutputItemDone(ResponseItem::Message { role, .. }))
|
||||
if role == "assistant"
|
||||
);
|
||||
|
||||
match &events[2] {
|
||||
Ok(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
}) => {
|
||||
assert_eq!(response_id, "resp1");
|
||||
assert!(token_usage.is_none());
|
||||
}
|
||||
other => panic!("unexpected third event: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn error_when_missing_completed() {
|
||||
let item1 = json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": "Hello"}]
|
||||
}
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let sse1 = format!("event: response.output_item.done\ndata: {item1}\n\n");
|
||||
let provider = ModelProviderInfo {
|
||||
name: "test".to_string(),
|
||||
base_url: Some("https://test.com".to_string()),
|
||||
env_key: Some("TEST_API_KEY".to_string()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
let events = collect_events(&[sse1.as_bytes()], provider).await;
|
||||
|
||||
assert_eq!(events.len(), 2);
|
||||
|
||||
matches!(events[0], Ok(ResponseEvent::OutputItemDone(_)));
|
||||
|
||||
match &events[1] {
|
||||
Err(CodexErr::Stream(msg)) => {
|
||||
assert_eq!(msg, "stream closed before response.completed")
|
||||
}
|
||||
other => panic!("unexpected second event: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
// ────────────────────────────
|
||||
// Table-driven test from `main`
|
||||
// ────────────────────────────
|
||||
|
||||
/// Verifies that the adapter produces the right `ResponseEvent` for a
|
||||
/// variety of incoming `type` values.
|
||||
#[tokio::test]
|
||||
async fn table_driven_event_kinds() {
|
||||
struct TestCase {
|
||||
name: &'static str,
|
||||
event: serde_json::Value,
|
||||
expect_first: fn(&ResponseEvent) -> bool,
|
||||
expected_len: usize,
|
||||
}
|
||||
|
||||
fn is_created(ev: &ResponseEvent) -> bool {
|
||||
matches!(ev, ResponseEvent::Created)
|
||||
}
|
||||
fn is_output(ev: &ResponseEvent) -> bool {
|
||||
matches!(ev, ResponseEvent::OutputItemDone(_))
|
||||
}
|
||||
fn is_completed(ev: &ResponseEvent) -> bool {
|
||||
matches!(ev, ResponseEvent::Completed { .. })
|
||||
}
|
||||
|
||||
let completed = json!({
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "c",
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 0,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 0
|
||||
},
|
||||
"output": []
|
||||
}
|
||||
});
|
||||
|
||||
let cases = vec![
|
||||
TestCase {
|
||||
name: "created",
|
||||
event: json!({"type": "response.created", "response": {}}),
|
||||
expect_first: is_created,
|
||||
expected_len: 2,
|
||||
},
|
||||
TestCase {
|
||||
name: "output_item.done",
|
||||
event: json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "output_text", "text": "hi"}
|
||||
]
|
||||
}
|
||||
}),
|
||||
expect_first: is_output,
|
||||
expected_len: 2,
|
||||
},
|
||||
TestCase {
|
||||
name: "unknown",
|
||||
event: json!({"type": "response.new_tool_event"}),
|
||||
expect_first: is_completed,
|
||||
expected_len: 1,
|
||||
},
|
||||
];
|
||||
|
||||
for case in cases {
|
||||
let mut evs = vec![case.event];
|
||||
evs.push(completed.clone());
|
||||
|
||||
let provider = ModelProviderInfo {
|
||||
name: "test".to_string(),
|
||||
base_url: Some("https://test.com".to_string()),
|
||||
env_key: Some("TEST_API_KEY".to_string()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
let out = run_sse(evs, provider).await;
|
||||
assert_eq!(out.len(), case.expected_len, "case {}", case.name);
|
||||
assert!(
|
||||
(case.expect_first)(&out[0]),
|
||||
"first event mismatch in case {}",
|
||||
case.name
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,19 @@
|
||||
use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use crate::error::Result;
|
||||
use crate::model_family::ModelFamily;
|
||||
use crate::models::ContentItem;
|
||||
use crate::models::ResponseItem;
|
||||
use crate::openai_tools::OpenAiTool;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::protocol::TokenUsage;
|
||||
use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
|
||||
use futures::Stream;
|
||||
use serde::Serialize;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
use std::path::PathBuf;
|
||||
use std::pin::Pin;
|
||||
use std::task::Context;
|
||||
use std::task::Poll;
|
||||
@@ -16,42 +23,127 @@ use tokio::sync::mpsc;
|
||||
/// with this content.
|
||||
const BASE_INSTRUCTIONS: &str = include_str!("../prompt.md");
|
||||
|
||||
/// wraps environment context message in a tag for the model to parse more easily.
|
||||
const ENVIRONMENT_CONTEXT_START: &str = "<environment_context>\n\n";
|
||||
const ENVIRONMENT_CONTEXT_END: &str = "\n\n</environment_context>";
|
||||
|
||||
/// wraps user instructions message in a tag for the model to parse more easily.
|
||||
const USER_INSTRUCTIONS_START: &str = "<user_instructions>\n\n";
|
||||
const USER_INSTRUCTIONS_END: &str = "\n\n</user_instructions>";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct EnvironmentContext {
|
||||
pub cwd: PathBuf,
|
||||
pub approval_policy: AskForApproval,
|
||||
pub sandbox_policy: SandboxPolicy,
|
||||
}
|
||||
|
||||
impl Display for EnvironmentContext {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
writeln!(
|
||||
f,
|
||||
"Current working directory: {}",
|
||||
self.cwd.to_string_lossy()
|
||||
)?;
|
||||
writeln!(f, "Approval policy: {}", self.approval_policy)?;
|
||||
writeln!(f, "Sandbox policy: {}", self.sandbox_policy)?;
|
||||
|
||||
let network_access = match self.sandbox_policy.clone() {
|
||||
SandboxPolicy::DangerFullAccess => "enabled",
|
||||
SandboxPolicy::ReadOnly => "restricted",
|
||||
SandboxPolicy::WorkspaceWrite { network_access, .. } => {
|
||||
if network_access {
|
||||
"enabled"
|
||||
} else {
|
||||
"restricted"
|
||||
}
|
||||
}
|
||||
};
|
||||
writeln!(f, "Network access: {network_access}")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// API request payload for a single model turn.
|
||||
#[derive(Default, Debug, Clone)]
|
||||
pub struct Prompt {
|
||||
/// Conversation context input items.
|
||||
pub input: Vec<ResponseItem>,
|
||||
/// Optional previous response ID (when storage is enabled).
|
||||
pub prev_id: Option<String>,
|
||||
/// Optional instructions from the user to amend to the built-in agent
|
||||
/// instructions.
|
||||
pub user_instructions: Option<String>,
|
||||
/// Whether to store response on server side (disable_response_storage = !store).
|
||||
pub store: bool,
|
||||
|
||||
/// Additional tools sourced from external MCP servers. Note each key is
|
||||
/// the "fully qualified" tool name (i.e., prefixed with the server name),
|
||||
/// which should be reported to the model in place of Tool::name.
|
||||
pub extra_tools: HashMap<String, mcp_types::Tool>,
|
||||
/// A list of key-value pairs that will be added as a developer message
|
||||
/// for the model to use
|
||||
pub environment_context: Option<EnvironmentContext>,
|
||||
|
||||
/// Tools available to the model, including additional tools sourced from
|
||||
/// external MCP servers.
|
||||
pub tools: Vec<OpenAiTool>,
|
||||
|
||||
/// Optional override for the built-in BASE_INSTRUCTIONS.
|
||||
pub base_instructions_override: Option<String>,
|
||||
}
|
||||
|
||||
impl Prompt {
|
||||
pub(crate) fn get_full_instructions(&self, model: &str) -> Cow<str> {
|
||||
let mut sections: Vec<&str> = vec![BASE_INSTRUCTIONS];
|
||||
if let Some(ref user) = self.user_instructions {
|
||||
sections.push(user);
|
||||
}
|
||||
if model.starts_with("gpt-4.1") {
|
||||
pub(crate) fn get_full_instructions(&self, model: &ModelFamily) -> Cow<'_, str> {
|
||||
let base = self
|
||||
.base_instructions_override
|
||||
.as_deref()
|
||||
.unwrap_or(BASE_INSTRUCTIONS);
|
||||
let mut sections: Vec<&str> = vec![base];
|
||||
if model.needs_special_apply_patch_instructions {
|
||||
sections.push(APPLY_PATCH_TOOL_INSTRUCTIONS);
|
||||
}
|
||||
Cow::Owned(sections.join("\n"))
|
||||
}
|
||||
|
||||
fn get_formatted_user_instructions(&self) -> Option<String> {
|
||||
self.user_instructions
|
||||
.as_ref()
|
||||
.map(|ui| format!("{USER_INSTRUCTIONS_START}{ui}{USER_INSTRUCTIONS_END}"))
|
||||
}
|
||||
|
||||
fn get_formatted_environment_context(&self) -> Option<String> {
|
||||
self.environment_context
|
||||
.as_ref()
|
||||
.map(|ec| format!("{ENVIRONMENT_CONTEXT_START}{ec}{ENVIRONMENT_CONTEXT_END}"))
|
||||
}
|
||||
|
||||
pub(crate) fn get_formatted_input(&self) -> Vec<ResponseItem> {
|
||||
let mut input_with_instructions = Vec::with_capacity(self.input.len() + 2);
|
||||
if let Some(ec) = self.get_formatted_environment_context() {
|
||||
input_with_instructions.push(ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText { text: ec }],
|
||||
});
|
||||
}
|
||||
if let Some(ui) = self.get_formatted_user_instructions() {
|
||||
input_with_instructions.push(ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText { text: ui }],
|
||||
});
|
||||
}
|
||||
input_with_instructions.extend(self.input.clone());
|
||||
input_with_instructions
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ResponseEvent {
|
||||
Created,
|
||||
OutputItemDone(ResponseItem),
|
||||
Completed { response_id: String },
|
||||
Completed {
|
||||
response_id: String,
|
||||
token_usage: Option<TokenUsage>,
|
||||
},
|
||||
OutputTextDelta(String),
|
||||
ReasoningSummaryDelta(String),
|
||||
ReasoningContentDelta(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
@@ -119,22 +211,20 @@ pub(crate) struct ResponsesApiRequest<'a> {
|
||||
pub(crate) tool_choice: &'static str,
|
||||
pub(crate) parallel_tool_calls: bool,
|
||||
pub(crate) reasoning: Option<Reasoning>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) previous_response_id: Option<String>,
|
||||
/// true when using the Responses API.
|
||||
pub(crate) store: bool,
|
||||
pub(crate) stream: bool,
|
||||
pub(crate) include: Vec<String>,
|
||||
}
|
||||
|
||||
pub(crate) fn create_reasoning_param_for_request(
|
||||
model: &str,
|
||||
model_family: &ModelFamily,
|
||||
effort: ReasoningEffortConfig,
|
||||
summary: ReasoningSummaryConfig,
|
||||
) -> Option<Reasoning> {
|
||||
let effort: Option<OpenAiReasoningEffort> = effort.into();
|
||||
let effort = effort?;
|
||||
|
||||
if model_supports_reasoning_summaries(model) {
|
||||
if model_family.supports_reasoning_summaries {
|
||||
let effort: Option<OpenAiReasoningEffort> = effort.into();
|
||||
let effort = effort?;
|
||||
Some(Reasoning {
|
||||
effort,
|
||||
summary: summary.into(),
|
||||
@@ -144,22 +234,6 @@ pub(crate) fn create_reasoning_param_for_request(
|
||||
}
|
||||
}
|
||||
|
||||
pub fn model_supports_reasoning_summaries(model: &str) -> bool {
|
||||
// Currently, we hardcode this rule to decide whether enable reasoning.
|
||||
// We expect reasoning to apply only to OpenAI models, but we do not want
|
||||
// users to have to mess with their config to disable reasoning for models
|
||||
// that do not support it, such as `gpt-4.1`.
|
||||
//
|
||||
// Though if a user is using Codex with non-OpenAI models that, say, happen
|
||||
// to start with "o", then they can set `model_reasoning_effort = "none` in
|
||||
// config.toml to disable reasoning.
|
||||
//
|
||||
// Ultimately, this should also be configurable in config.toml, but we
|
||||
// need to have defaults that "just work." Perhaps we could have a
|
||||
// "reasoning models pattern" as part of ModelProviderInfo?
|
||||
model.starts_with("o") || model.starts_with("codex")
|
||||
}
|
||||
|
||||
pub(crate) struct ResponseStream {
|
||||
pub(crate) rx_event: mpsc::Receiver<Result<ResponseEvent>>,
|
||||
}
|
||||
@@ -171,3 +245,23 @@ impl Stream for ResponseStream {
|
||||
self.rx_event.poll_recv(cx)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::expect_used)]
|
||||
use crate::model_family::find_family_for_model;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn get_full_instructions_no_user_content() {
|
||||
let prompt = Prompt {
|
||||
user_instructions: Some("custom instruction".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
let expected = format!("{BASE_INSTRUCTIONS}\n{APPLY_PATCH_TOOL_INSTRUCTIONS}");
|
||||
let model_family = find_family_for_model("gpt-4.1").expect("known model slug");
|
||||
let full = prompt.get_full_instructions(&model_family);
|
||||
assert_eq!(full, expected);
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,20 +1,37 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::Codex;
|
||||
use crate::CodexSpawnOk;
|
||||
use crate::config::Config;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::util::notify_on_sigint;
|
||||
use codex_login::load_auth;
|
||||
use tokio::sync::Notify;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Represents an active Codex conversation, including the first event
|
||||
/// (which is [`EventMsg::SessionConfigured`]).
|
||||
pub struct CodexConversation {
|
||||
pub codex: Codex,
|
||||
pub session_id: Uuid,
|
||||
pub session_configured: Event,
|
||||
pub ctrl_c: Arc<Notify>,
|
||||
}
|
||||
|
||||
/// Spawn a new [`Codex`] and initialize the session.
|
||||
///
|
||||
/// Returns the wrapped [`Codex`] **and** the `SessionInitialized` event that
|
||||
/// is received as a response to the initial `ConfigureSession` submission so
|
||||
/// that callers can surface the information to the UI.
|
||||
pub async fn init_codex(config: Config) -> anyhow::Result<(Codex, Event, Arc<Notify>)> {
|
||||
pub async fn init_codex(config: Config) -> anyhow::Result<CodexConversation> {
|
||||
let ctrl_c = notify_on_sigint();
|
||||
let (codex, init_id) = Codex::spawn(config, ctrl_c.clone()).await?;
|
||||
let auth = load_auth(&config.codex_home, true)?;
|
||||
let CodexSpawnOk {
|
||||
codex,
|
||||
init_id,
|
||||
session_id,
|
||||
} = Codex::spawn(config, auth, ctrl_c.clone()).await?;
|
||||
|
||||
// The first event must be `SessionInitialized`. Validate and forward it to
|
||||
// the caller so that they can display it in the conversation history.
|
||||
@@ -33,5 +50,10 @@ pub async fn init_codex(config: Config) -> anyhow::Result<(Codex, Event, Arc<Not
|
||||
));
|
||||
}
|
||||
|
||||
Ok((codex, event, ctrl_c))
|
||||
Ok(CodexConversation {
|
||||
codex,
|
||||
session_id,
|
||||
session_configured: event,
|
||||
ctrl_c,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -3,15 +3,19 @@ use crate::config_types::History;
|
||||
use crate::config_types::McpServerConfig;
|
||||
use crate::config_types::ReasoningEffort;
|
||||
use crate::config_types::ReasoningSummary;
|
||||
use crate::config_types::SandboxMode;
|
||||
use crate::config_types::SandboxWorkplaceWrite;
|
||||
use crate::config_types::ShellEnvironmentPolicy;
|
||||
use crate::config_types::ShellEnvironmentPolicyToml;
|
||||
use crate::config_types::Tui;
|
||||
use crate::config_types::UriBasedFileOpener;
|
||||
use crate::flags::OPENAI_DEFAULT_MODEL;
|
||||
use crate::model_family::ModelFamily;
|
||||
use crate::model_family::find_family_for_model;
|
||||
use crate::model_provider_info::ModelProviderInfo;
|
||||
use crate::model_provider_info::built_in_model_providers;
|
||||
use crate::openai_model_info::get_model_info;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::SandboxPermission;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use dirs::home_dir;
|
||||
use serde::Deserialize;
|
||||
@@ -31,6 +35,14 @@ pub struct Config {
|
||||
/// Optional override of model selection.
|
||||
pub model: String,
|
||||
|
||||
pub model_family: ModelFamily,
|
||||
|
||||
/// Size of the context window for the model, in tokens.
|
||||
pub model_context_window: Option<u64>,
|
||||
|
||||
/// Maximum number of output tokens.
|
||||
pub model_max_output_tokens: Option<u64>,
|
||||
|
||||
/// Key into the model_providers map that specifies which provider to use.
|
||||
pub model_provider_id: String,
|
||||
|
||||
@@ -49,13 +61,20 @@ pub struct Config {
|
||||
/// users are only interested in the final agent responses.
|
||||
pub hide_agent_reasoning: bool,
|
||||
|
||||
/// When set to `true`, `AgentReasoningRawContentEvent` events will be shown in the UI/output.
|
||||
/// Defaults to `false`.
|
||||
pub show_raw_agent_reasoning: bool,
|
||||
|
||||
/// Disable server-side response storage (sends the full conversation
|
||||
/// context with every request). Currently necessary for OpenAI customers
|
||||
/// who have opted into Zero Data Retention (ZDR).
|
||||
pub disable_response_storage: bool,
|
||||
|
||||
/// User-provided instructions from instructions.md.
|
||||
pub instructions: Option<String>,
|
||||
/// User-provided instructions from AGENTS.md.
|
||||
pub user_instructions: Option<String>,
|
||||
|
||||
/// Base instructions override.
|
||||
pub base_instructions: Option<String>,
|
||||
|
||||
/// Optional external notifier command. When set, Codex will spawn this
|
||||
/// program after each completed *turn* (i.e. when the agent finishes
|
||||
@@ -122,6 +141,18 @@ pub struct Config {
|
||||
/// If not "none", the value to use for `reasoning.summary` when making a
|
||||
/// request using the Responses API.
|
||||
pub model_reasoning_summary: ReasoningSummary,
|
||||
|
||||
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
|
||||
pub chatgpt_base_url: String,
|
||||
|
||||
/// Experimental rollout resume path (absolute path to .jsonl; undocumented).
|
||||
pub experimental_resume: Option<PathBuf>,
|
||||
|
||||
/// Include an experimental plan tool that the model can use to update its current plan and status of each step.
|
||||
pub include_plan_tool: bool,
|
||||
|
||||
/// The value for the `originator` header included with Responses API requests.
|
||||
pub internal_originator: Option<String>,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
@@ -235,17 +266,23 @@ pub struct ConfigToml {
|
||||
/// Provider to use from the model_providers map.
|
||||
pub model_provider: Option<String>,
|
||||
|
||||
/// Size of the context window for the model, in tokens.
|
||||
pub model_context_window: Option<u64>,
|
||||
|
||||
/// Maximum number of output tokens.
|
||||
pub model_max_output_tokens: Option<u64>,
|
||||
|
||||
/// Default approval policy for executing commands.
|
||||
pub approval_policy: Option<AskForApproval>,
|
||||
|
||||
#[serde(default)]
|
||||
pub shell_environment_policy: ShellEnvironmentPolicyToml,
|
||||
|
||||
// The `default` attribute ensures that the field is treated as `None` when
|
||||
// the key is omitted from the TOML. Without it, Serde treats the field as
|
||||
// required because we supply a custom deserializer.
|
||||
#[serde(default, deserialize_with = "deserialize_sandbox_permissions")]
|
||||
pub sandbox_permissions: Option<Vec<SandboxPermission>>,
|
||||
/// Sandbox mode to use.
|
||||
pub sandbox_mode: Option<SandboxMode>,
|
||||
|
||||
/// Sandbox configuration to apply if `sandbox` is `WorkspaceWrite`.
|
||||
pub sandbox_workspace_write: Option<SandboxWorkplaceWrite>,
|
||||
|
||||
/// Disable server-side response storage (sends the full conversation
|
||||
/// context with every request). Currently necessary for OpenAI customers
|
||||
@@ -292,33 +329,47 @@ pub struct ConfigToml {
|
||||
/// UI/output. Defaults to `false`.
|
||||
pub hide_agent_reasoning: Option<bool>,
|
||||
|
||||
/// When set to `true`, `AgentReasoningRawContentEvent` events will be shown in the UI/output.
|
||||
/// Defaults to `false`.
|
||||
pub show_raw_agent_reasoning: Option<bool>,
|
||||
|
||||
pub model_reasoning_effort: Option<ReasoningEffort>,
|
||||
pub model_reasoning_summary: Option<ReasoningSummary>,
|
||||
|
||||
/// Override to force-enable reasoning summaries for the configured model.
|
||||
pub model_supports_reasoning_summaries: Option<bool>,
|
||||
|
||||
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
|
||||
pub chatgpt_base_url: Option<String>,
|
||||
|
||||
/// Experimental rollout resume path (absolute path to .jsonl; undocumented).
|
||||
pub experimental_resume: Option<PathBuf>,
|
||||
|
||||
/// Experimental path to a file whose contents replace the built-in BASE_INSTRUCTIONS.
|
||||
pub experimental_instructions_file: Option<PathBuf>,
|
||||
|
||||
/// The value for the `originator` header included with Responses API requests.
|
||||
pub internal_originator: Option<String>,
|
||||
}
|
||||
|
||||
fn deserialize_sandbox_permissions<'de, D>(
|
||||
deserializer: D,
|
||||
) -> Result<Option<Vec<SandboxPermission>>, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
let permissions: Option<Vec<String>> = Option::deserialize(deserializer)?;
|
||||
|
||||
match permissions {
|
||||
Some(raw_permissions) => {
|
||||
let base_path = find_codex_home().map_err(serde::de::Error::custom)?;
|
||||
|
||||
let converted = raw_permissions
|
||||
.into_iter()
|
||||
.map(|raw| {
|
||||
parse_sandbox_permission_with_base_path(&raw, base_path.clone())
|
||||
.map_err(serde::de::Error::custom)
|
||||
})
|
||||
.collect::<Result<Vec<_>, D::Error>>()?;
|
||||
|
||||
Ok(Some(converted))
|
||||
impl ConfigToml {
|
||||
/// Derive the effective sandbox policy from the configuration.
|
||||
fn derive_sandbox_policy(&self, sandbox_mode_override: Option<SandboxMode>) -> SandboxPolicy {
|
||||
let resolved_sandbox_mode = sandbox_mode_override
|
||||
.or(self.sandbox_mode)
|
||||
.unwrap_or_default();
|
||||
match resolved_sandbox_mode {
|
||||
SandboxMode::ReadOnly => SandboxPolicy::new_read_only_policy(),
|
||||
SandboxMode::WorkspaceWrite => match self.sandbox_workspace_write.as_ref() {
|
||||
Some(s) => SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: s.writable_roots.clone(),
|
||||
network_access: s.network_access,
|
||||
include_default_writable_roots: true,
|
||||
},
|
||||
None => SandboxPolicy::new_workspace_write_policy(),
|
||||
},
|
||||
SandboxMode::DangerFullAccess => SandboxPolicy::DangerFullAccess,
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -328,10 +379,14 @@ pub struct ConfigOverrides {
|
||||
pub model: Option<String>,
|
||||
pub cwd: Option<PathBuf>,
|
||||
pub approval_policy: Option<AskForApproval>,
|
||||
pub sandbox_policy: Option<SandboxPolicy>,
|
||||
pub sandbox_mode: Option<SandboxMode>,
|
||||
pub model_provider: Option<String>,
|
||||
pub config_profile: Option<String>,
|
||||
pub codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
pub base_instructions: Option<String>,
|
||||
pub include_plan_tool: Option<bool>,
|
||||
pub disable_response_storage: Option<bool>,
|
||||
pub show_raw_agent_reasoning: Option<bool>,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
@@ -342,23 +397,27 @@ impl Config {
|
||||
overrides: ConfigOverrides,
|
||||
codex_home: PathBuf,
|
||||
) -> std::io::Result<Self> {
|
||||
let instructions = Self::load_instructions(Some(&codex_home));
|
||||
let user_instructions = Self::load_instructions(Some(&codex_home));
|
||||
|
||||
// Destructure ConfigOverrides fully to ensure all overrides are applied.
|
||||
let ConfigOverrides {
|
||||
model,
|
||||
cwd,
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
sandbox_mode,
|
||||
model_provider,
|
||||
config_profile: config_profile_key,
|
||||
codex_linux_sandbox_exe,
|
||||
base_instructions,
|
||||
include_plan_tool,
|
||||
disable_response_storage,
|
||||
show_raw_agent_reasoning,
|
||||
} = overrides;
|
||||
|
||||
let config_profile = match config_profile_key.or(cfg.profile) {
|
||||
let config_profile = match config_profile_key.as_ref().or(cfg.profile.as_ref()) {
|
||||
Some(key) => cfg
|
||||
.profiles
|
||||
.get(&key)
|
||||
.get(key)
|
||||
.ok_or_else(|| {
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::NotFound,
|
||||
@@ -369,20 +428,7 @@ impl Config {
|
||||
None => ConfigProfile::default(),
|
||||
};
|
||||
|
||||
let sandbox_policy = match sandbox_policy {
|
||||
Some(sandbox_policy) => sandbox_policy,
|
||||
None => {
|
||||
// Derive a SandboxPolicy from the permissions in the config.
|
||||
match cfg.sandbox_permissions {
|
||||
// Note this means the user can explicitly set permissions
|
||||
// to the empty list in the config file, granting it no
|
||||
// permissions whatsoever.
|
||||
Some(permissions) => SandboxPolicy::from(permissions),
|
||||
// Default to read only rather than completely locked down.
|
||||
None => SandboxPolicy::new_read_only_policy(),
|
||||
}
|
||||
}
|
||||
};
|
||||
let sandbox_policy = cfg.derive_sandbox_policy(sandbox_mode);
|
||||
|
||||
let mut model_providers = built_in_model_providers();
|
||||
// Merge user-defined providers into the built-in list.
|
||||
@@ -427,11 +473,51 @@ impl Config {
|
||||
|
||||
let history = cfg.history.unwrap_or_default();
|
||||
|
||||
let model = model
|
||||
.or(config_profile.model)
|
||||
.or(cfg.model)
|
||||
.unwrap_or_else(default_model);
|
||||
let model_family = find_family_for_model(&model).unwrap_or_else(|| {
|
||||
let supports_reasoning_summaries =
|
||||
cfg.model_supports_reasoning_summaries.unwrap_or(false);
|
||||
ModelFamily {
|
||||
slug: model.clone(),
|
||||
family: model.clone(),
|
||||
needs_special_apply_patch_instructions: false,
|
||||
supports_reasoning_summaries,
|
||||
uses_local_shell_tool: false,
|
||||
requires_chatgpt_auth: false,
|
||||
}
|
||||
});
|
||||
|
||||
let openai_model_info = get_model_info(&model_family);
|
||||
let model_context_window = cfg
|
||||
.model_context_window
|
||||
.or_else(|| openai_model_info.as_ref().map(|info| info.context_window));
|
||||
let model_max_output_tokens = cfg.model_max_output_tokens.or_else(|| {
|
||||
openai_model_info
|
||||
.as_ref()
|
||||
.map(|info| info.max_output_tokens)
|
||||
});
|
||||
|
||||
let experimental_resume = cfg.experimental_resume;
|
||||
|
||||
// Load base instructions override from a file if specified. If the
|
||||
// path is relative, resolve it against the effective cwd so the
|
||||
// behaviour matches other path-like config values.
|
||||
let experimental_instructions_path = config_profile
|
||||
.experimental_instructions_file
|
||||
.as_ref()
|
||||
.or(cfg.experimental_instructions_file.as_ref());
|
||||
let file_base_instructions =
|
||||
Self::get_base_instructions(experimental_instructions_path, &resolved_cwd)?;
|
||||
let base_instructions = base_instructions.or(file_base_instructions);
|
||||
|
||||
let config = Self {
|
||||
model: model
|
||||
.or(config_profile.model)
|
||||
.or(cfg.model)
|
||||
.unwrap_or_else(default_model),
|
||||
model,
|
||||
model_family,
|
||||
model_context_window,
|
||||
model_max_output_tokens,
|
||||
model_provider_id,
|
||||
model_provider,
|
||||
cwd: resolved_cwd,
|
||||
@@ -444,9 +530,11 @@ impl Config {
|
||||
disable_response_storage: config_profile
|
||||
.disable_response_storage
|
||||
.or(cfg.disable_response_storage)
|
||||
.or(disable_response_storage)
|
||||
.unwrap_or(false),
|
||||
notify: cfg.notify,
|
||||
instructions,
|
||||
user_instructions,
|
||||
base_instructions,
|
||||
mcp_servers: cfg.mcp_servers,
|
||||
model_providers,
|
||||
project_doc_max_bytes: cfg.project_doc_max_bytes.unwrap_or(PROJECT_DOC_MAX_BYTES),
|
||||
@@ -457,8 +545,27 @@ impl Config {
|
||||
codex_linux_sandbox_exe,
|
||||
|
||||
hide_agent_reasoning: cfg.hide_agent_reasoning.unwrap_or(false),
|
||||
model_reasoning_effort: cfg.model_reasoning_effort.unwrap_or_default(),
|
||||
model_reasoning_summary: cfg.model_reasoning_summary.unwrap_or_default(),
|
||||
show_raw_agent_reasoning: cfg
|
||||
.show_raw_agent_reasoning
|
||||
.or(show_raw_agent_reasoning)
|
||||
.unwrap_or(false),
|
||||
model_reasoning_effort: config_profile
|
||||
.model_reasoning_effort
|
||||
.or(cfg.model_reasoning_effort)
|
||||
.unwrap_or_default(),
|
||||
model_reasoning_summary: config_profile
|
||||
.model_reasoning_summary
|
||||
.or(cfg.model_reasoning_summary)
|
||||
.unwrap_or_default(),
|
||||
|
||||
chatgpt_base_url: config_profile
|
||||
.chatgpt_base_url
|
||||
.or(cfg.chatgpt_base_url)
|
||||
.unwrap_or("https://chatgpt.com/backend-api/".to_string()),
|
||||
|
||||
experimental_resume,
|
||||
include_plan_tool: include_plan_tool.unwrap_or(false),
|
||||
internal_originator: cfg.internal_originator,
|
||||
};
|
||||
Ok(config)
|
||||
}
|
||||
@@ -469,7 +576,7 @@ impl Config {
|
||||
None => return None,
|
||||
};
|
||||
|
||||
p.push("instructions.md");
|
||||
p.push("AGENTS.md");
|
||||
std::fs::read_to_string(&p).ok().and_then(|s| {
|
||||
let s = s.trim();
|
||||
if s.is_empty() {
|
||||
@@ -479,6 +586,48 @@ impl Config {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn get_base_instructions(
|
||||
path: Option<&PathBuf>,
|
||||
cwd: &Path,
|
||||
) -> std::io::Result<Option<String>> {
|
||||
let p = match path.as_ref() {
|
||||
None => return Ok(None),
|
||||
Some(p) => p,
|
||||
};
|
||||
|
||||
// Resolve relative paths against the provided cwd to make CLI
|
||||
// overrides consistent regardless of where the process was launched
|
||||
// from.
|
||||
let full_path = if p.is_relative() {
|
||||
cwd.join(p)
|
||||
} else {
|
||||
p.to_path_buf()
|
||||
};
|
||||
|
||||
let contents = std::fs::read_to_string(&full_path).map_err(|e| {
|
||||
std::io::Error::new(
|
||||
e.kind(),
|
||||
format!(
|
||||
"failed to read experimental instructions file {}: {e}",
|
||||
full_path.display()
|
||||
),
|
||||
)
|
||||
})?;
|
||||
|
||||
let s = contents.trim().to_string();
|
||||
if s.is_empty() {
|
||||
Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
format!(
|
||||
"experimental instructions file is empty: {}",
|
||||
full_path.display()
|
||||
),
|
||||
))
|
||||
} else {
|
||||
Ok(Some(s))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn default_model() -> String {
|
||||
@@ -493,7 +642,7 @@ fn default_model() -> String {
|
||||
/// function will Err if the path does not exist.
|
||||
/// - If `CODEX_HOME` is not set, this function does not verify that the
|
||||
/// directory exists.
|
||||
fn find_codex_home() -> std::io::Result<PathBuf> {
|
||||
pub fn find_codex_home() -> std::io::Result<PathBuf> {
|
||||
// Honor the `CODEX_HOME` environment variable when it is set to allow users
|
||||
// (and tests) to override the default location.
|
||||
if let Ok(val) = std::env::var("CODEX_HOME") {
|
||||
@@ -520,50 +669,6 @@ pub fn log_dir(cfg: &Config) -> std::io::Result<PathBuf> {
|
||||
Ok(p)
|
||||
}
|
||||
|
||||
pub fn parse_sandbox_permission_with_base_path(
|
||||
raw: &str,
|
||||
base_path: PathBuf,
|
||||
) -> std::io::Result<SandboxPermission> {
|
||||
use SandboxPermission::*;
|
||||
|
||||
if let Some(path) = raw.strip_prefix("disk-write-folder=") {
|
||||
return if path.is_empty() {
|
||||
Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"--sandbox-permission disk-write-folder=<PATH> requires a non-empty PATH",
|
||||
))
|
||||
} else {
|
||||
use path_absolutize::*;
|
||||
|
||||
let file = PathBuf::from(path);
|
||||
let absolute_path = if file.is_relative() {
|
||||
file.absolutize_from(base_path)
|
||||
} else {
|
||||
file.absolutize()
|
||||
}
|
||||
.map(|path| path.into_owned())?;
|
||||
Ok(DiskWriteFolder {
|
||||
folder: absolute_path,
|
||||
})
|
||||
};
|
||||
}
|
||||
|
||||
match raw {
|
||||
"disk-full-read-access" => Ok(DiskFullReadAccess),
|
||||
"disk-write-platform-user-temp-folder" => Ok(DiskWritePlatformUserTempFolder),
|
||||
"disk-write-platform-global-temp-folder" => Ok(DiskWritePlatformGlobalTempFolder),
|
||||
"disk-write-cwd" => Ok(DiskWriteCwd),
|
||||
"disk-full-write-access" => Ok(DiskFullWriteAccess),
|
||||
"network-full-access" => Ok(NetworkFullAccess),
|
||||
_ => Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
format!(
|
||||
"`{raw}` is not a recognised permission.\nRun with `--help` to see the accepted values."
|
||||
),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
@@ -573,51 +678,14 @@ mod tests {
|
||||
use pretty_assertions::assert_eq;
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Verify that the `sandbox_permissions` field on `ConfigToml` correctly
|
||||
/// differentiates between a value that is completely absent in the
|
||||
/// provided TOML (i.e. `None`) and one that is explicitly specified as an
|
||||
/// empty array (i.e. `Some(vec![])`). This ensures that downstream logic
|
||||
/// that treats these two cases differently (default read-only policy vs a
|
||||
/// fully locked-down sandbox) continues to function.
|
||||
#[test]
|
||||
fn test_sandbox_permissions_none_vs_empty_vec() {
|
||||
// Case 1: `sandbox_permissions` key is *absent* from the TOML source.
|
||||
let toml_source_without_key = "";
|
||||
let cfg_without_key: ConfigToml = toml::from_str(toml_source_without_key)
|
||||
.expect("TOML deserialization without key should succeed");
|
||||
assert!(cfg_without_key.sandbox_permissions.is_none());
|
||||
|
||||
// Case 2: `sandbox_permissions` is present but set to an *empty array*.
|
||||
let toml_source_with_empty = "sandbox_permissions = []";
|
||||
let cfg_with_empty: ConfigToml = toml::from_str(toml_source_with_empty)
|
||||
.expect("TOML deserialization with empty array should succeed");
|
||||
assert_eq!(Some(vec![]), cfg_with_empty.sandbox_permissions);
|
||||
|
||||
// Case 3: `sandbox_permissions` contains a non-empty list of valid values.
|
||||
let toml_source_with_values = r#"
|
||||
sandbox_permissions = ["disk-full-read-access", "network-full-access"]
|
||||
"#;
|
||||
let cfg_with_values: ConfigToml = toml::from_str(toml_source_with_values)
|
||||
.expect("TOML deserialization with valid permissions should succeed");
|
||||
|
||||
assert_eq!(
|
||||
Some(vec![
|
||||
SandboxPermission::DiskFullReadAccess,
|
||||
SandboxPermission::NetworkFullAccess
|
||||
]),
|
||||
cfg_with_values.sandbox_permissions
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_toml_parsing() {
|
||||
let history_with_persistence = r#"
|
||||
[history]
|
||||
persistence = "save-all"
|
||||
"#;
|
||||
let history_with_persistence_cfg: ConfigToml =
|
||||
toml::from_str::<ConfigToml>(history_with_persistence)
|
||||
.expect("TOML deserialization should succeed");
|
||||
let history_with_persistence_cfg = toml::from_str::<ConfigToml>(history_with_persistence)
|
||||
.expect("TOML deserialization should succeed");
|
||||
assert_eq!(
|
||||
Some(History {
|
||||
persistence: HistoryPersistence::SaveAll,
|
||||
@@ -631,9 +699,8 @@ persistence = "save-all"
|
||||
persistence = "none"
|
||||
"#;
|
||||
|
||||
let history_no_persistence_cfg: ConfigToml =
|
||||
toml::from_str::<ConfigToml>(history_no_persistence)
|
||||
.expect("TOML deserialization should succeed");
|
||||
let history_no_persistence_cfg = toml::from_str::<ConfigToml>(history_no_persistence)
|
||||
.expect("TOML deserialization should succeed");
|
||||
assert_eq!(
|
||||
Some(History {
|
||||
persistence: HistoryPersistence::None,
|
||||
@@ -643,20 +710,57 @@ persistence = "none"
|
||||
);
|
||||
}
|
||||
|
||||
/// Deserializing a TOML string containing an *invalid* permission should
|
||||
/// fail with a helpful error rather than silently defaulting or
|
||||
/// succeeding.
|
||||
#[test]
|
||||
fn test_sandbox_permissions_illegal_value() {
|
||||
let toml_bad = r#"sandbox_permissions = ["not-a-real-permission"]"#;
|
||||
fn test_sandbox_config_parsing() {
|
||||
let sandbox_full_access = r#"
|
||||
sandbox_mode = "danger-full-access"
|
||||
|
||||
let err = toml::from_str::<ConfigToml>(toml_bad)
|
||||
.expect_err("Deserialization should fail for invalid permission");
|
||||
[sandbox_workspace_write]
|
||||
network_access = false # This should be ignored.
|
||||
"#;
|
||||
let sandbox_full_access_cfg = toml::from_str::<ConfigToml>(sandbox_full_access)
|
||||
.expect("TOML deserialization should succeed");
|
||||
let sandbox_mode_override = None;
|
||||
assert_eq!(
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
sandbox_full_access_cfg.derive_sandbox_policy(sandbox_mode_override)
|
||||
);
|
||||
|
||||
// Make sure the error message contains the invalid value so users have
|
||||
// useful feedback.
|
||||
let msg = err.to_string();
|
||||
assert!(msg.contains("not-a-real-permission"));
|
||||
let sandbox_read_only = r#"
|
||||
sandbox_mode = "read-only"
|
||||
|
||||
[sandbox_workspace_write]
|
||||
network_access = true # This should be ignored.
|
||||
"#;
|
||||
|
||||
let sandbox_read_only_cfg = toml::from_str::<ConfigToml>(sandbox_read_only)
|
||||
.expect("TOML deserialization should succeed");
|
||||
let sandbox_mode_override = None;
|
||||
assert_eq!(
|
||||
SandboxPolicy::ReadOnly,
|
||||
sandbox_read_only_cfg.derive_sandbox_policy(sandbox_mode_override)
|
||||
);
|
||||
|
||||
let sandbox_workspace_write = r#"
|
||||
sandbox_mode = "workspace-write"
|
||||
|
||||
[sandbox_workspace_write]
|
||||
writable_roots = [
|
||||
"/tmp",
|
||||
]
|
||||
"#;
|
||||
|
||||
let sandbox_workspace_write_cfg = toml::from_str::<ConfigToml>(sandbox_workspace_write)
|
||||
.expect("TOML deserialization should succeed");
|
||||
let sandbox_mode_override = None;
|
||||
assert_eq!(
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![PathBuf::from("/tmp")],
|
||||
network_access: false,
|
||||
include_default_writable_roots: true,
|
||||
},
|
||||
sandbox_workspace_write_cfg.derive_sandbox_policy(sandbox_mode_override)
|
||||
);
|
||||
}
|
||||
|
||||
struct PrecedenceTestFixture {
|
||||
@@ -681,8 +785,7 @@ persistence = "none"
|
||||
fn create_test_fixture() -> std::io::Result<PrecedenceTestFixture> {
|
||||
let toml = r#"
|
||||
model = "o3"
|
||||
approval_policy = "unless-allow-listed"
|
||||
sandbox_permissions = ["disk-full-read-access"]
|
||||
approval_policy = "untrusted"
|
||||
disable_response_storage = false
|
||||
|
||||
# Can be used to determine which profile to use if not specified by
|
||||
@@ -694,11 +797,16 @@ name = "OpenAI using Chat Completions"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
env_key = "OPENAI_API_KEY"
|
||||
wire_api = "chat"
|
||||
request_max_retries = 4 # retry failed HTTP requests
|
||||
stream_max_retries = 10 # retry dropped SSE streams
|
||||
stream_idle_timeout_ms = 300000 # 5m idle timeout
|
||||
|
||||
[profiles.o3]
|
||||
model = "o3"
|
||||
model_provider = "openai"
|
||||
approval_policy = "never"
|
||||
model_reasoning_effort = "high"
|
||||
model_reasoning_summary = "detailed"
|
||||
|
||||
[profiles.gpt3]
|
||||
model = "gpt-3.5-turbo"
|
||||
@@ -725,10 +833,17 @@ disable_response_storage = true
|
||||
|
||||
let openai_chat_completions_provider = ModelProviderInfo {
|
||||
name: "OpenAI using Chat Completions".to_string(),
|
||||
base_url: "https://api.openai.com/v1".to_string(),
|
||||
base_url: Some("https://api.openai.com/v1".to_string()),
|
||||
env_key: Some("OPENAI_API_KEY".to_string()),
|
||||
wire_api: crate::WireApi::Chat,
|
||||
env_key_instructions: None,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(4),
|
||||
stream_max_retries: Some(10),
|
||||
stream_idle_timeout_ms: Some(300_000),
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
let model_provider_map = {
|
||||
let mut model_provider_map = built_in_model_providers();
|
||||
@@ -759,7 +874,7 @@ disable_response_storage = true
|
||||
///
|
||||
/// 1. custom command-line argument, e.g. `--model o3`
|
||||
/// 2. as part of a profile, where the `--profile` is specified via a CLI
|
||||
/// (or in the config file itelf)
|
||||
/// (or in the config file itself)
|
||||
/// 3. as an entry in `config.toml`, e.g. `model = "o3"`
|
||||
/// 4. the default value for a required field defined in code, e.g.,
|
||||
/// `crate::flags::OPENAI_DEFAULT_MODEL`
|
||||
@@ -783,13 +898,16 @@ disable_response_storage = true
|
||||
assert_eq!(
|
||||
Config {
|
||||
model: "o3".to_string(),
|
||||
model_family: find_family_for_model("o3").expect("known model slug"),
|
||||
model_context_window: Some(200_000),
|
||||
model_max_output_tokens: Some(100_000),
|
||||
model_provider_id: "openai".to_string(),
|
||||
model_provider: fixture.openai_provider.clone(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
||||
shell_environment_policy: ShellEnvironmentPolicy::default(),
|
||||
disable_response_storage: false,
|
||||
instructions: None,
|
||||
user_instructions: None,
|
||||
notify: None,
|
||||
cwd: fixture.cwd(),
|
||||
mcp_servers: HashMap::new(),
|
||||
@@ -801,8 +919,14 @@ disable_response_storage = true
|
||||
tui: Tui::default(),
|
||||
codex_linux_sandbox_exe: None,
|
||||
hide_agent_reasoning: false,
|
||||
model_reasoning_effort: ReasoningEffort::default(),
|
||||
model_reasoning_summary: ReasoningSummary::default(),
|
||||
show_raw_agent_reasoning: false,
|
||||
model_reasoning_effort: ReasoningEffort::High,
|
||||
model_reasoning_summary: ReasoningSummary::Detailed,
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
experimental_resume: None,
|
||||
base_instructions: None,
|
||||
include_plan_tool: false,
|
||||
internal_originator: None,
|
||||
},
|
||||
o3_profile_config
|
||||
);
|
||||
@@ -825,13 +949,16 @@ disable_response_storage = true
|
||||
)?;
|
||||
let expected_gpt3_profile_config = Config {
|
||||
model: "gpt-3.5-turbo".to_string(),
|
||||
model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
|
||||
model_context_window: Some(16_385),
|
||||
model_max_output_tokens: Some(4_096),
|
||||
model_provider_id: "openai-chat-completions".to_string(),
|
||||
model_provider: fixture.openai_chat_completions_provider.clone(),
|
||||
approval_policy: AskForApproval::UnlessAllowListed,
|
||||
approval_policy: AskForApproval::UnlessTrusted,
|
||||
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
||||
shell_environment_policy: ShellEnvironmentPolicy::default(),
|
||||
disable_response_storage: false,
|
||||
instructions: None,
|
||||
user_instructions: None,
|
||||
notify: None,
|
||||
cwd: fixture.cwd(),
|
||||
mcp_servers: HashMap::new(),
|
||||
@@ -843,8 +970,14 @@ disable_response_storage = true
|
||||
tui: Tui::default(),
|
||||
codex_linux_sandbox_exe: None,
|
||||
hide_agent_reasoning: false,
|
||||
show_raw_agent_reasoning: false,
|
||||
model_reasoning_effort: ReasoningEffort::default(),
|
||||
model_reasoning_summary: ReasoningSummary::default(),
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
experimental_resume: None,
|
||||
base_instructions: None,
|
||||
include_plan_tool: false,
|
||||
internal_originator: None,
|
||||
};
|
||||
|
||||
assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
|
||||
@@ -882,13 +1015,16 @@ disable_response_storage = true
|
||||
)?;
|
||||
let expected_zdr_profile_config = Config {
|
||||
model: "o3".to_string(),
|
||||
model_family: find_family_for_model("o3").expect("known model slug"),
|
||||
model_context_window: Some(200_000),
|
||||
model_max_output_tokens: Some(100_000),
|
||||
model_provider_id: "openai".to_string(),
|
||||
model_provider: fixture.openai_provider.clone(),
|
||||
approval_policy: AskForApproval::OnFailure,
|
||||
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
||||
shell_environment_policy: ShellEnvironmentPolicy::default(),
|
||||
disable_response_storage: true,
|
||||
instructions: None,
|
||||
user_instructions: None,
|
||||
notify: None,
|
||||
cwd: fixture.cwd(),
|
||||
mcp_servers: HashMap::new(),
|
||||
@@ -900,8 +1036,14 @@ disable_response_storage = true
|
||||
tui: Tui::default(),
|
||||
codex_linux_sandbox_exe: None,
|
||||
hide_agent_reasoning: false,
|
||||
show_raw_agent_reasoning: false,
|
||||
model_reasoning_effort: ReasoningEffort::default(),
|
||||
model_reasoning_summary: ReasoningSummary::default(),
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
experimental_resume: None,
|
||||
base_instructions: None,
|
||||
include_plan_tool: false,
|
||||
internal_originator: None,
|
||||
};
|
||||
|
||||
assert_eq!(expected_zdr_profile_config, zdr_profile_config);
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
use serde::Deserialize;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::config_types::ReasoningEffort;
|
||||
use crate::config_types::ReasoningSummary;
|
||||
use crate::protocol::AskForApproval;
|
||||
|
||||
/// Collection of common configuration options that a user can define as a unit
|
||||
@@ -12,4 +15,8 @@ pub struct ConfigProfile {
|
||||
pub model_provider: Option<String>,
|
||||
pub approval_policy: Option<AskForApproval>,
|
||||
pub disable_response_storage: Option<bool>,
|
||||
pub model_reasoning_effort: Option<ReasoningEffort>,
|
||||
pub model_reasoning_summary: Option<ReasoningSummary>,
|
||||
pub chatgpt_base_url: Option<String>,
|
||||
pub experimental_instructions_file: Option<PathBuf>,
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
// definitions that do not contain business logic.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use strum_macros::Display;
|
||||
use wildmatch::WildMatchPattern;
|
||||
|
||||
@@ -75,19 +76,28 @@ pub enum HistoryPersistence {
|
||||
|
||||
/// Collection of settings that are specific to the TUI.
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, Default)]
|
||||
pub struct Tui {
|
||||
/// By default, mouse capture is enabled in the TUI so that it is possible
|
||||
/// to scroll the conversation history with a mouse. This comes at the cost
|
||||
/// of not being able to use the mouse to select text in the TUI.
|
||||
/// (Most terminals support a modifier key to allow this. For example,
|
||||
/// text selection works in iTerm if you hold down the `Option` key while
|
||||
/// clicking and dragging.)
|
||||
///
|
||||
/// Setting this option to `true` disables mouse capture, so scrolling with
|
||||
/// the mouse is not possible, though the keyboard shortcuts e.g. `b` and
|
||||
/// `space` still work. This allows the user to select text in the TUI
|
||||
/// using the mouse without needing to hold down a modifier key.
|
||||
pub disable_mouse_capture: bool,
|
||||
pub struct Tui {}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone, Copy, PartialEq, Default, Serialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum SandboxMode {
|
||||
#[serde(rename = "read-only")]
|
||||
#[default]
|
||||
ReadOnly,
|
||||
|
||||
#[serde(rename = "workspace-write")]
|
||||
WorkspaceWrite,
|
||||
|
||||
#[serde(rename = "danger-full-access")]
|
||||
DangerFullAccess,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, Default)]
|
||||
pub struct SandboxWorkplaceWrite {
|
||||
#[serde(default)]
|
||||
pub writable_roots: Vec<PathBuf>,
|
||||
#[serde(default)]
|
||||
pub network_access: bool,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, Default)]
|
||||
@@ -120,6 +130,8 @@ pub struct ShellEnvironmentPolicyToml {
|
||||
|
||||
/// List of regular expressions.
|
||||
pub include_only: Option<Vec<String>>,
|
||||
|
||||
pub experimental_use_profile: Option<bool>,
|
||||
}
|
||||
|
||||
pub type EnvironmentVariablePattern = WildMatchPattern<'*', '?'>;
|
||||
@@ -148,6 +160,9 @@ pub struct ShellEnvironmentPolicy {
|
||||
|
||||
/// Environment variable names to retain in the environment.
|
||||
pub include_only: Vec<EnvironmentVariablePattern>,
|
||||
|
||||
/// If true, the shell profile will be used to run the command.
|
||||
pub use_profile: bool,
|
||||
}
|
||||
|
||||
impl From<ShellEnvironmentPolicyToml> for ShellEnvironmentPolicy {
|
||||
@@ -167,6 +182,7 @@ impl From<ShellEnvironmentPolicyToml> for ShellEnvironmentPolicy {
|
||||
.into_iter()
|
||||
.map(|s| EnvironmentVariablePattern::new_case_insensitive(&s))
|
||||
.collect();
|
||||
let use_profile = toml.experimental_use_profile.unwrap_or(false);
|
||||
|
||||
Self {
|
||||
inherit,
|
||||
@@ -174,6 +190,7 @@ impl From<ShellEnvironmentPolicyToml> for ShellEnvironmentPolicy {
|
||||
exclude,
|
||||
r#set,
|
||||
include_only,
|
||||
use_profile,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,7 @@
|
||||
use crate::models::ResponseItem;
|
||||
|
||||
/// Transcript of conversation history that is needed:
|
||||
/// - for ZDR clients for which previous_response_id is not available, so we
|
||||
/// must include the transcript with every API call. This must include each
|
||||
/// `function_call` and its corresponding `function_call_output`.
|
||||
/// - for clients using the "chat completions" API as opposed to the
|
||||
/// "responses" API.
|
||||
#[derive(Debug, Clone)]
|
||||
/// Transcript of conversation history
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(crate) struct ConversationHistory {
|
||||
/// The oldest items are at the beginning of the vector.
|
||||
items: Vec<ResponseItem>,
|
||||
@@ -29,12 +24,83 @@ impl ConversationHistory {
|
||||
I::Item: std::ops::Deref<Target = ResponseItem>,
|
||||
{
|
||||
for item in items {
|
||||
if is_api_message(&item) {
|
||||
// Note agent-loop.ts also does filtering on some of the fields.
|
||||
self.items.push(item.clone());
|
||||
if !is_api_message(&item) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Merge adjacent assistant messages into a single history entry.
|
||||
// This prevents duplicates when a partial assistant message was
|
||||
// streamed into history earlier in the turn and the final full
|
||||
// message is recorded at turn end.
|
||||
match (&*item, self.items.last_mut()) {
|
||||
(
|
||||
ResponseItem::Message {
|
||||
role: new_role,
|
||||
content: new_content,
|
||||
..
|
||||
},
|
||||
Some(ResponseItem::Message {
|
||||
role: last_role,
|
||||
content: last_content,
|
||||
..
|
||||
}),
|
||||
) if new_role == "assistant" && last_role == "assistant" => {
|
||||
append_text_content(last_content, new_content);
|
||||
}
|
||||
_ => {
|
||||
self.items.push(item.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Append a text `delta` to the latest assistant message, creating a new
|
||||
/// assistant entry if none exists yet (e.g. first delta for this turn).
|
||||
pub(crate) fn append_assistant_text(&mut self, delta: &str) {
|
||||
match self.items.last_mut() {
|
||||
Some(ResponseItem::Message { role, content, .. }) if role == "assistant" => {
|
||||
append_text_delta(content, delta);
|
||||
}
|
||||
_ => {
|
||||
// Start a new assistant message with the delta.
|
||||
self.items.push(ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![crate::models::ContentItem::OutputText {
|
||||
text: delta.to_string(),
|
||||
}],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn keep_last_messages(&mut self, n: usize) {
|
||||
if n == 0 {
|
||||
self.items.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
// Collect the last N message items (assistant/user), newest to oldest.
|
||||
let mut kept: Vec<ResponseItem> = Vec::with_capacity(n);
|
||||
for item in self.items.iter().rev() {
|
||||
if let ResponseItem::Message { role, content, .. } = item {
|
||||
kept.push(ResponseItem::Message {
|
||||
// we need to remove the id or the model will complain that messages are sent without
|
||||
// their reasonings
|
||||
id: None,
|
||||
role: role.clone(),
|
||||
content: content.clone(),
|
||||
});
|
||||
if kept.len() == n {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Preserve chronological order (oldest to newest) within the kept slice.
|
||||
kept.reverse();
|
||||
self.items = kept;
|
||||
}
|
||||
}
|
||||
|
||||
/// Anything that is not a system message or "reasoning" message is considered
|
||||
@@ -44,7 +110,145 @@ fn is_api_message(message: &ResponseItem) -> bool {
|
||||
ResponseItem::Message { role, .. } => role.as_str() != "system",
|
||||
ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::LocalShellCall { .. } => true,
|
||||
ResponseItem::Reasoning { .. } | ResponseItem::Other => false,
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::Reasoning { .. } => true,
|
||||
ResponseItem::Other => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper to append the textual content from `src` into `dst` in place.
|
||||
fn append_text_content(
|
||||
dst: &mut Vec<crate::models::ContentItem>,
|
||||
src: &Vec<crate::models::ContentItem>,
|
||||
) {
|
||||
for c in src {
|
||||
if let crate::models::ContentItem::OutputText { text } = c {
|
||||
append_text_delta(dst, text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Append a single text delta to the last OutputText item in `content`, or
|
||||
/// push a new OutputText item if none exists.
|
||||
fn append_text_delta(content: &mut Vec<crate::models::ContentItem>, delta: &str) {
|
||||
if let Some(crate::models::ContentItem::OutputText { text }) = content
|
||||
.iter_mut()
|
||||
.rev()
|
||||
.find(|c| matches!(c, crate::models::ContentItem::OutputText { .. }))
|
||||
{
|
||||
text.push_str(delta);
|
||||
} else {
|
||||
content.push(crate::models::ContentItem::OutputText {
|
||||
text: delta.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::models::ContentItem;
|
||||
|
||||
fn assistant_msg(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
||||
fn user_msg(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn merges_adjacent_assistant_messages() {
|
||||
let mut h = ConversationHistory::default();
|
||||
let a1 = assistant_msg("Hello");
|
||||
let a2 = assistant_msg(", world!");
|
||||
h.record_items([&a1, &a2]);
|
||||
|
||||
let items = h.contents();
|
||||
assert_eq!(
|
||||
items,
|
||||
vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "Hello, world!".to_string()
|
||||
}]
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn append_assistant_text_creates_and_appends() {
|
||||
let mut h = ConversationHistory::default();
|
||||
h.append_assistant_text("Hello");
|
||||
h.append_assistant_text(", world");
|
||||
|
||||
// Now record a final full assistant message and verify it merges.
|
||||
let final_msg = assistant_msg("!");
|
||||
h.record_items([&final_msg]);
|
||||
|
||||
let items = h.contents();
|
||||
assert_eq!(
|
||||
items,
|
||||
vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "Hello, world!".to_string()
|
||||
}]
|
||||
}]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filters_non_api_messages() {
|
||||
let mut h = ConversationHistory::default();
|
||||
// System message is not an API message; Other is ignored.
|
||||
let system = ResponseItem::Message {
|
||||
id: None,
|
||||
role: "system".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "ignored".to_string(),
|
||||
}],
|
||||
};
|
||||
h.record_items([&system, &ResponseItem::Other]);
|
||||
|
||||
// User and assistant should be retained.
|
||||
let u = user_msg("hi");
|
||||
let a = assistant_msg("hello");
|
||||
h.record_items([&u, &a]);
|
||||
|
||||
let items = h.contents();
|
||||
assert_eq!(
|
||||
items,
|
||||
vec![
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "hi".to_string()
|
||||
}]
|
||||
},
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "hello".to_string()
|
||||
}]
|
||||
}
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,22 +6,29 @@ use std::io;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::process::ExitStatus;
|
||||
use std::process::Stdio;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use async_channel::Sender;
|
||||
use tokio::io::AsyncRead;
|
||||
use tokio::io::AsyncReadExt;
|
||||
use tokio::io::BufReader;
|
||||
use tokio::process::Child;
|
||||
use tokio::process::Command;
|
||||
use tokio::sync::Notify;
|
||||
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecCommandOutputDeltaEvent;
|
||||
use crate::protocol::ExecOutputStream;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::seatbelt::spawn_command_under_seatbelt;
|
||||
use crate::spawn::StdioPolicy;
|
||||
use crate::spawn::spawn_child_async;
|
||||
use serde_bytes::ByteBuf;
|
||||
|
||||
// Maximum we send for each stream, which is either:
|
||||
// - 10KiB OR
|
||||
@@ -36,30 +43,20 @@ const DEFAULT_TIMEOUT_MS: u64 = 10_000;
|
||||
const SIGKILL_CODE: i32 = 9;
|
||||
const TIMEOUT_CODE: i32 = 64;
|
||||
|
||||
const MACOS_SEATBELT_BASE_POLICY: &str = include_str!("seatbelt_base_policy.sbpl");
|
||||
|
||||
/// When working with `sandbox-exec`, only consider `sandbox-exec` in `/usr/bin`
|
||||
/// to defend against an attacker trying to inject a malicious version on the
|
||||
/// PATH. If /usr/bin/sandbox-exec has been tampered with, then the attacker
|
||||
/// already has root access.
|
||||
const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec";
|
||||
|
||||
/// Experimental environment variable that will be set to some non-empty value
|
||||
/// if both of the following are true:
|
||||
///
|
||||
/// 1. The process was spawned by Codex as part of a shell tool call.
|
||||
/// 2. SandboxPolicy.has_full_network_access() was false for the tool call.
|
||||
///
|
||||
/// We may try to have just one environment variable for all sandboxing
|
||||
/// attributes, so this may change in the future.
|
||||
pub const CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR: &str = "CODEX_SANDBOX_NETWORK_DISABLED";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ExecParams {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub env: HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
impl ExecParams {
|
||||
pub fn timeout_duration(&self) -> Duration {
|
||||
Duration::from_millis(self.timeout_ms.unwrap_or(DEFAULT_TIMEOUT_MS))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
@@ -73,23 +70,30 @@ pub enum SandboxType {
|
||||
LinuxSeccomp,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct StdoutStream {
|
||||
pub sub_id: String,
|
||||
pub call_id: String,
|
||||
pub tx_event: Sender<Event>,
|
||||
}
|
||||
|
||||
pub async fn process_exec_tool_call(
|
||||
params: ExecParams,
|
||||
sandbox_type: SandboxType,
|
||||
ctrl_c: Arc<Notify>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
codex_linux_sandbox_exe: &Option<PathBuf>,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
) -> Result<ExecToolCallOutput> {
|
||||
let start = Instant::now();
|
||||
|
||||
let raw_output_result = match sandbox_type {
|
||||
SandboxType::None => exec(params, sandbox_policy, ctrl_c).await,
|
||||
let raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr> = match sandbox_type
|
||||
{
|
||||
SandboxType::None => exec(params, sandbox_policy, ctrl_c, stdout_stream.clone()).await,
|
||||
SandboxType::MacosSeatbelt => {
|
||||
let timeout = params.timeout_duration();
|
||||
let ExecParams {
|
||||
command,
|
||||
cwd,
|
||||
timeout_ms,
|
||||
env,
|
||||
command, cwd, env, ..
|
||||
} = params;
|
||||
let child = spawn_command_under_seatbelt(
|
||||
command,
|
||||
@@ -99,14 +103,12 @@ pub async fn process_exec_tool_call(
|
||||
env,
|
||||
)
|
||||
.await?;
|
||||
consume_truncated_output(child, ctrl_c, timeout_ms).await
|
||||
consume_truncated_output(child, ctrl_c, timeout, stdout_stream.clone()).await
|
||||
}
|
||||
SandboxType::LinuxSeccomp => {
|
||||
let timeout = params.timeout_duration();
|
||||
let ExecParams {
|
||||
command,
|
||||
cwd,
|
||||
timeout_ms,
|
||||
env,
|
||||
command, cwd, env, ..
|
||||
} = params;
|
||||
|
||||
let codex_linux_sandbox_exe = codex_linux_sandbox_exe
|
||||
@@ -122,7 +124,7 @@ pub async fn process_exec_tool_call(
|
||||
)
|
||||
.await?;
|
||||
|
||||
consume_truncated_output(child, ctrl_c, timeout_ms).await
|
||||
consume_truncated_output(child, ctrl_c, timeout, stdout_stream).await
|
||||
}
|
||||
};
|
||||
let duration = start.elapsed();
|
||||
@@ -142,11 +144,7 @@ pub async fn process_exec_tool_call(
|
||||
|
||||
let exit_code = raw_output.exit_status.code().unwrap_or(-1);
|
||||
|
||||
// NOTE(ragona): This is much less restrictive than the previous check. If we exec
|
||||
// a command, and it returns anything other than success, we assume that it may have
|
||||
// been a sandboxing error and allow the user to retry. (The user of course may choose
|
||||
// not to retry, or in a non-interactive mode, would automatically reject the approval.)
|
||||
if exit_code != 0 && sandbox_type != SandboxType::None {
|
||||
if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
|
||||
return Err(CodexErr::Sandbox(SandboxErr::Denied(
|
||||
exit_code, stdout, stderr,
|
||||
)));
|
||||
@@ -166,27 +164,6 @@ pub async fn process_exec_tool_call(
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn spawn_command_under_seatbelt(
|
||||
command: Vec<String>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: PathBuf,
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child> {
|
||||
let args = create_seatbelt_command_args(command, sandbox_policy, &cwd);
|
||||
let arg0 = None;
|
||||
spawn_child_async(
|
||||
PathBuf::from(MACOS_PATH_TO_SEATBELT_EXECUTABLE),
|
||||
args,
|
||||
arg0,
|
||||
cwd,
|
||||
sandbox_policy,
|
||||
stdio_policy,
|
||||
env,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Spawn a shell tool command under the Linux Landlock+seccomp sandbox helper
|
||||
/// (codex-linux-sandbox).
|
||||
///
|
||||
@@ -225,41 +202,20 @@ fn create_linux_sandbox_command_args(
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: &Path,
|
||||
) -> Vec<String> {
|
||||
let mut linux_cmd: Vec<String> = vec![];
|
||||
#[expect(clippy::expect_used)]
|
||||
let sandbox_policy_cwd = cwd.to_str().expect("cwd must be valid UTF-8").to_string();
|
||||
|
||||
// Translate individual permissions.
|
||||
// Use high-level helper methods to infer flags when we cannot see the
|
||||
// exact permission list.
|
||||
if sandbox_policy.has_full_disk_read_access() {
|
||||
linux_cmd.extend(["-s", "disk-full-read-access"].map(String::from));
|
||||
}
|
||||
#[expect(clippy::expect_used)]
|
||||
let sandbox_policy_json =
|
||||
serde_json::to_string(sandbox_policy).expect("Failed to serialize SandboxPolicy to JSON");
|
||||
|
||||
if sandbox_policy.has_full_disk_write_access() {
|
||||
linux_cmd.extend(["-s", "disk-full-write-access"].map(String::from));
|
||||
} else {
|
||||
// Derive granular writable paths (includes cwd if `DiskWriteCwd` is
|
||||
// present).
|
||||
for root in sandbox_policy.get_writable_roots_with_cwd(cwd) {
|
||||
// Check if this path corresponds exactly to cwd to map to
|
||||
// `disk-write-cwd`, otherwise use the generic folder rule.
|
||||
if root == cwd {
|
||||
linux_cmd.extend(["-s", "disk-write-cwd"].map(String::from));
|
||||
} else {
|
||||
linux_cmd.extend([
|
||||
"-s".to_string(),
|
||||
format!("disk-write-folder={}", root.to_string_lossy()),
|
||||
]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if sandbox_policy.has_full_network_access() {
|
||||
linux_cmd.extend(["-s", "network-full-access"].map(String::from));
|
||||
}
|
||||
|
||||
// Separator so that command arguments starting with `-` are not parsed as
|
||||
// options of the helper itself.
|
||||
linux_cmd.push("--".to_string());
|
||||
let mut linux_cmd: Vec<String> = vec![
|
||||
sandbox_policy_cwd,
|
||||
sandbox_policy_json,
|
||||
// Separator so that command arguments starting with `-` are not parsed as
|
||||
// options of the helper itself.
|
||||
"--".to_string(),
|
||||
];
|
||||
|
||||
// Append the original tool command.
|
||||
linux_cmd.extend(command);
|
||||
@@ -267,63 +223,24 @@ fn create_linux_sandbox_command_args(
|
||||
linux_cmd
|
||||
}
|
||||
|
||||
fn create_seatbelt_command_args(
|
||||
command: Vec<String>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: &Path,
|
||||
) -> Vec<String> {
|
||||
let (file_write_policy, extra_cli_args) = {
|
||||
if sandbox_policy.has_full_disk_write_access() {
|
||||
// Allegedly, this is more permissive than `(allow file-write*)`.
|
||||
(
|
||||
r#"(allow file-write* (regex #"^/"))"#.to_string(),
|
||||
Vec::<String>::new(),
|
||||
)
|
||||
} else {
|
||||
let writable_roots = sandbox_policy.get_writable_roots_with_cwd(cwd);
|
||||
let (writable_folder_policies, cli_args): (Vec<String>, Vec<String>) = writable_roots
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(index, root)| {
|
||||
let param_name = format!("WRITABLE_ROOT_{index}");
|
||||
let policy: String = format!("(subpath (param \"{param_name}\"))");
|
||||
let cli_arg = format!("-D{param_name}={}", root.to_string_lossy());
|
||||
(policy, cli_arg)
|
||||
})
|
||||
.unzip();
|
||||
if writable_folder_policies.is_empty() {
|
||||
("".to_string(), Vec::<String>::new())
|
||||
} else {
|
||||
let file_write_policy = format!(
|
||||
"(allow file-write*\n{}\n)",
|
||||
writable_folder_policies.join(" ")
|
||||
);
|
||||
(file_write_policy, cli_args)
|
||||
}
|
||||
}
|
||||
};
|
||||
/// We don't have a fully deterministic way to tell if our command failed
|
||||
/// because of the sandbox - a command in the user's zshrc file might hit an
|
||||
/// error, but the command itself might fail or succeed for other reasons.
|
||||
/// For now, we conservatively check for 'command not found' (exit code 127),
|
||||
/// and can add additional cases as necessary.
|
||||
fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool {
|
||||
if sandbox_type == SandboxType::None {
|
||||
return false;
|
||||
}
|
||||
|
||||
let file_read_policy = if sandbox_policy.has_full_disk_read_access() {
|
||||
"; allow read-only file operations\n(allow file-read*)"
|
||||
} else {
|
||||
""
|
||||
};
|
||||
// Quick rejects: well-known non-sandbox shell exit codes
|
||||
// 127: command not found, 2: misuse of shell builtins
|
||||
if exit_code == 127 {
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO(mbolin): apply_patch calls must also honor the SandboxPolicy.
|
||||
let network_policy = if sandbox_policy.has_full_network_access() {
|
||||
"(allow network-outbound)\n(allow network-inbound)\n(allow system-socket)"
|
||||
} else {
|
||||
""
|
||||
};
|
||||
|
||||
let full_policy = format!(
|
||||
"{MACOS_SEATBELT_BASE_POLICY}\n{file_read_policy}\n{file_write_policy}\n{network_policy}"
|
||||
);
|
||||
let mut seatbelt_args: Vec<String> = vec!["-p".to_string(), full_policy];
|
||||
seatbelt_args.extend(extra_cli_args);
|
||||
seatbelt_args.push("--".to_string());
|
||||
seatbelt_args.extend(command);
|
||||
seatbelt_args
|
||||
// For all other cases, we assume the sandbox is the cause
|
||||
true
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -342,15 +259,16 @@ pub struct ExecToolCallOutput {
|
||||
}
|
||||
|
||||
async fn exec(
|
||||
ExecParams {
|
||||
command,
|
||||
cwd,
|
||||
timeout_ms,
|
||||
env,
|
||||
}: ExecParams,
|
||||
params: ExecParams,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
ctrl_c: Arc<Notify>,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
) -> Result<RawExecToolCallOutput> {
|
||||
let timeout = params.timeout_duration();
|
||||
let ExecParams {
|
||||
command, cwd, env, ..
|
||||
} = params;
|
||||
|
||||
let (program, args) = command.split_first().ok_or_else(|| {
|
||||
CodexErr::Io(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
@@ -368,62 +286,7 @@ async fn exec(
|
||||
env,
|
||||
)
|
||||
.await?;
|
||||
consume_truncated_output(child, ctrl_c, timeout_ms).await
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum StdioPolicy {
|
||||
RedirectForShellTool,
|
||||
Inherit,
|
||||
}
|
||||
|
||||
/// Spawns the appropriate child process for the ExecParams and SandboxPolicy,
|
||||
/// ensuring the args and environment variables used to create the `Command`
|
||||
/// (and `Child`) honor the configuration.
|
||||
///
|
||||
/// For now, we take `SandboxPolicy` as a parameter to spawn_child() because
|
||||
/// we need to determine whether to set the
|
||||
/// `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` environment variable.
|
||||
async fn spawn_child_async(
|
||||
program: PathBuf,
|
||||
args: Vec<String>,
|
||||
#[cfg_attr(not(unix), allow(unused_variables))] arg0: Option<&str>,
|
||||
cwd: PathBuf,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child> {
|
||||
let mut cmd = Command::new(&program);
|
||||
#[cfg(unix)]
|
||||
cmd.arg0(arg0.map_or_else(|| program.to_string_lossy().to_string(), String::from));
|
||||
cmd.args(args);
|
||||
cmd.current_dir(cwd);
|
||||
cmd.env_clear();
|
||||
cmd.envs(env);
|
||||
|
||||
if !sandbox_policy.has_full_network_access() {
|
||||
cmd.env(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR, "1");
|
||||
}
|
||||
|
||||
match stdio_policy {
|
||||
StdioPolicy::RedirectForShellTool => {
|
||||
// Do not create a file descriptor for stdin because otherwise some
|
||||
// commands may hang forever waiting for input. For example, ripgrep has
|
||||
// a heuristic where it may try to read from stdin as explained here:
|
||||
// https://github.com/BurntSushi/ripgrep/blob/e2362d4d5185d02fa857bf381e7bd52e66fafc73/crates/core/flags/hiargs.rs#L1101-L1103
|
||||
cmd.stdin(Stdio::null());
|
||||
|
||||
cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
|
||||
}
|
||||
StdioPolicy::Inherit => {
|
||||
// Inherit stdin, stdout, and stderr from the parent process.
|
||||
cmd.stdin(Stdio::inherit())
|
||||
.stdout(Stdio::inherit())
|
||||
.stderr(Stdio::inherit());
|
||||
}
|
||||
}
|
||||
|
||||
cmd.kill_on_drop(true).spawn()
|
||||
consume_truncated_output(child, ctrl_c, timeout, stdout_stream).await
|
||||
}
|
||||
|
||||
/// Consumes the output of a child process, truncating it so it is suitable for
|
||||
@@ -431,7 +294,8 @@ async fn spawn_child_async(
|
||||
pub(crate) async fn consume_truncated_output(
|
||||
mut child: Child,
|
||||
ctrl_c: Arc<Notify>,
|
||||
timeout_ms: Option<u64>,
|
||||
timeout: Duration,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
) -> Result<RawExecToolCallOutput> {
|
||||
// Both stdout and stderr were configured with `Stdio::piped()`
|
||||
// above, therefore `take()` should normally return `Some`. If it doesn't
|
||||
@@ -452,15 +316,18 @@ pub(crate) async fn consume_truncated_output(
|
||||
BufReader::new(stdout_reader),
|
||||
MAX_STREAM_OUTPUT,
|
||||
MAX_STREAM_OUTPUT_LINES,
|
||||
stdout_stream.clone(),
|
||||
false,
|
||||
));
|
||||
let stderr_handle = tokio::spawn(read_capped(
|
||||
BufReader::new(stderr_reader),
|
||||
MAX_STREAM_OUTPUT,
|
||||
MAX_STREAM_OUTPUT_LINES,
|
||||
stdout_stream.clone(),
|
||||
true,
|
||||
));
|
||||
|
||||
let interrupted = ctrl_c.notified();
|
||||
let timeout = Duration::from_millis(timeout_ms.unwrap_or(DEFAULT_TIMEOUT_MS));
|
||||
let exit_status = tokio::select! {
|
||||
result = tokio::time::timeout(timeout, child.wait()) => {
|
||||
match result {
|
||||
@@ -490,10 +357,12 @@ pub(crate) async fn consume_truncated_output(
|
||||
})
|
||||
}
|
||||
|
||||
async fn read_capped<R: AsyncRead + Unpin>(
|
||||
async fn read_capped<R: AsyncRead + Unpin + Send + 'static>(
|
||||
mut reader: R,
|
||||
max_output: usize,
|
||||
max_lines: usize,
|
||||
stream: Option<StdoutStream>,
|
||||
is_stderr: bool,
|
||||
) -> io::Result<Vec<u8>> {
|
||||
let mut buf = Vec::with_capacity(max_output.min(8 * 1024));
|
||||
let mut tmp = [0u8; 8192];
|
||||
@@ -507,6 +376,25 @@ async fn read_capped<R: AsyncRead + Unpin>(
|
||||
break;
|
||||
}
|
||||
|
||||
if let Some(stream) = &stream {
|
||||
let chunk = tmp[..n].to_vec();
|
||||
let msg = EventMsg::ExecCommandOutputDelta(ExecCommandOutputDeltaEvent {
|
||||
call_id: stream.call_id.clone(),
|
||||
stream: if is_stderr {
|
||||
ExecOutputStream::Stderr
|
||||
} else {
|
||||
ExecOutputStream::Stdout
|
||||
},
|
||||
chunk: ByteBuf::from(chunk),
|
||||
});
|
||||
let event = Event {
|
||||
id: stream.sub_id.clone(),
|
||||
msg,
|
||||
};
|
||||
#[allow(clippy::let_unit_value)]
|
||||
let _ = stream.tx_event.send(event).await;
|
||||
}
|
||||
|
||||
// Copy into the buffer only while we still have byte and line budget.
|
||||
if remaining_bytes > 0 && remaining_lines > 0 {
|
||||
let mut copy_len = 0;
|
||||
|
||||
@@ -11,14 +11,6 @@ env_flags! {
|
||||
pub OPENAI_TIMEOUT_MS: Duration = Duration::from_millis(300_000), |value| {
|
||||
value.parse().map(Duration::from_millis)
|
||||
};
|
||||
pub OPENAI_REQUEST_MAX_RETRIES: u64 = 4;
|
||||
pub OPENAI_STREAM_MAX_RETRIES: u64 = 10;
|
||||
|
||||
// We generally don't want to disconnect; this updates the timeout to be five minutes
|
||||
// which matches the upstream typescript codex impl.
|
||||
pub OPENAI_STREAM_IDLE_TIMEOUT_MS: Duration = Duration::from_millis(300_000), |value| {
|
||||
value.parse().map(Duration::from_millis)
|
||||
};
|
||||
|
||||
/// Fixture path for offline tests (see client.rs).
|
||||
pub CODEX_RS_SSE_FIXTURE: Option<&str> = None;
|
||||
|
||||
316
codex-rs/core/src/git_info.rs
Normal file
316
codex-rs/core/src/git_info.rs
Normal file
@@ -0,0 +1,316 @@
|
||||
use std::path::Path;
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use tokio::process::Command;
|
||||
use tokio::time::Duration as TokioDuration;
|
||||
use tokio::time::timeout;
|
||||
|
||||
/// Timeout for git commands to prevent freezing on large repositories
|
||||
const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct GitInfo {
|
||||
/// Current commit hash (SHA)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub commit_hash: Option<String>,
|
||||
/// Current branch name
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub branch: Option<String>,
|
||||
/// Repository URL (if available from remote)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub repository_url: Option<String>,
|
||||
}
|
||||
|
||||
/// Collect git repository information from the given working directory using command-line git.
|
||||
/// Returns None if no git repository is found or if git operations fail.
|
||||
/// Uses timeouts to prevent freezing on large repositories.
|
||||
/// All git commands (except the initial repo check) run in parallel for better performance.
|
||||
pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {
|
||||
// Check if we're in a git repository first
|
||||
let is_git_repo = run_git_command_with_timeout(&["rev-parse", "--git-dir"], cwd)
|
||||
.await?
|
||||
.status
|
||||
.success();
|
||||
|
||||
if !is_git_repo {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Run all git info collection commands in parallel
|
||||
let (commit_result, branch_result, url_result) = tokio::join!(
|
||||
run_git_command_with_timeout(&["rev-parse", "HEAD"], cwd),
|
||||
run_git_command_with_timeout(&["rev-parse", "--abbrev-ref", "HEAD"], cwd),
|
||||
run_git_command_with_timeout(&["remote", "get-url", "origin"], cwd)
|
||||
);
|
||||
|
||||
let mut git_info = GitInfo {
|
||||
commit_hash: None,
|
||||
branch: None,
|
||||
repository_url: None,
|
||||
};
|
||||
|
||||
// Process commit hash
|
||||
if let Some(output) = commit_result {
|
||||
if output.status.success() {
|
||||
if let Ok(hash) = String::from_utf8(output.stdout) {
|
||||
git_info.commit_hash = Some(hash.trim().to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process branch name
|
||||
if let Some(output) = branch_result {
|
||||
if output.status.success() {
|
||||
if let Ok(branch) = String::from_utf8(output.stdout) {
|
||||
let branch = branch.trim();
|
||||
if branch != "HEAD" {
|
||||
git_info.branch = Some(branch.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process repository URL
|
||||
if let Some(output) = url_result {
|
||||
if output.status.success() {
|
||||
if let Ok(url) = String::from_utf8(output.stdout) {
|
||||
git_info.repository_url = Some(url.trim().to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some(git_info)
|
||||
}
|
||||
|
||||
/// Run a git command with a timeout to prevent blocking on large repositories
|
||||
async fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
|
||||
let result = timeout(
|
||||
GIT_COMMAND_TIMEOUT,
|
||||
Command::new("git").args(args).current_dir(cwd).output(),
|
||||
)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(Ok(output)) => Some(output),
|
||||
_ => None, // Timeout or error
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::expect_used)]
|
||||
#![allow(clippy::unwrap_used)]
|
||||
|
||||
use super::*;
|
||||
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use tempfile::TempDir;
|
||||
|
||||
// Helper function to create a test git repository
|
||||
async fn create_test_git_repo(temp_dir: &TempDir) -> PathBuf {
|
||||
let repo_path = temp_dir.path().to_path_buf();
|
||||
let envs = vec![
|
||||
("GIT_CONFIG_GLOBAL", "/dev/null"),
|
||||
("GIT_CONFIG_NOSYSTEM", "1"),
|
||||
];
|
||||
|
||||
// Initialize git repo
|
||||
Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["init"])
|
||||
.current_dir(&repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to init git repo");
|
||||
|
||||
// Configure git user (required for commits)
|
||||
Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["config", "user.name", "Test User"])
|
||||
.current_dir(&repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to set git user name");
|
||||
|
||||
Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["config", "user.email", "test@example.com"])
|
||||
.current_dir(&repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to set git user email");
|
||||
|
||||
// Create a test file and commit it
|
||||
let test_file = repo_path.join("test.txt");
|
||||
fs::write(&test_file, "test content").expect("Failed to write test file");
|
||||
|
||||
Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["add", "."])
|
||||
.current_dir(&repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to add files");
|
||||
|
||||
Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["commit", "-m", "Initial commit"])
|
||||
.current_dir(&repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to commit");
|
||||
|
||||
repo_path
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_collect_git_info_non_git_directory() {
|
||||
let temp_dir = TempDir::new().expect("Failed to create temp dir");
|
||||
let result = collect_git_info(temp_dir.path()).await;
|
||||
assert!(result.is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_collect_git_info_git_repository() {
|
||||
let temp_dir = TempDir::new().expect("Failed to create temp dir");
|
||||
let repo_path = create_test_git_repo(&temp_dir).await;
|
||||
|
||||
let git_info = collect_git_info(&repo_path)
|
||||
.await
|
||||
.expect("Should collect git info from repo");
|
||||
|
||||
// Should have commit hash
|
||||
assert!(git_info.commit_hash.is_some());
|
||||
let commit_hash = git_info.commit_hash.unwrap();
|
||||
assert_eq!(commit_hash.len(), 40); // SHA-1 hash should be 40 characters
|
||||
assert!(commit_hash.chars().all(|c| c.is_ascii_hexdigit()));
|
||||
|
||||
// Should have branch (likely "main" or "master")
|
||||
assert!(git_info.branch.is_some());
|
||||
let branch = git_info.branch.unwrap();
|
||||
assert!(branch == "main" || branch == "master");
|
||||
|
||||
// Repository URL might be None for local repos without remote
|
||||
// This is acceptable behavior
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_collect_git_info_with_remote() {
|
||||
let temp_dir = TempDir::new().expect("Failed to create temp dir");
|
||||
let repo_path = create_test_git_repo(&temp_dir).await;
|
||||
|
||||
// Add a remote origin
|
||||
Command::new("git")
|
||||
.args([
|
||||
"remote",
|
||||
"add",
|
||||
"origin",
|
||||
"https://github.com/example/repo.git",
|
||||
])
|
||||
.current_dir(&repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to add remote");
|
||||
|
||||
let git_info = collect_git_info(&repo_path)
|
||||
.await
|
||||
.expect("Should collect git info from repo");
|
||||
|
||||
// Should have repository URL
|
||||
assert_eq!(
|
||||
git_info.repository_url,
|
||||
Some("https://github.com/example/repo.git".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_collect_git_info_detached_head() {
|
||||
let temp_dir = TempDir::new().expect("Failed to create temp dir");
|
||||
let repo_path = create_test_git_repo(&temp_dir).await;
|
||||
|
||||
// Get the current commit hash
|
||||
let output = Command::new("git")
|
||||
.args(["rev-parse", "HEAD"])
|
||||
.current_dir(&repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to get HEAD");
|
||||
let commit_hash = String::from_utf8(output.stdout).unwrap().trim().to_string();
|
||||
|
||||
// Checkout the commit directly (detached HEAD)
|
||||
Command::new("git")
|
||||
.args(["checkout", &commit_hash])
|
||||
.current_dir(&repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to checkout commit");
|
||||
|
||||
let git_info = collect_git_info(&repo_path)
|
||||
.await
|
||||
.expect("Should collect git info from repo");
|
||||
|
||||
// Should have commit hash
|
||||
assert!(git_info.commit_hash.is_some());
|
||||
// Branch should be None for detached HEAD (since rev-parse --abbrev-ref HEAD returns "HEAD")
|
||||
assert!(git_info.branch.is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_collect_git_info_with_branch() {
|
||||
let temp_dir = TempDir::new().expect("Failed to create temp dir");
|
||||
let repo_path = create_test_git_repo(&temp_dir).await;
|
||||
|
||||
// Create and checkout a new branch
|
||||
Command::new("git")
|
||||
.args(["checkout", "-b", "feature-branch"])
|
||||
.current_dir(&repo_path)
|
||||
.output()
|
||||
.await
|
||||
.expect("Failed to create branch");
|
||||
|
||||
let git_info = collect_git_info(&repo_path)
|
||||
.await
|
||||
.expect("Should collect git info from repo");
|
||||
|
||||
// Should have the new branch name
|
||||
assert_eq!(git_info.branch, Some("feature-branch".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_git_info_serialization() {
|
||||
let git_info = GitInfo {
|
||||
commit_hash: Some("abc123def456".to_string()),
|
||||
branch: Some("main".to_string()),
|
||||
repository_url: Some("https://github.com/example/repo.git".to_string()),
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
|
||||
let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");
|
||||
|
||||
assert_eq!(parsed["commit_hash"], "abc123def456");
|
||||
assert_eq!(parsed["branch"], "main");
|
||||
assert_eq!(
|
||||
parsed["repository_url"],
|
||||
"https://github.com/example/repo.git"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_git_info_serialization_with_nones() {
|
||||
let git_info = GitInfo {
|
||||
commit_hash: None,
|
||||
branch: None,
|
||||
repository_url: None,
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
|
||||
let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");
|
||||
|
||||
// Fields with None values should be omitted due to skip_serializing_if
|
||||
assert!(!parsed.as_object().unwrap().contains_key("commit_hash"));
|
||||
assert!(!parsed.as_object().unwrap().contains_key("branch"));
|
||||
assert!(!parsed.as_object().unwrap().contains_key("repository_url"));
|
||||
}
|
||||
}
|
||||
@@ -1,31 +1,57 @@
|
||||
use tree_sitter::Parser;
|
||||
use tree_sitter::Tree;
|
||||
use tree_sitter_bash::LANGUAGE as BASH;
|
||||
use crate::bash::try_parse_bash;
|
||||
use crate::bash::try_parse_word_only_commands_sequence;
|
||||
|
||||
pub fn is_known_safe_command(command: &[String]) -> bool {
|
||||
if is_safe_to_call_with_exec(command) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO(mbolin): Also support safe commands that are piped together such
|
||||
// as `cat foo | wc -l`.
|
||||
matches!(
|
||||
command,
|
||||
[bash, flag, script]
|
||||
if bash == "bash"
|
||||
&& flag == "-lc"
|
||||
&& try_parse_bash(script).and_then(|tree|
|
||||
try_parse_single_word_only_command(&tree, script)).is_some_and(|parsed_bash_command| is_safe_to_call_with_exec(&parsed_bash_command))
|
||||
)
|
||||
// Support `bash -lc "..."` where the script consists solely of one or
|
||||
// more "plain" commands (only bare words / quoted strings) combined with
|
||||
// a conservative allow‑list of shell operators that themselves do not
|
||||
// introduce side effects ( "&&", "||", ";", and "|" ). If every
|
||||
// individual command in the script is itself a known‑safe command, then
|
||||
// the composite expression is considered safe.
|
||||
if let [bash, flag, script] = command {
|
||||
if bash == "bash" && flag == "-lc" {
|
||||
if let Some(tree) = try_parse_bash(script) {
|
||||
if let Some(all_commands) = try_parse_word_only_commands_sequence(&tree, script) {
|
||||
if !all_commands.is_empty()
|
||||
&& all_commands
|
||||
.iter()
|
||||
.all(|cmd| is_safe_to_call_with_exec(cmd))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn is_safe_to_call_with_exec(command: &[String]) -> bool {
|
||||
let cmd0 = command.first().map(String::as_str);
|
||||
|
||||
match cmd0 {
|
||||
#[rustfmt::skip]
|
||||
Some(
|
||||
"cat" | "cd" | "echo" | "grep" | "head" | "ls" | "pwd" | "rg" | "tail" | "wc" | "which",
|
||||
) => true,
|
||||
"cat" |
|
||||
"cd" |
|
||||
"echo" |
|
||||
"false" |
|
||||
"grep" |
|
||||
"head" |
|
||||
"ls" |
|
||||
"nl" |
|
||||
"pwd" |
|
||||
"tail" |
|
||||
"true" |
|
||||
"wc" |
|
||||
"which") => {
|
||||
true
|
||||
},
|
||||
|
||||
Some("find") => {
|
||||
// Certain options to `find` can delete files, write to files, or
|
||||
@@ -46,6 +72,29 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
|
||||
.any(|arg| UNSAFE_FIND_OPTIONS.contains(&arg.as_str()))
|
||||
}
|
||||
|
||||
// Ripgrep
|
||||
Some("rg") => {
|
||||
const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &[
|
||||
// Takes an arbitrary command that is executed for each match.
|
||||
"--pre",
|
||||
// Takes a command that can be used to obtain the local hostname.
|
||||
"--hostname-bin",
|
||||
];
|
||||
const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &[
|
||||
// Calls out to other decompression tools, so do not auto-approve
|
||||
// out of an abundance of caution.
|
||||
"--search-zip",
|
||||
"-z",
|
||||
];
|
||||
|
||||
!command.iter().any(|arg| {
|
||||
UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg.as_str())
|
||||
|| UNSAFE_RIPGREP_OPTIONS_WITH_ARGS
|
||||
.iter()
|
||||
.any(|&opt| arg == opt || arg.starts_with(&format!("{opt}=")))
|
||||
})
|
||||
}
|
||||
|
||||
// Git
|
||||
Some("git") => matches!(
|
||||
command.get(1).map(String::as_str),
|
||||
@@ -72,90 +121,7 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
fn try_parse_bash(bash_lc_arg: &str) -> Option<Tree> {
|
||||
let lang = BASH.into();
|
||||
let mut parser = Parser::new();
|
||||
#[expect(clippy::expect_used)]
|
||||
parser.set_language(&lang).expect("load bash grammar");
|
||||
|
||||
let old_tree: Option<&Tree> = None;
|
||||
parser.parse(bash_lc_arg, old_tree)
|
||||
}
|
||||
|
||||
/// If `tree` represents a single Bash command whose name and every argument is
|
||||
/// an ordinary `word`, return those words in order; otherwise, return `None`.
|
||||
///
|
||||
/// `src` must be the exact source string that was parsed into `tree`, so we can
|
||||
/// extract the text for every node.
|
||||
pub fn try_parse_single_word_only_command(tree: &Tree, src: &str) -> Option<Vec<String>> {
|
||||
// Any parse error is an immediate rejection.
|
||||
if tree.root_node().has_error() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// (program …) with exactly one statement
|
||||
let root = tree.root_node();
|
||||
if root.kind() != "program" || root.named_child_count() != 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let cmd = root.named_child(0)?; // (command …)
|
||||
if cmd.kind() != "command" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut words = Vec::new();
|
||||
let mut cursor = cmd.walk();
|
||||
|
||||
for child in cmd.named_children(&mut cursor) {
|
||||
match child.kind() {
|
||||
// The command name node wraps one `word` child.
|
||||
"command_name" => {
|
||||
let word_node = child.named_child(0)?; // make sure it's only a word
|
||||
if word_node.kind() != "word" {
|
||||
return None;
|
||||
}
|
||||
words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
}
|
||||
// Positional‑argument word (allowed).
|
||||
"word" | "number" => {
|
||||
words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
}
|
||||
"string" => {
|
||||
if child.child_count() == 3
|
||||
&& child.child(0)?.kind() == "\""
|
||||
&& child.child(1)?.kind() == "string_content"
|
||||
&& child.child(2)?.kind() == "\""
|
||||
{
|
||||
words.push(child.child(1)?.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
} else {
|
||||
// Anything else means the command is *not* plain words.
|
||||
return None;
|
||||
}
|
||||
}
|
||||
"concatenation" => {
|
||||
// TODO: Consider things like `'ab\'a'`.
|
||||
return None;
|
||||
}
|
||||
"raw_string" => {
|
||||
// Raw string is a single word, but we need to strip the quotes.
|
||||
let raw_string = child.utf8_text(src.as_bytes()).ok()?;
|
||||
let stripped = raw_string
|
||||
.strip_prefix('\'')
|
||||
.and_then(|s| s.strip_suffix('\''));
|
||||
if let Some(stripped) = stripped {
|
||||
words.push(stripped.to_owned());
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
// Anything else means the command is *not* plain words.
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
|
||||
Some(words)
|
||||
}
|
||||
// (bash parsing helpers implemented in crate::bash)
|
||||
|
||||
/* ----------------------------------------------------------
|
||||
Example
|
||||
@@ -193,6 +159,7 @@ fn is_valid_sed_n_arg(arg: Option<&str>) -> bool {
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::unwrap_used)]
|
||||
@@ -209,6 +176,11 @@ mod tests {
|
||||
assert!(is_safe_to_call_with_exec(&vec_str(&[
|
||||
"sed", "-n", "1,5p", "file.txt"
|
||||
])));
|
||||
assert!(is_safe_to_call_with_exec(&vec_str(&[
|
||||
"nl",
|
||||
"-nrz",
|
||||
"Cargo.toml"
|
||||
])));
|
||||
|
||||
// Safe `find` command (no unsafe options).
|
||||
assert!(is_safe_to_call_with_exec(&vec_str(&[
|
||||
@@ -240,8 +212,41 @@ mod tests {
|
||||
] {
|
||||
assert!(
|
||||
!is_safe_to_call_with_exec(&args),
|
||||
"expected {:?} to be unsafe",
|
||||
args
|
||||
"expected {args:?} to be unsafe"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ripgrep_rules() {
|
||||
// Safe ripgrep invocations – none of the unsafe flags are present.
|
||||
assert!(is_safe_to_call_with_exec(&vec_str(&[
|
||||
"rg",
|
||||
"Cargo.toml",
|
||||
"-n"
|
||||
])));
|
||||
|
||||
// Unsafe flags that do not take an argument (present verbatim).
|
||||
for args in [
|
||||
vec_str(&["rg", "--search-zip", "files"]),
|
||||
vec_str(&["rg", "-z", "files"]),
|
||||
] {
|
||||
assert!(
|
||||
!is_safe_to_call_with_exec(&args),
|
||||
"expected {args:?} to be considered unsafe due to zip-search flag",
|
||||
);
|
||||
}
|
||||
|
||||
// Unsafe flags that expect a value, provided in both split and = forms.
|
||||
for args in [
|
||||
vec_str(&["rg", "--pre", "pwned", "files"]),
|
||||
vec_str(&["rg", "--pre=pwned", "files"]),
|
||||
vec_str(&["rg", "--hostname-bin", "pwned", "files"]),
|
||||
vec_str(&["rg", "--hostname-bin=pwned", "files"]),
|
||||
] {
|
||||
assert!(
|
||||
!is_safe_to_call_with_exec(&args),
|
||||
"expected {args:?} to be considered unsafe due to external-command flag",
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -278,6 +283,30 @@ mod tests {
|
||||
])));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bash_lc_safe_examples_with_operators() {
|
||||
assert!(is_known_safe_command(&vec_str(&[
|
||||
"bash",
|
||||
"-lc",
|
||||
"grep -R \"Cargo.toml\" -n || true"
|
||||
])));
|
||||
assert!(is_known_safe_command(&vec_str(&[
|
||||
"bash",
|
||||
"-lc",
|
||||
"ls && pwd"
|
||||
])));
|
||||
assert!(is_known_safe_command(&vec_str(&[
|
||||
"bash",
|
||||
"-lc",
|
||||
"echo 'hi' ; ls"
|
||||
])));
|
||||
assert!(is_known_safe_command(&vec_str(&[
|
||||
"bash",
|
||||
"-lc",
|
||||
"ls | wc -l"
|
||||
])));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bash_lc_unsafe_examples() {
|
||||
assert!(
|
||||
@@ -291,44 +320,29 @@ mod tests {
|
||||
|
||||
assert!(
|
||||
!is_known_safe_command(&vec_str(&["bash", "-lc", "find . -name file.txt -delete"])),
|
||||
"Unsafe find option should not be auto‑approved."
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_parse_single_word_only_command() {
|
||||
let script_with_single_quoted_string = "sed -n '1,5p' file.txt";
|
||||
let parsed_words = try_parse_bash(script_with_single_quoted_string)
|
||||
.and_then(|tree| {
|
||||
try_parse_single_word_only_command(&tree, script_with_single_quoted_string)
|
||||
})
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
vec![
|
||||
"sed".to_string(),
|
||||
"-n".to_string(),
|
||||
// Ensure the single quotes are properly removed.
|
||||
"1,5p".to_string(),
|
||||
"file.txt".to_string()
|
||||
],
|
||||
parsed_words,
|
||||
"Unsafe find option should not be auto-approved."
|
||||
);
|
||||
|
||||
let script_with_number_arg = "ls -1";
|
||||
let parsed_words = try_parse_bash(script_with_number_arg)
|
||||
.and_then(|tree| try_parse_single_word_only_command(&tree, script_with_number_arg))
|
||||
.unwrap();
|
||||
assert_eq!(vec!["ls", "-1"], parsed_words,);
|
||||
// Disallowed because of unsafe command in sequence.
|
||||
assert!(
|
||||
!is_known_safe_command(&vec_str(&["bash", "-lc", "ls && rm -rf /"])),
|
||||
"Sequence containing unsafe command must be rejected"
|
||||
);
|
||||
|
||||
let script_with_double_quoted_string_with_no_funny_stuff_arg = "grep -R \"Cargo.toml\" -n";
|
||||
let parsed_words = try_parse_bash(script_with_double_quoted_string_with_no_funny_stuff_arg)
|
||||
.and_then(|tree| {
|
||||
try_parse_single_word_only_command(
|
||||
&tree,
|
||||
script_with_double_quoted_string_with_no_funny_stuff_arg,
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
assert_eq!(vec!["grep", "-R", "Cargo.toml", "-n"], parsed_words);
|
||||
// Disallowed because of parentheses / subshell.
|
||||
assert!(
|
||||
!is_known_safe_command(&vec_str(&["bash", "-lc", "(ls)"])),
|
||||
"Parentheses (subshell) are not provably safe with the current parser"
|
||||
);
|
||||
assert!(
|
||||
!is_known_safe_command(&vec_str(&["bash", "-lc", "ls || (pwd && echo hi)"])),
|
||||
"Nested parentheses are not provably safe with the current parser"
|
||||
);
|
||||
|
||||
// Disallowed redirection.
|
||||
assert!(
|
||||
!is_known_safe_command(&vec_str(&["bash", "-lc", "ls > out.txt"])),
|
||||
"> redirection should be rejected"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,11 +5,14 @@
|
||||
// the TUI or the tracing stack).
|
||||
#![deny(clippy::print_stdout, clippy::print_stderr)]
|
||||
|
||||
mod apply_patch;
|
||||
mod bash;
|
||||
mod chat_completions;
|
||||
mod client;
|
||||
mod client_common;
|
||||
pub mod codex;
|
||||
pub use codex::Codex;
|
||||
pub use codex::CodexSpawnOk;
|
||||
pub mod codex_wrapper;
|
||||
pub mod config;
|
||||
pub mod config_profile;
|
||||
@@ -19,21 +22,31 @@ pub mod error;
|
||||
pub mod exec;
|
||||
pub mod exec_env;
|
||||
mod flags;
|
||||
pub mod git_info;
|
||||
mod is_safe_command;
|
||||
mod mcp_connection_manager;
|
||||
mod mcp_tool_call;
|
||||
mod message_history;
|
||||
mod model_provider_info;
|
||||
pub use model_provider_info::BUILT_IN_OSS_MODEL_PROVIDER_ID;
|
||||
pub use model_provider_info::ModelProviderInfo;
|
||||
pub use model_provider_info::WireApi;
|
||||
pub use model_provider_info::built_in_model_providers;
|
||||
pub use model_provider_info::create_oss_provider_with_base_url;
|
||||
pub mod model_family;
|
||||
mod models;
|
||||
pub mod openai_api_key;
|
||||
mod openai_model_info;
|
||||
mod openai_tools;
|
||||
pub mod plan_tool;
|
||||
mod project_doc;
|
||||
pub mod protocol;
|
||||
mod rollout;
|
||||
mod safety;
|
||||
pub(crate) mod safety;
|
||||
pub mod seatbelt;
|
||||
pub mod shell;
|
||||
pub mod spawn;
|
||||
pub mod turn_diff_tracker;
|
||||
mod user_notification;
|
||||
pub mod util;
|
||||
|
||||
pub use client_common::model_supports_reasoning_summaries;
|
||||
pub use apply_patch::CODEX_APPLY_PATCH_ARG1;
|
||||
pub use safety::get_platform_sandbox;
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
//! `"<server><MCP_TOOL_NAME_DELIMITER><tool>"` as the key.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::ffi::OsString;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
@@ -16,8 +18,13 @@ use codex_mcp_client::McpClient;
|
||||
use mcp_types::ClientCapabilities;
|
||||
use mcp_types::Implementation;
|
||||
use mcp_types::Tool;
|
||||
|
||||
use serde_json::json;
|
||||
use sha1::Digest;
|
||||
use sha1::Sha1;
|
||||
use tokio::task::JoinSet;
|
||||
use tracing::info;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::config_types::McpServerConfig;
|
||||
|
||||
@@ -26,7 +33,8 @@ use crate::config_types::McpServerConfig;
|
||||
///
|
||||
/// OpenAI requires tool names to conform to `^[a-zA-Z0-9_-]+$`, so we must
|
||||
/// choose a delimiter from this character set.
|
||||
const MCP_TOOL_NAME_DELIMITER: &str = "__OAI_CODEX_MCP__";
|
||||
const MCP_TOOL_NAME_DELIMITER: &str = "__";
|
||||
const MAX_TOOL_NAME_LENGTH: usize = 64;
|
||||
|
||||
/// Timeout for the `tools/list` request.
|
||||
const LIST_TOOLS_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
@@ -35,16 +43,42 @@ const LIST_TOOLS_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
/// spawned successfully.
|
||||
pub type ClientStartErrors = HashMap<String, anyhow::Error>;
|
||||
|
||||
fn fully_qualified_tool_name(server: &str, tool: &str) -> String {
|
||||
format!("{server}{MCP_TOOL_NAME_DELIMITER}{tool}")
|
||||
fn qualify_tools(tools: Vec<ToolInfo>) -> HashMap<String, ToolInfo> {
|
||||
let mut used_names = HashSet::new();
|
||||
let mut qualified_tools = HashMap::new();
|
||||
for tool in tools {
|
||||
let mut qualified_name = format!(
|
||||
"{}{}{}",
|
||||
tool.server_name, MCP_TOOL_NAME_DELIMITER, tool.tool_name
|
||||
);
|
||||
if qualified_name.len() > MAX_TOOL_NAME_LENGTH {
|
||||
let mut hasher = Sha1::new();
|
||||
hasher.update(qualified_name.as_bytes());
|
||||
let sha1 = hasher.finalize();
|
||||
let sha1_str = format!("{sha1:x}");
|
||||
|
||||
// Truncate to make room for the hash suffix
|
||||
let prefix_len = MAX_TOOL_NAME_LENGTH - sha1_str.len();
|
||||
|
||||
qualified_name = format!("{}{}", &qualified_name[..prefix_len], sha1_str);
|
||||
}
|
||||
|
||||
if used_names.contains(&qualified_name) {
|
||||
warn!("skipping duplicated tool {}", qualified_name);
|
||||
continue;
|
||||
}
|
||||
|
||||
used_names.insert(qualified_name.clone());
|
||||
qualified_tools.insert(qualified_name, tool);
|
||||
}
|
||||
|
||||
qualified_tools
|
||||
}
|
||||
|
||||
pub(crate) fn try_parse_fully_qualified_tool_name(fq_name: &str) -> Option<(String, String)> {
|
||||
let (server, tool) = fq_name.split_once(MCP_TOOL_NAME_DELIMITER)?;
|
||||
if server.is_empty() || tool.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some((server.to_string(), tool.to_string()))
|
||||
struct ToolInfo {
|
||||
server_name: String,
|
||||
tool_name: String,
|
||||
tool: Tool,
|
||||
}
|
||||
|
||||
/// A thin wrapper around a set of running [`McpClient`] instances.
|
||||
@@ -57,7 +91,7 @@ pub(crate) struct McpConnectionManager {
|
||||
clients: HashMap<String, std::sync::Arc<McpClient>>,
|
||||
|
||||
/// Fully qualified tool name -> tool instance.
|
||||
tools: HashMap<String, Tool>,
|
||||
tools: HashMap<String, ToolInfo>,
|
||||
}
|
||||
|
||||
impl McpConnectionManager {
|
||||
@@ -79,12 +113,27 @@ impl McpConnectionManager {
|
||||
|
||||
// Launch all configured servers concurrently.
|
||||
let mut join_set = JoinSet::new();
|
||||
let mut errors = ClientStartErrors::new();
|
||||
|
||||
for (server_name, cfg) in mcp_servers {
|
||||
// TODO: Verify server name: require `^[a-zA-Z0-9_-]+$`?
|
||||
// Validate server name before spawning
|
||||
if !is_valid_mcp_server_name(&server_name) {
|
||||
let error = anyhow::anyhow!(
|
||||
"invalid server name '{}': must match pattern ^[a-zA-Z0-9_-]+$",
|
||||
server_name
|
||||
);
|
||||
errors.insert(server_name, error);
|
||||
continue;
|
||||
}
|
||||
|
||||
join_set.spawn(async move {
|
||||
let McpServerConfig { command, args, env } = cfg;
|
||||
let client_res = McpClient::new_stdio_client(command, args, env).await;
|
||||
let client_res = McpClient::new_stdio_client(
|
||||
command.into(),
|
||||
args.into_iter().map(OsString::from).collect(),
|
||||
env,
|
||||
)
|
||||
.await;
|
||||
match client_res {
|
||||
Ok(client) => {
|
||||
// Initialize the client.
|
||||
@@ -93,10 +142,14 @@ impl McpConnectionManager {
|
||||
experimental: None,
|
||||
roots: None,
|
||||
sampling: None,
|
||||
// https://modelcontextprotocol.io/specification/2025-06-18/client/elicitation#capabilities
|
||||
// indicates this should be an empty object.
|
||||
elicitation: Some(json!({})),
|
||||
},
|
||||
client_info: Implementation {
|
||||
name: "codex-mcp-client".to_owned(),
|
||||
version: env!("CARGO_PKG_VERSION").to_owned(),
|
||||
title: Some("Codex".into()),
|
||||
},
|
||||
protocol_version: mcp_types::MCP_SCHEMA_VERSION.to_owned(),
|
||||
};
|
||||
@@ -117,7 +170,6 @@ impl McpConnectionManager {
|
||||
|
||||
let mut clients: HashMap<String, std::sync::Arc<McpClient>> =
|
||||
HashMap::with_capacity(join_set.len());
|
||||
let mut errors = ClientStartErrors::new();
|
||||
|
||||
while let Some(res) = join_set.join_next().await {
|
||||
let (server_name, client_res) = res?; // JoinError propagation
|
||||
@@ -132,7 +184,9 @@ impl McpConnectionManager {
|
||||
}
|
||||
}
|
||||
|
||||
let tools = list_all_tools(&clients).await?;
|
||||
let all_tools = list_all_tools(&clients).await?;
|
||||
|
||||
let tools = qualify_tools(all_tools);
|
||||
|
||||
Ok((Self { clients, tools }, errors))
|
||||
}
|
||||
@@ -140,7 +194,10 @@ impl McpConnectionManager {
|
||||
/// Returns a single map that contains **all** tools. Each key is the
|
||||
/// fully-qualified name for the tool.
|
||||
pub fn list_all_tools(&self) -> HashMap<String, Tool> {
|
||||
self.tools.clone()
|
||||
self.tools
|
||||
.iter()
|
||||
.map(|(name, tool)| (name.clone(), tool.tool.clone()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Invoke the tool indicated by the (server, tool) pair.
|
||||
@@ -162,13 +219,19 @@ impl McpConnectionManager {
|
||||
.await
|
||||
.with_context(|| format!("tool call failed for `{server}/{tool}`"))
|
||||
}
|
||||
|
||||
pub fn parse_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
|
||||
self.tools
|
||||
.get(tool_name)
|
||||
.map(|tool| (tool.server_name.clone(), tool.tool_name.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Query every server for its available tools and return a single map that
|
||||
/// contains **all** tools. Each key is the fully-qualified name for the tool.
|
||||
pub async fn list_all_tools(
|
||||
async fn list_all_tools(
|
||||
clients: &HashMap<String, std::sync::Arc<McpClient>>,
|
||||
) -> Result<HashMap<String, Tool>> {
|
||||
) -> Result<Vec<ToolInfo>> {
|
||||
let mut join_set = JoinSet::new();
|
||||
|
||||
// Spawn one task per server so we can query them concurrently. This
|
||||
@@ -185,18 +248,19 @@ pub async fn list_all_tools(
|
||||
});
|
||||
}
|
||||
|
||||
let mut aggregated: HashMap<String, Tool> = HashMap::with_capacity(join_set.len());
|
||||
let mut aggregated: Vec<ToolInfo> = Vec::with_capacity(join_set.len());
|
||||
|
||||
while let Some(join_res) = join_set.join_next().await {
|
||||
let (server_name, list_result) = join_res?;
|
||||
let list_result = list_result?;
|
||||
|
||||
for tool in list_result.tools {
|
||||
// TODO(mbolin): escape tool names that contain invalid characters.
|
||||
let fq_name = fully_qualified_tool_name(&server_name, &tool.name);
|
||||
if aggregated.insert(fq_name.clone(), tool).is_some() {
|
||||
panic!("tool name collision for '{fq_name}': suspicious");
|
||||
}
|
||||
let tool_info = ToolInfo {
|
||||
server_name: server_name.clone(),
|
||||
tool_name: tool.name.clone(),
|
||||
tool,
|
||||
};
|
||||
aggregated.push(tool_info);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -208,3 +272,99 @@ pub async fn list_all_tools(
|
||||
|
||||
Ok(aggregated)
|
||||
}
|
||||
|
||||
fn is_valid_mcp_server_name(server_name: &str) -> bool {
|
||||
!server_name.is_empty()
|
||||
&& server_name
|
||||
.chars()
|
||||
.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use mcp_types::ToolInputSchema;
|
||||
|
||||
fn create_test_tool(server_name: &str, tool_name: &str) -> ToolInfo {
|
||||
ToolInfo {
|
||||
server_name: server_name.to_string(),
|
||||
tool_name: tool_name.to_string(),
|
||||
tool: Tool {
|
||||
annotations: None,
|
||||
description: Some(format!("Test tool: {tool_name}")),
|
||||
input_schema: ToolInputSchema {
|
||||
properties: None,
|
||||
required: None,
|
||||
r#type: "object".to_string(),
|
||||
},
|
||||
name: tool_name.to_string(),
|
||||
output_schema: None,
|
||||
title: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_qualify_tools_short_non_duplicated_names() {
|
||||
let tools = vec![
|
||||
create_test_tool("server1", "tool1"),
|
||||
create_test_tool("server1", "tool2"),
|
||||
];
|
||||
|
||||
let qualified_tools = qualify_tools(tools);
|
||||
|
||||
assert_eq!(qualified_tools.len(), 2);
|
||||
assert!(qualified_tools.contains_key("server1__tool1"));
|
||||
assert!(qualified_tools.contains_key("server1__tool2"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_qualify_tools_duplicated_names_skipped() {
|
||||
let tools = vec![
|
||||
create_test_tool("server1", "duplicate_tool"),
|
||||
create_test_tool("server1", "duplicate_tool"),
|
||||
];
|
||||
|
||||
let qualified_tools = qualify_tools(tools);
|
||||
|
||||
// Only the first tool should remain, the second is skipped
|
||||
assert_eq!(qualified_tools.len(), 1);
|
||||
assert!(qualified_tools.contains_key("server1__duplicate_tool"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_qualify_tools_long_names_same_server() {
|
||||
let server_name = "my_server";
|
||||
|
||||
let tools = vec![
|
||||
create_test_tool(
|
||||
server_name,
|
||||
"extremely_lengthy_function_name_that_absolutely_surpasses_all_reasonable_limits",
|
||||
),
|
||||
create_test_tool(
|
||||
server_name,
|
||||
"yet_another_extremely_lengthy_function_name_that_absolutely_surpasses_all_reasonable_limits",
|
||||
),
|
||||
];
|
||||
|
||||
let qualified_tools = qualify_tools(tools);
|
||||
|
||||
assert_eq!(qualified_tools.len(), 2);
|
||||
|
||||
let mut keys: Vec<_> = qualified_tools.keys().cloned().collect();
|
||||
keys.sort();
|
||||
|
||||
assert_eq!(keys[0].len(), 64);
|
||||
assert_eq!(
|
||||
keys[0],
|
||||
"my_server__extremely_lena02e507efc5a9de88637e436690364fd4219e4ef"
|
||||
);
|
||||
|
||||
assert_eq!(keys[1].len(), 64);
|
||||
assert_eq!(
|
||||
keys[1],
|
||||
"my_server__yet_another_e1c3987bd9c50b826cbe1687966f79f0c602d19ca"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use tracing::error;
|
||||
|
||||
@@ -7,6 +8,7 @@ use crate::models::FunctionCallOutputPayload;
|
||||
use crate::models::ResponseInputItem;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::McpInvocation;
|
||||
use crate::protocol::McpToolCallBeginEvent;
|
||||
use crate::protocol::McpToolCallEndEvent;
|
||||
|
||||
@@ -41,21 +43,28 @@ pub(crate) async fn handle_mcp_tool_call(
|
||||
}
|
||||
};
|
||||
|
||||
let tool_call_begin_event = EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
|
||||
call_id: call_id.clone(),
|
||||
let invocation = McpInvocation {
|
||||
server: server.clone(),
|
||||
tool: tool_name.clone(),
|
||||
arguments: arguments_value.clone(),
|
||||
};
|
||||
|
||||
let tool_call_begin_event = EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
|
||||
call_id: call_id.clone(),
|
||||
invocation: invocation.clone(),
|
||||
});
|
||||
notify_mcp_tool_call_event(sess, sub_id, tool_call_begin_event).await;
|
||||
|
||||
let start = Instant::now();
|
||||
// Perform the tool call.
|
||||
let result = sess
|
||||
.call_tool(&server, &tool_name, arguments_value, timeout)
|
||||
.call_tool(&server, &tool_name, arguments_value.clone(), timeout)
|
||||
.await
|
||||
.map_err(|e| format!("tool call error: {e}"));
|
||||
let tool_call_end_event = EventMsg::McpToolCallEnd(McpToolCallEndEvent {
|
||||
call_id: call_id.clone(),
|
||||
invocation,
|
||||
duration: start.elapsed(),
|
||||
result: result.clone(),
|
||||
});
|
||||
|
||||
|
||||
105
codex-rs/core/src/model_family.rs
Normal file
105
codex-rs/core/src/model_family.rs
Normal file
@@ -0,0 +1,105 @@
|
||||
/// A model family is a group of models that share certain characteristics.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct ModelFamily {
|
||||
/// The full model slug used to derive this model family, e.g.
|
||||
/// "gpt-4.1-2025-04-14".
|
||||
pub slug: String,
|
||||
|
||||
/// The model family name, e.g. "gpt-4.1". Note this should able to be used
|
||||
/// with [`crate::openai_model_info::get_model_info`].
|
||||
pub family: String,
|
||||
|
||||
/// True if the model needs additional instructions on how to use the
|
||||
/// "virtual" `apply_patch` CLI.
|
||||
pub needs_special_apply_patch_instructions: bool,
|
||||
|
||||
// Whether the `reasoning` field can be set when making a request to this
|
||||
// model family. Note it has `effort` and `summary` subfields (though
|
||||
// `summary` is optional).
|
||||
pub supports_reasoning_summaries: bool,
|
||||
|
||||
// This should be set to true when the model expects a tool named
|
||||
// "local_shell" to be provided. Its contract must be understood natively by
|
||||
// the model such that its description can be omitted.
|
||||
// See https://platform.openai.com/docs/guides/tools-local-shell
|
||||
pub uses_local_shell_tool: bool,
|
||||
|
||||
pub requires_chatgpt_auth: bool,
|
||||
}
|
||||
|
||||
macro_rules! model_family {
|
||||
(
|
||||
$slug:expr, $family:expr $(, $key:ident : $value:expr )* $(,)?
|
||||
) => {{
|
||||
// defaults
|
||||
let mut mf = ModelFamily {
|
||||
slug: $slug.to_string(),
|
||||
family: $family.to_string(),
|
||||
needs_special_apply_patch_instructions: false,
|
||||
supports_reasoning_summaries: false,
|
||||
uses_local_shell_tool: false,
|
||||
requires_chatgpt_auth: false,
|
||||
};
|
||||
// apply overrides
|
||||
$(
|
||||
mf.$key = $value;
|
||||
)*
|
||||
Some(mf)
|
||||
}};
|
||||
}
|
||||
|
||||
macro_rules! simple_model_family {
|
||||
(
|
||||
$slug:expr, $family:expr
|
||||
) => {{
|
||||
Some(ModelFamily {
|
||||
slug: $slug.to_string(),
|
||||
family: $family.to_string(),
|
||||
needs_special_apply_patch_instructions: false,
|
||||
supports_reasoning_summaries: false,
|
||||
uses_local_shell_tool: false,
|
||||
requires_chatgpt_auth: false,
|
||||
})
|
||||
}};
|
||||
}
|
||||
|
||||
/// Returns a `ModelFamily` for the given model slug, or `None` if the slug
|
||||
/// does not match any known model family.
|
||||
pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
if slug.starts_with("o3") {
|
||||
model_family!(
|
||||
slug, "o3",
|
||||
supports_reasoning_summaries: true,
|
||||
)
|
||||
} else if slug.starts_with("o4-mini") {
|
||||
model_family!(
|
||||
slug, "o4-mini",
|
||||
supports_reasoning_summaries: true,
|
||||
)
|
||||
} else if slug.starts_with("codex-mini-latest") {
|
||||
model_family!(
|
||||
slug, "codex-mini-latest",
|
||||
supports_reasoning_summaries: true,
|
||||
uses_local_shell_tool: true,
|
||||
)
|
||||
} else if slug.starts_with("gpt-4.1") {
|
||||
model_family!(
|
||||
slug, "gpt-4.1",
|
||||
needs_special_apply_patch_instructions: true,
|
||||
)
|
||||
} else if slug.starts_with("gpt-4o") {
|
||||
simple_model_family!(slug, "gpt-4o")
|
||||
} else if slug.starts_with("gpt-oss") {
|
||||
simple_model_family!(slug, "gpt-oss")
|
||||
} else if slug.starts_with("gpt-3.5") {
|
||||
simple_model_family!(slug, "gpt-3.5")
|
||||
} else if slug.starts_with("2025-08-06-model") {
|
||||
model_family!(
|
||||
slug, "2025-08-06-model",
|
||||
supports_reasoning_summaries: true,
|
||||
requires_chatgpt_auth: true,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
@@ -5,13 +5,18 @@
|
||||
//! 2. User-defined entries inside `~/.codex/config.toml` under the `model_providers`
|
||||
//! key. These override or extend the defaults at runtime.
|
||||
|
||||
use codex_login::AuthMode;
|
||||
use codex_login::CodexAuth;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
use std::env::VarError;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::error::EnvVarError;
|
||||
use crate::openai_api_key::get_openai_api_key;
|
||||
const DEFAULT_STREAM_IDLE_TIMEOUT_MS: u64 = 300_000;
|
||||
const DEFAULT_STREAM_MAX_RETRIES: u64 = 10;
|
||||
const DEFAULT_REQUEST_MAX_RETRIES: u64 = 4;
|
||||
|
||||
/// Wire protocol that the provider speaks. Most third-party services only
|
||||
/// implement the classic OpenAI Chat Completions JSON schema, whereas OpenAI
|
||||
@@ -22,10 +27,11 @@ use crate::openai_api_key::get_openai_api_key;
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum WireApi {
|
||||
/// The experimental “Responses” API exposed by OpenAI at `/v1/responses`.
|
||||
#[default]
|
||||
/// The Responses API exposed by OpenAI at `/v1/responses`.
|
||||
Responses,
|
||||
|
||||
/// Regular Chat Completions compatible with `/v1/chat/completions`.
|
||||
#[default]
|
||||
Chat,
|
||||
}
|
||||
|
||||
@@ -35,7 +41,7 @@ pub struct ModelProviderInfo {
|
||||
/// Friendly display name.
|
||||
pub name: String,
|
||||
/// Base URL for the provider's OpenAI-compatible API.
|
||||
pub base_url: String,
|
||||
pub base_url: Option<String>,
|
||||
/// Environment variable that stores the user's API key for this provider.
|
||||
pub env_key: Option<String>,
|
||||
|
||||
@@ -44,21 +50,140 @@ pub struct ModelProviderInfo {
|
||||
pub env_key_instructions: Option<String>,
|
||||
|
||||
/// Which wire protocol this provider expects.
|
||||
#[serde(default)]
|
||||
pub wire_api: WireApi,
|
||||
|
||||
/// Optional query parameters to append to the base URL.
|
||||
pub query_params: Option<HashMap<String, String>>,
|
||||
|
||||
/// Additional HTTP headers to include in requests to this provider where
|
||||
/// the (key, value) pairs are the header name and value.
|
||||
pub http_headers: Option<HashMap<String, String>>,
|
||||
|
||||
/// Optional HTTP headers to include in requests to this provider where the
|
||||
/// (key, value) pairs are the header name and _environment variable_ whose
|
||||
/// value should be used. If the environment variable is not set, or the
|
||||
/// value is empty, the header will not be included in the request.
|
||||
pub env_http_headers: Option<HashMap<String, String>>,
|
||||
|
||||
/// Maximum number of times to retry a failed HTTP request to this provider.
|
||||
pub request_max_retries: Option<u64>,
|
||||
|
||||
/// Number of times to retry reconnecting a dropped streaming response before failing.
|
||||
pub stream_max_retries: Option<u64>,
|
||||
|
||||
/// Idle timeout (in milliseconds) to wait for activity on a streaming response before treating
|
||||
/// the connection as lost.
|
||||
pub stream_idle_timeout_ms: Option<u64>,
|
||||
|
||||
/// Whether this provider requires some form of standard authentication (API key, ChatGPT token).
|
||||
#[serde(default)]
|
||||
pub requires_openai_auth: bool,
|
||||
}
|
||||
|
||||
impl ModelProviderInfo {
|
||||
/// Construct a `POST` RequestBuilder for the given URL using the provided
|
||||
/// reqwest Client applying:
|
||||
/// • provider-specific headers (static + env based)
|
||||
/// • Bearer auth header when an API key is available.
|
||||
/// • Auth token for OAuth.
|
||||
///
|
||||
/// If the provider declares an `env_key` but the variable is missing/empty, returns an [`Err`] identical to the
|
||||
/// one produced by [`ModelProviderInfo::api_key`].
|
||||
pub async fn create_request_builder<'a>(
|
||||
&'a self,
|
||||
client: &'a reqwest::Client,
|
||||
auth: &Option<CodexAuth>,
|
||||
) -> crate::error::Result<reqwest::RequestBuilder> {
|
||||
let effective_auth = match self.api_key() {
|
||||
Ok(Some(key)) => Some(CodexAuth::from_api_key(key)),
|
||||
Ok(None) => auth.clone(),
|
||||
Err(err) => {
|
||||
if auth.is_some() {
|
||||
auth.clone()
|
||||
} else {
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let url = self.get_full_url(&effective_auth);
|
||||
|
||||
let mut builder = client.post(url);
|
||||
|
||||
if let Some(auth) = effective_auth.as_ref() {
|
||||
builder = builder.bearer_auth(auth.get_token().await?);
|
||||
}
|
||||
|
||||
Ok(self.apply_http_headers(builder))
|
||||
}
|
||||
|
||||
fn get_query_string(&self) -> String {
|
||||
self.query_params
|
||||
.as_ref()
|
||||
.map_or_else(String::new, |params| {
|
||||
let full_params = params
|
||||
.iter()
|
||||
.map(|(k, v)| format!("{k}={v}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("&");
|
||||
format!("?{full_params}")
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn get_full_url(&self, auth: &Option<CodexAuth>) -> String {
|
||||
let default_base_url = if matches!(
|
||||
auth,
|
||||
Some(CodexAuth {
|
||||
mode: AuthMode::ChatGPT,
|
||||
..
|
||||
})
|
||||
) {
|
||||
"https://chatgpt.com/backend-api/codex"
|
||||
} else {
|
||||
"https://api.openai.com/v1"
|
||||
};
|
||||
let query_string = self.get_query_string();
|
||||
let base_url = self
|
||||
.base_url
|
||||
.clone()
|
||||
.unwrap_or(default_base_url.to_string());
|
||||
|
||||
match self.wire_api {
|
||||
WireApi::Responses => format!("{base_url}/responses{query_string}"),
|
||||
WireApi::Chat => format!("{base_url}/chat/completions{query_string}"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply provider-specific HTTP headers (both static and environment-based)
|
||||
/// onto an existing `reqwest::RequestBuilder` and return the updated
|
||||
/// builder.
|
||||
fn apply_http_headers(&self, mut builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
|
||||
if let Some(extra) = &self.http_headers {
|
||||
for (k, v) in extra {
|
||||
builder = builder.header(k, v);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(env_headers) = &self.env_http_headers {
|
||||
for (header, env_var) in env_headers {
|
||||
if let Ok(val) = std::env::var(env_var) {
|
||||
if !val.trim().is_empty() {
|
||||
builder = builder.header(header, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
builder
|
||||
}
|
||||
|
||||
/// If `env_key` is Some, returns the API key for this provider if present
|
||||
/// (and non-empty) in the environment. If `env_key` is required but
|
||||
/// cannot be found, returns an error.
|
||||
pub fn api_key(&self) -> crate::error::Result<Option<String>> {
|
||||
match &self.env_key {
|
||||
Some(env_key) => {
|
||||
let env_value = if env_key == crate::openai_api_key::OPENAI_API_KEY_ENV_VAR {
|
||||
get_openai_api_key().map_or_else(|| Err(VarError::NotPresent), Ok)
|
||||
} else {
|
||||
std::env::var(env_key)
|
||||
};
|
||||
let env_value = std::env::var(env_key);
|
||||
env_value
|
||||
.and_then(|v| {
|
||||
if v.trim().is_empty() {
|
||||
@@ -77,95 +202,213 @@ impl ModelProviderInfo {
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Effective maximum number of request retries for this provider.
|
||||
pub fn request_max_retries(&self) -> u64 {
|
||||
self.request_max_retries
|
||||
.unwrap_or(DEFAULT_REQUEST_MAX_RETRIES)
|
||||
}
|
||||
|
||||
/// Effective maximum number of stream reconnection attempts for this provider.
|
||||
pub fn stream_max_retries(&self) -> u64 {
|
||||
self.stream_max_retries
|
||||
.unwrap_or(DEFAULT_STREAM_MAX_RETRIES)
|
||||
}
|
||||
|
||||
/// Effective idle timeout for streaming responses.
|
||||
pub fn stream_idle_timeout(&self) -> Duration {
|
||||
self.stream_idle_timeout_ms
|
||||
.map(Duration::from_millis)
|
||||
.unwrap_or(Duration::from_millis(DEFAULT_STREAM_IDLE_TIMEOUT_MS))
|
||||
}
|
||||
}
|
||||
|
||||
const DEFAULT_OLLAMA_PORT: u32 = 11434;
|
||||
|
||||
pub const BUILT_IN_OSS_MODEL_PROVIDER_ID: &str = "oss";
|
||||
|
||||
/// Built-in default provider list.
|
||||
pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
|
||||
use ModelProviderInfo as P;
|
||||
|
||||
// We do not want to be in the business of adjucating which third-party
|
||||
// providers are bundled with Codex CLI, so we only include the OpenAI and
|
||||
// open source ("oss") providers by default. Users are encouraged to add to
|
||||
// `model_providers` in config.toml to add their own providers.
|
||||
[
|
||||
(
|
||||
"openai",
|
||||
P {
|
||||
name: "OpenAI".into(),
|
||||
base_url: "https://api.openai.com/v1".into(),
|
||||
env_key: Some("OPENAI_API_KEY".into()),
|
||||
env_key_instructions: Some("Create an API key (https://platform.openai.com) and export it as an environment variable.".into()),
|
||||
wire_api: WireApi::Responses,
|
||||
},
|
||||
),
|
||||
(
|
||||
"openrouter",
|
||||
P {
|
||||
name: "OpenRouter".into(),
|
||||
base_url: "https://openrouter.ai/api/v1".into(),
|
||||
env_key: Some("OPENROUTER_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"gemini",
|
||||
P {
|
||||
name: "Gemini".into(),
|
||||
base_url: "https://generativelanguage.googleapis.com/v1beta/openai".into(),
|
||||
env_key: Some("GEMINI_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"ollama",
|
||||
P {
|
||||
name: "Ollama".into(),
|
||||
base_url: "http://localhost:11434/v1".into(),
|
||||
// Allow users to override the default OpenAI endpoint by
|
||||
// exporting `OPENAI_BASE_URL`. This is useful when pointing
|
||||
// Codex at a proxy, mock server, or Azure-style deployment
|
||||
// without requiring a full TOML override for the built-in
|
||||
// OpenAI provider.
|
||||
base_url: std::env::var("OPENAI_BASE_URL")
|
||||
.ok()
|
||||
.filter(|v| !v.trim().is_empty()),
|
||||
env_key: None,
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"mistral",
|
||||
P {
|
||||
name: "Mistral".into(),
|
||||
base_url: "https://api.mistral.ai/v1".into(),
|
||||
env_key: Some("MISTRAL_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"deepseek",
|
||||
P {
|
||||
name: "DeepSeek".into(),
|
||||
base_url: "https://api.deepseek.com".into(),
|
||||
env_key: Some("DEEPSEEK_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"xai",
|
||||
P {
|
||||
name: "xAI".into(),
|
||||
base_url: "https://api.x.ai/v1".into(),
|
||||
env_key: Some("XAI_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
},
|
||||
),
|
||||
(
|
||||
"groq",
|
||||
P {
|
||||
name: "Groq".into(),
|
||||
base_url: "https://api.groq.com/openai/v1".into(),
|
||||
env_key: Some("GROQ_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
wire_api: WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: Some(
|
||||
[("version".to_string(), env!("CARGO_PKG_VERSION").to_string())]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
env_http_headers: Some(
|
||||
[
|
||||
(
|
||||
"OpenAI-Organization".to_string(),
|
||||
"OPENAI_ORGANIZATION".to_string(),
|
||||
),
|
||||
("OpenAI-Project".to_string(), "OPENAI_PROJECT".to_string()),
|
||||
]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
// Use global defaults for retry/timeout unless overridden in config.toml.
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
requires_openai_auth: true,
|
||||
},
|
||||
),
|
||||
(BUILT_IN_OSS_MODEL_PROVIDER_ID, create_oss_provider()),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), v))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn create_oss_provider() -> ModelProviderInfo {
|
||||
// These CODEX_OSS_ environment variables are experimental: we may
|
||||
// switch to reading values from config.toml instead.
|
||||
let codex_oss_base_url = match std::env::var("CODEX_OSS_BASE_URL")
|
||||
.ok()
|
||||
.filter(|v| !v.trim().is_empty())
|
||||
{
|
||||
Some(url) => url,
|
||||
None => format!(
|
||||
"http://localhost:{port}/v1",
|
||||
port = std::env::var("CODEX_OSS_PORT")
|
||||
.ok()
|
||||
.filter(|v| !v.trim().is_empty())
|
||||
.and_then(|v| v.parse::<u32>().ok())
|
||||
.unwrap_or(DEFAULT_OLLAMA_PORT)
|
||||
),
|
||||
};
|
||||
|
||||
create_oss_provider_with_base_url(&codex_oss_base_url)
|
||||
}
|
||||
|
||||
pub fn create_oss_provider_with_base_url(base_url: &str) -> ModelProviderInfo {
|
||||
ModelProviderInfo {
|
||||
name: "gpt-oss".into(),
|
||||
base_url: Some(base_url.into()),
|
||||
env_key: None,
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
requires_openai_auth: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::unwrap_used)]
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn test_deserialize_ollama_model_provider_toml() {
|
||||
let azure_provider_toml = r#"
|
||||
name = "Ollama"
|
||||
base_url = "http://localhost:11434/v1"
|
||||
"#;
|
||||
let expected_provider = ModelProviderInfo {
|
||||
name: "Ollama".into(),
|
||||
base_url: Some("http://localhost:11434/v1".into()),
|
||||
env_key: None,
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
|
||||
assert_eq!(expected_provider, provider);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_deserialize_azure_model_provider_toml() {
|
||||
let azure_provider_toml = r#"
|
||||
name = "Azure"
|
||||
base_url = "https://xxxxx.openai.azure.com/openai"
|
||||
env_key = "AZURE_OPENAI_API_KEY"
|
||||
query_params = { api-version = "2025-04-01-preview" }
|
||||
"#;
|
||||
let expected_provider = ModelProviderInfo {
|
||||
name: "Azure".into(),
|
||||
base_url: Some("https://xxxxx.openai.azure.com/openai".into()),
|
||||
env_key: Some("AZURE_OPENAI_API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
query_params: Some(maplit::hashmap! {
|
||||
"api-version".to_string() => "2025-04-01-preview".to_string(),
|
||||
}),
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
|
||||
assert_eq!(expected_provider, provider);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_deserialize_example_model_provider_toml() {
|
||||
let azure_provider_toml = r#"
|
||||
name = "Example"
|
||||
base_url = "https://example.com"
|
||||
env_key = "API_KEY"
|
||||
http_headers = { "X-Example-Header" = "example-value" }
|
||||
env_http_headers = { "X-Example-Env-Header" = "EXAMPLE_ENV_VAR" }
|
||||
"#;
|
||||
let expected_provider = ModelProviderInfo {
|
||||
name: "Example".into(),
|
||||
base_url: Some("https://example.com".into()),
|
||||
env_key: Some("API_KEY".into()),
|
||||
env_key_instructions: None,
|
||||
wire_api: WireApi::Chat,
|
||||
query_params: None,
|
||||
http_headers: Some(maplit::hashmap! {
|
||||
"X-Example-Header".to_string() => "example-value".to_string(),
|
||||
}),
|
||||
env_http_headers: Some(maplit::hashmap! {
|
||||
"X-Example-Env-Header".to_string() => "EXAMPLE_ENV_VAR".to_string(),
|
||||
}),
|
||||
request_max_retries: None,
|
||||
stream_max_retries: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
|
||||
assert_eq!(expected_provider, provider);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,12 +3,13 @@ use std::collections::HashMap;
|
||||
use base64::Engine;
|
||||
use mcp_types::CallToolResult;
|
||||
use serde::Deserialize;
|
||||
use serde::Deserializer;
|
||||
use serde::Serialize;
|
||||
use serde::ser::Serializer;
|
||||
|
||||
use crate::protocol::InputItem;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ResponseInputItem {
|
||||
Message {
|
||||
@@ -25,7 +26,7 @@ pub enum ResponseInputItem {
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ContentItem {
|
||||
InputText { text: String },
|
||||
@@ -33,16 +34,20 @@ pub enum ContentItem {
|
||||
OutputText { text: String },
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ResponseItem {
|
||||
Message {
|
||||
id: Option<String>,
|
||||
role: String,
|
||||
content: Vec<ContentItem>,
|
||||
},
|
||||
Reasoning {
|
||||
id: String,
|
||||
summary: Vec<ReasoningItemReasoningSummary>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
content: Option<Vec<ReasoningItemContent>>,
|
||||
encrypted_content: Option<String>,
|
||||
},
|
||||
LocalShellCall {
|
||||
/// Set when using the chat completions API.
|
||||
@@ -53,6 +58,7 @@ pub enum ResponseItem {
|
||||
action: LocalShellAction,
|
||||
},
|
||||
FunctionCall {
|
||||
id: Option<String>,
|
||||
name: String,
|
||||
// The Responses API returns the function call arguments as a *string* that contains
|
||||
// JSON, not as an already‑parsed object. We keep it as a raw string here and let
|
||||
@@ -78,7 +84,11 @@ pub enum ResponseItem {
|
||||
impl From<ResponseInputItem> for ResponseItem {
|
||||
fn from(item: ResponseInputItem) -> Self {
|
||||
match item {
|
||||
ResponseInputItem::Message { role, content } => Self::Message { role, content },
|
||||
ResponseInputItem::Message { role, content } => Self::Message {
|
||||
role,
|
||||
content,
|
||||
id: None,
|
||||
},
|
||||
ResponseInputItem::FunctionCallOutput { call_id, output } => {
|
||||
Self::FunctionCallOutput { call_id, output }
|
||||
}
|
||||
@@ -99,7 +109,7 @@ impl From<ResponseInputItem> for ResponseItem {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum LocalShellStatus {
|
||||
Completed,
|
||||
@@ -107,13 +117,13 @@ pub enum LocalShellStatus {
|
||||
Incomplete,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum LocalShellAction {
|
||||
Exec(LocalShellExecAction),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct LocalShellExecAction {
|
||||
pub command: Vec<String>,
|
||||
pub timeout_ms: Option<u64>,
|
||||
@@ -122,12 +132,18 @@ pub struct LocalShellExecAction {
|
||||
pub user: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ReasoningItemReasoningSummary {
|
||||
SummaryText { text: String },
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ReasoningItemContent {
|
||||
ReasoningText { text: String },
|
||||
}
|
||||
|
||||
impl From<Vec<InputItem>> for ResponseInputItem {
|
||||
fn from(items: Vec<InputItem>) -> Self {
|
||||
Self::Message {
|
||||
@@ -145,7 +161,7 @@ impl From<Vec<InputItem>> for ResponseInputItem {
|
||||
.unwrap_or_else(|| "application/octet-stream".to_string());
|
||||
let encoded = base64::engine::general_purpose::STANDARD.encode(bytes);
|
||||
Some(ContentItem::InputImage {
|
||||
image_url: format!("data:{};base64,{}", mime, encoded),
|
||||
image_url: format!("data:{mime};base64,{encoded}"),
|
||||
})
|
||||
}
|
||||
Err(err) => {
|
||||
@@ -175,12 +191,15 @@ pub struct ShellToolCallParams {
|
||||
// The wire format uses `timeout`, which has ambiguous units, so we use
|
||||
// `timeout_ms` as the field name so it is clear in code.
|
||||
pub timeout_ms: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct FunctionCallOutputPayload {
|
||||
pub content: String,
|
||||
#[expect(dead_code)]
|
||||
pub success: Option<bool>,
|
||||
}
|
||||
|
||||
@@ -205,6 +224,19 @@ impl Serialize for FunctionCallOutputPayload {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for FunctionCallOutputPayload {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let s = String::deserialize(deserializer)?;
|
||||
Ok(FunctionCallOutputPayload {
|
||||
content: s,
|
||||
success: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Implement Display so callers can treat the payload like a plain string when logging or doing
|
||||
// trivial substring checks in tests (existing tests call `.contains()` on the output). Display
|
||||
// returns the raw `content` field.
|
||||
@@ -274,6 +306,8 @@ mod tests {
|
||||
command: vec!["ls".to_string(), "-l".to_string()],
|
||||
workdir: Some("/tmp".to_string()),
|
||||
timeout_ms: Some(1000),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
params
|
||||
);
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
use std::env;
|
||||
use std::sync::LazyLock;
|
||||
use std::sync::RwLock;
|
||||
|
||||
pub const OPENAI_API_KEY_ENV_VAR: &str = "OPENAI_API_KEY";
|
||||
|
||||
static OPENAI_API_KEY: LazyLock<RwLock<Option<String>>> = LazyLock::new(|| {
|
||||
let val = env::var(OPENAI_API_KEY_ENV_VAR)
|
||||
.ok()
|
||||
.and_then(|s| if s.is_empty() { None } else { Some(s) });
|
||||
RwLock::new(val)
|
||||
});
|
||||
|
||||
pub fn get_openai_api_key() -> Option<String> {
|
||||
#![allow(clippy::unwrap_used)]
|
||||
OPENAI_API_KEY.read().unwrap().clone()
|
||||
}
|
||||
|
||||
pub fn set_openai_api_key(value: String) {
|
||||
#![allow(clippy::unwrap_used)]
|
||||
if !value.is_empty() {
|
||||
*OPENAI_API_KEY.write().unwrap() = Some(value);
|
||||
}
|
||||
}
|
||||
87
codex-rs/core/src/openai_model_info.rs
Normal file
87
codex-rs/core/src/openai_model_info.rs
Normal file
@@ -0,0 +1,87 @@
|
||||
use crate::model_family::ModelFamily;
|
||||
|
||||
/// Metadata about a model, particularly OpenAI models.
|
||||
/// We may want to consider including details like the pricing for
|
||||
/// input tokens, output tokens, etc., though users will need to be able to
|
||||
/// override this in config.toml, as this information can get out of date.
|
||||
/// Though this would help present more accurate pricing information in the UI.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ModelInfo {
|
||||
/// Size of the context window in tokens.
|
||||
pub(crate) context_window: u64,
|
||||
|
||||
/// Maximum number of output tokens that can be generated for the model.
|
||||
pub(crate) max_output_tokens: u64,
|
||||
}
|
||||
|
||||
pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
|
||||
match model_family.slug.as_str() {
|
||||
// OSS models have a 128k shared token pool.
|
||||
// Arbitrarily splitting it: 3/4 input context, 1/4 output.
|
||||
// https://openai.com/index/gpt-oss-model-card/
|
||||
"gpt-oss-20b" => Some(ModelInfo {
|
||||
context_window: 96_000,
|
||||
max_output_tokens: 32_000,
|
||||
}),
|
||||
"gpt-oss-120b" => Some(ModelInfo {
|
||||
context_window: 96_000,
|
||||
max_output_tokens: 32_000,
|
||||
}),
|
||||
// https://platform.openai.com/docs/models/o3
|
||||
"o3" => Some(ModelInfo {
|
||||
context_window: 200_000,
|
||||
max_output_tokens: 100_000,
|
||||
}),
|
||||
|
||||
// https://platform.openai.com/docs/models/o4-mini
|
||||
"o4-mini" => Some(ModelInfo {
|
||||
context_window: 200_000,
|
||||
max_output_tokens: 100_000,
|
||||
}),
|
||||
|
||||
// https://platform.openai.com/docs/models/codex-mini-latest
|
||||
"codex-mini-latest" => Some(ModelInfo {
|
||||
context_window: 200_000,
|
||||
max_output_tokens: 100_000,
|
||||
}),
|
||||
|
||||
// As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
|
||||
// https://platform.openai.com/docs/models/gpt-4.1
|
||||
"gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo {
|
||||
context_window: 1_047_576,
|
||||
max_output_tokens: 32_768,
|
||||
}),
|
||||
|
||||
// As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
|
||||
// https://platform.openai.com/docs/models/gpt-4o
|
||||
"gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo {
|
||||
context_window: 128_000,
|
||||
max_output_tokens: 16_384,
|
||||
}),
|
||||
|
||||
// https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
|
||||
"gpt-4o-2024-05-13" => Some(ModelInfo {
|
||||
context_window: 128_000,
|
||||
max_output_tokens: 4_096,
|
||||
}),
|
||||
|
||||
// https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
|
||||
"gpt-4o-2024-11-20" => Some(ModelInfo {
|
||||
context_window: 128_000,
|
||||
max_output_tokens: 16_384,
|
||||
}),
|
||||
|
||||
// https://platform.openai.com/docs/models/gpt-3.5-turbo
|
||||
"gpt-3.5-turbo" => Some(ModelInfo {
|
||||
context_window: 16_385,
|
||||
max_output_tokens: 4_096,
|
||||
}),
|
||||
|
||||
"2025-08-06-model" => Some(ModelInfo {
|
||||
context_window: 200_000,
|
||||
max_output_tokens: 100_000,
|
||||
}),
|
||||
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -1,21 +1,28 @@
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::LazyLock;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::client_common::Prompt;
|
||||
use crate::model_family::ModelFamily;
|
||||
use crate::plan_tool::PLAN_TOOL;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub(crate) struct ResponsesApiTool {
|
||||
name: &'static str,
|
||||
description: &'static str,
|
||||
strict: bool,
|
||||
parameters: JsonSchema,
|
||||
#[derive(Debug, Clone, Serialize, PartialEq)]
|
||||
pub struct ResponsesApiTool {
|
||||
pub(crate) name: String,
|
||||
pub(crate) description: String,
|
||||
/// TODO: Validation. When strict is set to true, the JSON schema,
|
||||
/// `required` and `additional_properties` must be present. All fields in
|
||||
/// `properties` must be present in `required`.
|
||||
pub(crate) strict: bool,
|
||||
pub(crate) parameters: JsonSchema,
|
||||
}
|
||||
|
||||
/// When serialized as JSON, this produces a valid "Tool" in the OpenAI
|
||||
/// Responses API.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[derive(Debug, Clone, Serialize, PartialEq)]
|
||||
#[serde(tag = "type")]
|
||||
pub(crate) enum OpenAiTool {
|
||||
#[serde(rename = "function")]
|
||||
@@ -24,74 +31,218 @@ pub(crate) enum OpenAiTool {
|
||||
LocalShell {},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ConfigShellToolType {
|
||||
DefaultShell,
|
||||
ShellWithRequest { sandbox_policy: SandboxPolicy },
|
||||
LocalShell,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ToolsConfig {
|
||||
pub shell_type: ConfigShellToolType,
|
||||
pub plan_tool: bool,
|
||||
}
|
||||
|
||||
impl ToolsConfig {
|
||||
pub fn new(
|
||||
model_family: &ModelFamily,
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: SandboxPolicy,
|
||||
include_plan_tool: bool,
|
||||
) -> Self {
|
||||
let mut shell_type = if model_family.uses_local_shell_tool {
|
||||
ConfigShellToolType::LocalShell
|
||||
} else {
|
||||
ConfigShellToolType::DefaultShell
|
||||
};
|
||||
if matches!(approval_policy, AskForApproval::OnRequest) {
|
||||
shell_type = ConfigShellToolType::ShellWithRequest {
|
||||
sandbox_policy: sandbox_policy.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
Self {
|
||||
shell_type,
|
||||
plan_tool: include_plan_tool,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generic JSON‑Schema subset needed for our tool definitions
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type", rename_all = "lowercase")]
|
||||
pub(crate) enum JsonSchema {
|
||||
String,
|
||||
Number,
|
||||
Boolean {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
description: Option<String>,
|
||||
},
|
||||
String {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
description: Option<String>,
|
||||
},
|
||||
Number {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
description: Option<String>,
|
||||
},
|
||||
Array {
|
||||
items: Box<JsonSchema>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
description: Option<String>,
|
||||
},
|
||||
Object {
|
||||
properties: BTreeMap<String, JsonSchema>,
|
||||
required: &'static [&'static str],
|
||||
#[serde(rename = "additionalProperties")]
|
||||
additional_properties: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
required: Option<Vec<String>>,
|
||||
#[serde(
|
||||
rename = "additionalProperties",
|
||||
skip_serializing_if = "Option::is_none"
|
||||
)]
|
||||
additional_properties: Option<bool>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Tool usage specification
|
||||
static DEFAULT_TOOLS: LazyLock<Vec<OpenAiTool>> = LazyLock::new(|| {
|
||||
fn create_shell_tool() -> OpenAiTool {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"command".to_string(),
|
||||
JsonSchema::Array {
|
||||
items: Box::new(JsonSchema::String),
|
||||
items: Box::new(JsonSchema::String { description: None }),
|
||||
description: None,
|
||||
},
|
||||
);
|
||||
properties.insert("workdir".to_string(), JsonSchema::String);
|
||||
properties.insert("timeout".to_string(), JsonSchema::Number);
|
||||
properties.insert(
|
||||
"workdir".to_string(),
|
||||
JsonSchema::String { description: None },
|
||||
);
|
||||
properties.insert(
|
||||
"timeout".to_string(),
|
||||
JsonSchema::Number { description: None },
|
||||
);
|
||||
|
||||
vec![OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "shell",
|
||||
description: "Runs a shell command, and returns its output.",
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "shell".to_string(),
|
||||
description: "Runs a shell command and returns its output".to_string(),
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: &["command"],
|
||||
additional_properties: false,
|
||||
required: Some(vec!["command".to_string()]),
|
||||
additional_properties: Some(false),
|
||||
},
|
||||
})]
|
||||
});
|
||||
})
|
||||
}
|
||||
|
||||
static DEFAULT_CODEX_MODEL_TOOLS: LazyLock<Vec<OpenAiTool>> =
|
||||
LazyLock::new(|| vec![OpenAiTool::LocalShell {}]);
|
||||
fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"command".to_string(),
|
||||
JsonSchema::Array {
|
||||
items: Box::new(JsonSchema::String { description: None }),
|
||||
description: Some("The command to execute".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"workdir".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("The working directory to execute the command in".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"timeout".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("The timeout for the command in milliseconds".to_string()),
|
||||
},
|
||||
);
|
||||
|
||||
if matches!(sandbox_policy, SandboxPolicy::WorkspaceWrite { .. }) {
|
||||
properties.insert(
|
||||
"with_escalated_permissions".to_string(),
|
||||
JsonSchema::Boolean {
|
||||
description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"justification".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("Only set if ask_for_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
let description = match sandbox_policy {
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
network_access,
|
||||
..
|
||||
} => {
|
||||
format!(
|
||||
r#"
|
||||
The shell tool is used to execute shell commands.
|
||||
- When invoking the shell tool, your call will be running in a landlock sandbox, and some shell commands will require escalated privileges:
|
||||
- Types of actions that require escalated privileges:
|
||||
- Reading files outside the current directory
|
||||
- Writing files outside the current directory, and protected folders like .git or .env{}
|
||||
- Examples of commands that require escalated privileges:
|
||||
- git commit
|
||||
- npm install or pnpm install
|
||||
- cargo build
|
||||
- cargo test
|
||||
- When invoking a command that will require escalated privileges:
|
||||
- Provide the with_escalated_permissions parameter with the boolean value true
|
||||
- Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter."#,
|
||||
if !network_access {
|
||||
"\n - Commands that require network access\n"
|
||||
} else {
|
||||
""
|
||||
}
|
||||
)
|
||||
}
|
||||
SandboxPolicy::DangerFullAccess => {
|
||||
"Runs a shell command and returns its output.".to_string()
|
||||
}
|
||||
SandboxPolicy::ReadOnly => {
|
||||
r#"
|
||||
The shell tool is used to execute shell commands.
|
||||
- When invoking the shell tool, your call will be running in a landlock sandbox, and some shell commands (including apply_patch) will require escalated permissions:
|
||||
- Types of actions that require escalated privileges:
|
||||
- Reading files outside the current directory
|
||||
- Writing files
|
||||
- Applying patches
|
||||
- Examples of commands that require escalated privileges:
|
||||
- apply_patch
|
||||
- git commit
|
||||
- npm install or pnpm install
|
||||
- cargo build
|
||||
- cargo test
|
||||
- When invoking a command that will require escalated privileges:
|
||||
- Provide the with_escalated_permissions parameter with the boolean value true
|
||||
- Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter"#.to_string()
|
||||
}
|
||||
};
|
||||
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "shell".to_string(),
|
||||
description,
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: Some(vec!["command".to_string()]),
|
||||
additional_properties: Some(false),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns JSON values that are compatible with Function Calling in the
|
||||
/// Responses API:
|
||||
/// https://platform.openai.com/docs/guides/function-calling?api-mode=responses
|
||||
pub(crate) fn create_tools_json_for_responses_api(
|
||||
prompt: &Prompt,
|
||||
model: &str,
|
||||
tools: &Vec<OpenAiTool>,
|
||||
) -> crate::error::Result<Vec<serde_json::Value>> {
|
||||
// Assemble tool list: built-in tools + any extra tools from the prompt.
|
||||
let default_tools = if model.starts_with("codex") {
|
||||
&DEFAULT_CODEX_MODEL_TOOLS
|
||||
} else {
|
||||
&DEFAULT_TOOLS
|
||||
};
|
||||
let mut tools_json = Vec::with_capacity(default_tools.len() + prompt.extra_tools.len());
|
||||
for t in default_tools.iter() {
|
||||
tools_json.push(serde_json::to_value(t)?);
|
||||
let mut tools_json = Vec::new();
|
||||
|
||||
for tool in tools {
|
||||
tools_json.push(serde_json::to_value(tool)?);
|
||||
}
|
||||
tools_json.extend(
|
||||
prompt
|
||||
.extra_tools
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
|
||||
);
|
||||
|
||||
Ok(tools_json)
|
||||
}
|
||||
@@ -100,12 +251,11 @@ pub(crate) fn create_tools_json_for_responses_api(
|
||||
/// Chat Completions API:
|
||||
/// https://platform.openai.com/docs/guides/function-calling?api-mode=chat
|
||||
pub(crate) fn create_tools_json_for_chat_completions_api(
|
||||
prompt: &Prompt,
|
||||
model: &str,
|
||||
tools: &Vec<OpenAiTool>,
|
||||
) -> crate::error::Result<Vec<serde_json::Value>> {
|
||||
// We start with the JSON for the Responses API and than rewrite it to match
|
||||
// the chat completions tool call format.
|
||||
let responses_api_tools_json = create_tools_json_for_responses_api(prompt, model)?;
|
||||
let responses_api_tools_json = create_tools_json_for_responses_api(tools)?;
|
||||
let tools_json = responses_api_tools_json
|
||||
.into_iter()
|
||||
.filter_map(|mut tool| {
|
||||
@@ -128,10 +278,10 @@ pub(crate) fn create_tools_json_for_chat_completions_api(
|
||||
Ok(tools_json)
|
||||
}
|
||||
|
||||
fn mcp_tool_to_openai_tool(
|
||||
pub(crate) fn mcp_tool_to_openai_tool(
|
||||
fully_qualified_name: String,
|
||||
tool: mcp_types::Tool,
|
||||
) -> serde_json::Value {
|
||||
) -> Result<ResponsesApiTool, serde_json::Error> {
|
||||
let mcp_types::Tool {
|
||||
description,
|
||||
mut input_schema,
|
||||
@@ -146,12 +296,205 @@ fn mcp_tool_to_openai_tool(
|
||||
input_schema.properties = Some(serde_json::Value::Object(serde_json::Map::new()));
|
||||
}
|
||||
|
||||
// TODO(mbolin): Change the contract of this function to return
|
||||
// ResponsesApiTool.
|
||||
json!({
|
||||
"name": fully_qualified_name,
|
||||
"description": description,
|
||||
"parameters": input_schema,
|
||||
"type": "function",
|
||||
let serialized_input_schema = serde_json::to_value(input_schema)?;
|
||||
let input_schema = serde_json::from_value::<JsonSchema>(serialized_input_schema)?;
|
||||
|
||||
Ok(ResponsesApiTool {
|
||||
name: fully_qualified_name,
|
||||
description: description.unwrap_or_default(),
|
||||
strict: false,
|
||||
parameters: input_schema,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a list of OpenAiTools based on the provided config and MCP tools.
|
||||
/// Note that the keys of mcp_tools should be fully qualified names. See
|
||||
/// [`McpConnectionManager`] for more details.
|
||||
pub(crate) fn get_openai_tools(
|
||||
config: &ToolsConfig,
|
||||
mcp_tools: Option<HashMap<String, mcp_types::Tool>>,
|
||||
) -> Vec<OpenAiTool> {
|
||||
let mut tools: Vec<OpenAiTool> = Vec::new();
|
||||
|
||||
match &config.shell_type {
|
||||
ConfigShellToolType::DefaultShell => {
|
||||
tools.push(create_shell_tool());
|
||||
}
|
||||
ConfigShellToolType::ShellWithRequest { sandbox_policy } => {
|
||||
tools.push(create_shell_tool_for_sandbox(sandbox_policy));
|
||||
}
|
||||
ConfigShellToolType::LocalShell => {
|
||||
tools.push(OpenAiTool::LocalShell {});
|
||||
}
|
||||
}
|
||||
|
||||
if config.plan_tool {
|
||||
tools.push(PLAN_TOOL.clone());
|
||||
}
|
||||
|
||||
if let Some(mcp_tools) = mcp_tools {
|
||||
for (name, tool) in mcp_tools {
|
||||
match mcp_tool_to_openai_tool(name.clone(), tool.clone()) {
|
||||
Ok(converted_tool) => tools.push(OpenAiTool::Function(converted_tool)),
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to convert {name:?} MCP tool to OpenAI tool: {e:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tools
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::expect_used)]
|
||||
mod tests {
|
||||
use crate::model_family::find_family_for_model;
|
||||
use mcp_types::ToolInputSchema;
|
||||
|
||||
use super::*;
|
||||
|
||||
fn assert_eq_tool_names(tools: &[OpenAiTool], expected_names: &[&str]) {
|
||||
let tool_names = tools
|
||||
.iter()
|
||||
.map(|tool| match tool {
|
||||
OpenAiTool::Function(ResponsesApiTool { name, .. }) => name,
|
||||
OpenAiTool::LocalShell {} => "local_shell",
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
assert_eq!(
|
||||
tool_names.len(),
|
||||
expected_names.len(),
|
||||
"tool_name mismatch, {tool_names:?}, {expected_names:?}",
|
||||
);
|
||||
for (name, expected_name) in tool_names.iter().zip(expected_names.iter()) {
|
||||
assert_eq!(
|
||||
name, expected_name,
|
||||
"tool_name mismatch, {name:?}, {expected_name:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_openai_tools() {
|
||||
let model_family = find_family_for_model("codex-mini-latest")
|
||||
.expect("codex-mini-latest should be a valid model family");
|
||||
let config = ToolsConfig::new(
|
||||
&model_family,
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::ReadOnly,
|
||||
true,
|
||||
);
|
||||
let tools = get_openai_tools(&config, Some(HashMap::new()));
|
||||
|
||||
assert_eq_tool_names(&tools, &["local_shell", "update_plan"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_openai_tools_default_shell() {
|
||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||
let config = ToolsConfig::new(
|
||||
&model_family,
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::ReadOnly,
|
||||
true,
|
||||
);
|
||||
let tools = get_openai_tools(&config, Some(HashMap::new()));
|
||||
|
||||
assert_eq_tool_names(&tools, &["shell", "update_plan"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_openai_tools_mcp_tools() {
|
||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||
let config = ToolsConfig::new(
|
||||
&model_family,
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::ReadOnly,
|
||||
false,
|
||||
);
|
||||
let tools = get_openai_tools(
|
||||
&config,
|
||||
Some(HashMap::from([(
|
||||
"test_server/do_something_cool".to_string(),
|
||||
mcp_types::Tool {
|
||||
name: "do_something_cool".to_string(),
|
||||
input_schema: ToolInputSchema {
|
||||
properties: Some(serde_json::json!({
|
||||
"string_argument": {
|
||||
"type": "string",
|
||||
},
|
||||
"number_argument": {
|
||||
"type": "number",
|
||||
},
|
||||
"object_argument": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"string_property": { "type": "string" },
|
||||
"number_property": { "type": "number" },
|
||||
},
|
||||
"required": [
|
||||
"string_property",
|
||||
"number_property"
|
||||
],
|
||||
"additionalProperties": Some(false),
|
||||
},
|
||||
})),
|
||||
required: None,
|
||||
r#type: "object".to_string(),
|
||||
},
|
||||
output_schema: None,
|
||||
title: None,
|
||||
annotations: None,
|
||||
description: Some("Do something cool".to_string()),
|
||||
},
|
||||
)])),
|
||||
);
|
||||
|
||||
assert_eq_tool_names(&tools, &["shell", "test_server/do_something_cool"]);
|
||||
|
||||
assert_eq!(
|
||||
tools[1],
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "test_server/do_something_cool".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
properties: BTreeMap::from([
|
||||
(
|
||||
"string_argument".to_string(),
|
||||
JsonSchema::String { description: None }
|
||||
),
|
||||
(
|
||||
"number_argument".to_string(),
|
||||
JsonSchema::Number { description: None }
|
||||
),
|
||||
(
|
||||
"object_argument".to_string(),
|
||||
JsonSchema::Object {
|
||||
properties: BTreeMap::from([
|
||||
(
|
||||
"string_property".to_string(),
|
||||
JsonSchema::String { description: None }
|
||||
),
|
||||
(
|
||||
"number_property".to_string(),
|
||||
JsonSchema::Number { description: None }
|
||||
),
|
||||
]),
|
||||
required: Some(vec![
|
||||
"string_property".to_string(),
|
||||
"number_property".to_string(),
|
||||
]),
|
||||
additional_properties: Some(false),
|
||||
},
|
||||
),
|
||||
]),
|
||||
required: None,
|
||||
additional_properties: None,
|
||||
},
|
||||
description: "Do something cool".to_string(),
|
||||
strict: false,
|
||||
})
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
133
codex-rs/core/src/plan_tool.rs
Normal file
133
codex-rs/core/src/plan_tool.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::codex::Session;
|
||||
use crate::models::FunctionCallOutputPayload;
|
||||
use crate::models::ResponseInputItem;
|
||||
use crate::openai_tools::JsonSchema;
|
||||
use crate::openai_tools::OpenAiTool;
|
||||
use crate::openai_tools::ResponsesApiTool;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
|
||||
// Types for the TODO tool arguments matching codex-vscode/todo-mcp/src/main.rs
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum StepStatus {
|
||||
Pending,
|
||||
InProgress,
|
||||
Completed,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct PlanItemArg {
|
||||
pub step: String,
|
||||
pub status: StepStatus,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct UpdatePlanArgs {
|
||||
#[serde(default)]
|
||||
pub explanation: Option<String>,
|
||||
pub plan: Vec<PlanItemArg>,
|
||||
}
|
||||
|
||||
pub(crate) static PLAN_TOOL: LazyLock<OpenAiTool> = LazyLock::new(|| {
|
||||
let mut plan_item_props = BTreeMap::new();
|
||||
plan_item_props.insert("step".to_string(), JsonSchema::String { description: None });
|
||||
plan_item_props.insert(
|
||||
"status".to_string(),
|
||||
JsonSchema::String { description: None },
|
||||
);
|
||||
|
||||
let plan_items_schema = JsonSchema::Array {
|
||||
description: Some("The list of steps".to_string()),
|
||||
items: Box::new(JsonSchema::Object {
|
||||
properties: plan_item_props,
|
||||
required: Some(vec!["step".to_string(), "status".to_string()]),
|
||||
additional_properties: Some(false),
|
||||
}),
|
||||
};
|
||||
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"explanation".to_string(),
|
||||
JsonSchema::String { description: None },
|
||||
);
|
||||
properties.insert("plan".to_string(), plan_items_schema);
|
||||
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "update_plan".to_string(),
|
||||
description: r#"Use the update_plan tool to keep the user updated on the current plan for the task.
|
||||
After understanding the user's task, call the update_plan tool with an initial plan. An example of a plan:
|
||||
1. Explore the codebase to find relevant files (status: in_progress)
|
||||
2. Implement the feature in the XYZ component (status: pending)
|
||||
3. Commit changes and make a pull request (status: pending)
|
||||
Each step should be a short, 1-sentence description.
|
||||
Until all the steps are finished, there should always be exactly one in_progress step in the plan.
|
||||
Call the update_plan tool whenever you finish a step, marking the completed step as `completed` and marking the next step as `in_progress`.
|
||||
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step.
|
||||
Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
|
||||
When all steps are completed, call update_plan one last time with all steps marked as `completed`."#.to_string(),
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: Some(vec!["plan".to_string()]),
|
||||
additional_properties: Some(false),
|
||||
},
|
||||
})
|
||||
});
|
||||
|
||||
/// This function doesn't do anything useful. However, it gives the model a structured way to record its plan that clients can read and render.
|
||||
/// So it's the _inputs_ to this function that are useful to clients, not the outputs and neither are actually useful for the model other
|
||||
/// than forcing it to come up and document a plan (TBD how that affects performance).
|
||||
pub(crate) async fn handle_update_plan(
|
||||
session: &Session,
|
||||
arguments: String,
|
||||
sub_id: String,
|
||||
call_id: String,
|
||||
) -> ResponseInputItem {
|
||||
match parse_update_plan_arguments(arguments, &call_id) {
|
||||
Ok(args) => {
|
||||
let output = ResponseInputItem::FunctionCallOutput {
|
||||
call_id,
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "Plan updated".to_string(),
|
||||
success: Some(true),
|
||||
},
|
||||
};
|
||||
session
|
||||
.send_event(Event {
|
||||
id: sub_id.to_string(),
|
||||
msg: EventMsg::PlanUpdate(args),
|
||||
})
|
||||
.await;
|
||||
output
|
||||
}
|
||||
Err(output) => *output,
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_update_plan_arguments(
|
||||
arguments: String,
|
||||
call_id: &str,
|
||||
) -> Result<UpdatePlanArgs, Box<ResponseInputItem>> {
|
||||
match serde_json::from_str::<UpdatePlanArgs>(&arguments) {
|
||||
Ok(args) => Ok(args),
|
||||
Err(e) => {
|
||||
let output = ResponseInputItem::FunctionCallOutput {
|
||||
call_id: call_id.to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: format!("failed to parse function arguments: {e}"),
|
||||
success: None,
|
||||
},
|
||||
};
|
||||
Err(Box::new(output))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -27,16 +27,16 @@ const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
|
||||
/// string of instructions.
|
||||
pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
|
||||
match find_project_doc(config).await {
|
||||
Ok(Some(project_doc)) => match &config.instructions {
|
||||
Ok(Some(project_doc)) => match &config.user_instructions {
|
||||
Some(original_instructions) => Some(format!(
|
||||
"{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
|
||||
)),
|
||||
None => Some(project_doc),
|
||||
},
|
||||
Ok(None) => config.instructions.clone(),
|
||||
Ok(None) => config.user_instructions.clone(),
|
||||
Err(e) => {
|
||||
error!("error trying to find project doc: {e:#}");
|
||||
config.instructions.clone()
|
||||
config.user_instructions.clone()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -159,7 +159,7 @@ mod tests {
|
||||
config.cwd = root.path().to_path_buf();
|
||||
config.project_doc_max_bytes = limit;
|
||||
|
||||
config.instructions = instructions.map(ToOwned::to_owned);
|
||||
config.user_instructions = instructions.map(ToOwned::to_owned);
|
||||
config
|
||||
}
|
||||
|
||||
|
||||
@@ -4,18 +4,24 @@
|
||||
//! between user and agent.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use mcp_types::CallToolResult;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use serde_bytes::ByteBuf;
|
||||
use strum_macros::Display;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use crate::message_history::HistoryEntry;
|
||||
use crate::model_provider_info::ModelProviderInfo;
|
||||
use crate::plan_tool::UpdatePlanArgs;
|
||||
|
||||
/// Submission Queue Entry - requests from user
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
@@ -43,8 +49,12 @@ pub enum Op {
|
||||
model_reasoning_effort: ReasoningEffortConfig,
|
||||
model_reasoning_summary: ReasoningSummaryConfig,
|
||||
|
||||
/// Model instructions
|
||||
instructions: Option<String>,
|
||||
/// Model instructions that are appended to the base instructions.
|
||||
user_instructions: Option<String>,
|
||||
|
||||
/// Base instructions override.
|
||||
base_instructions: Option<String>,
|
||||
|
||||
/// When to escalate for approval for execution
|
||||
approval_policy: AskForApproval,
|
||||
/// How to sandbox commands executed in the system
|
||||
@@ -68,6 +78,10 @@ pub enum Op {
|
||||
/// `ConfigureSession` operation so that the business-logic layer can
|
||||
/// operate deterministically.
|
||||
cwd: std::path::PathBuf,
|
||||
|
||||
/// Path to a rollout file to resume from.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
resume_path: Option<std::path::PathBuf>,
|
||||
},
|
||||
|
||||
/// Abort current task.
|
||||
@@ -107,23 +121,28 @@ pub enum Op {
|
||||
|
||||
/// Request a single history entry identified by `log_id` + `offset`.
|
||||
GetHistoryEntryRequest { offset: usize, log_id: u64 },
|
||||
|
||||
/// Request the agent to summarize the current conversation context.
|
||||
/// The agent will use its existing context (either conversation history or previous response id)
|
||||
/// to generate a summary which will be returned as an AgentMessage event.
|
||||
Compact,
|
||||
/// Request to shut down codex instance.
|
||||
Shutdown,
|
||||
}
|
||||
|
||||
/// Determines how liberally commands are auto‑approved by the system.
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
/// Determines the conditions under which the user is consulted to approve
|
||||
/// running the command proposed by Codex.
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash, Serialize, Deserialize, Display)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
#[strum(serialize_all = "kebab-case")]
|
||||
pub enum AskForApproval {
|
||||
/// Under this policy, only “known safe” commands—as determined by
|
||||
/// Under this policy, only "known safe" commands—as determined by
|
||||
/// `is_safe_command()`—that **only read files** are auto‑approved.
|
||||
/// Everything else will ask the user to approve.
|
||||
#[default]
|
||||
UnlessAllowListed,
|
||||
|
||||
/// In addition to everything allowed by **`Suggest`**, commands that
|
||||
/// *write* to files **within the user’s approved list of writable paths**
|
||||
/// are also auto‑approved.
|
||||
/// TODO(ragona): fix
|
||||
AutoEdit,
|
||||
#[serde(rename = "untrusted")]
|
||||
#[strum(serialize = "untrusted")]
|
||||
UnlessTrusted,
|
||||
|
||||
/// *All* commands are auto‑approved, but they are expected to run inside a
|
||||
/// sandbox where network access is disabled and writes are confined to a
|
||||
@@ -131,160 +150,158 @@ pub enum AskForApproval {
|
||||
/// the user to approve execution without a sandbox.
|
||||
OnFailure,
|
||||
|
||||
/// The model decides when to ask the user for approval.
|
||||
OnRequest,
|
||||
|
||||
/// Never ask the user to approve commands. Failures are immediately returned
|
||||
/// to the model, and never escalated to the user for approval.
|
||||
Never,
|
||||
}
|
||||
|
||||
/// Determines execution restrictions for model shell commands
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub struct SandboxPolicy {
|
||||
permissions: Vec<SandboxPermission>,
|
||||
/// Determines execution restrictions for model shell commands.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Display)]
|
||||
#[strum(serialize_all = "kebab-case")]
|
||||
#[serde(tag = "mode", rename_all = "kebab-case")]
|
||||
pub enum SandboxPolicy {
|
||||
/// No restrictions whatsoever. Use with caution.
|
||||
#[serde(rename = "danger-full-access")]
|
||||
DangerFullAccess,
|
||||
|
||||
/// Read-only access to the entire file-system.
|
||||
#[serde(rename = "read-only")]
|
||||
ReadOnly,
|
||||
|
||||
/// Same as `ReadOnly` but additionally grants write access to the current
|
||||
/// working directory ("workspace").
|
||||
#[serde(rename = "workspace-write")]
|
||||
WorkspaceWrite {
|
||||
/// Additional folders (beyond cwd and possibly TMPDIR) that should be
|
||||
/// writable from within the sandbox.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
writable_roots: Vec<PathBuf>,
|
||||
|
||||
/// When set to `true`, outbound network access is allowed. `false` by
|
||||
/// default.
|
||||
#[serde(default)]
|
||||
network_access: bool,
|
||||
|
||||
/// When set to `true`, will include defaults like the current working
|
||||
/// directory and TMPDIR (on macOS). When `false`, only `writable_roots`
|
||||
/// are used. (Mainly used for testing.)
|
||||
#[serde(default = "default_true")]
|
||||
include_default_writable_roots: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl From<Vec<SandboxPermission>> for SandboxPolicy {
|
||||
fn from(permissions: Vec<SandboxPermission>) -> Self {
|
||||
Self { permissions }
|
||||
/// A writable root path accompanied by a list of subpaths that should remain
|
||||
/// read‑only even when the root is writable. This is primarily used to ensure
|
||||
/// top‑level VCS metadata directories (e.g. `.git`) under a writable root are
|
||||
/// not modified by the agent.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct WritableRoot {
|
||||
pub root: PathBuf,
|
||||
pub read_only_subpaths: Vec<PathBuf>,
|
||||
}
|
||||
|
||||
fn default_true() -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
impl FromStr for SandboxPolicy {
|
||||
type Err = serde_json::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
serde_json::from_str(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl SandboxPolicy {
|
||||
/// Returns a policy with read-only disk access and no network.
|
||||
pub fn new_read_only_policy() -> Self {
|
||||
Self {
|
||||
permissions: vec![SandboxPermission::DiskFullReadAccess],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_read_only_policy_with_writable_roots(writable_roots: &[PathBuf]) -> Self {
|
||||
let mut permissions = Self::new_read_only_policy().permissions;
|
||||
permissions.extend(writable_roots.iter().map(|folder| {
|
||||
SandboxPermission::DiskWriteFolder {
|
||||
folder: folder.clone(),
|
||||
}
|
||||
}));
|
||||
Self { permissions }
|
||||
}
|
||||
|
||||
pub fn new_full_auto_policy() -> Self {
|
||||
Self {
|
||||
permissions: vec![
|
||||
SandboxPermission::DiskFullReadAccess,
|
||||
SandboxPermission::DiskWritePlatformUserTempFolder,
|
||||
SandboxPermission::DiskWriteCwd,
|
||||
],
|
||||
SandboxPolicy::ReadOnly
|
||||
}
|
||||
|
||||
/// Returns a policy that can read the entire disk, but can only write to
|
||||
/// the current working directory and the per-user tmp dir on macOS. It does
|
||||
/// not allow network access.
|
||||
pub fn new_workspace_write_policy() -> Self {
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![],
|
||||
network_access: false,
|
||||
include_default_writable_roots: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Always returns `true` for now, as we do not yet support restricting read
|
||||
/// access.
|
||||
pub fn has_full_disk_read_access(&self) -> bool {
|
||||
self.permissions
|
||||
.iter()
|
||||
.any(|perm| matches!(perm, SandboxPermission::DiskFullReadAccess))
|
||||
true
|
||||
}
|
||||
|
||||
pub fn has_full_disk_write_access(&self) -> bool {
|
||||
self.permissions
|
||||
.iter()
|
||||
.any(|perm| matches!(perm, SandboxPermission::DiskFullWriteAccess))
|
||||
match self {
|
||||
SandboxPolicy::DangerFullAccess => true,
|
||||
SandboxPolicy::ReadOnly => false,
|
||||
SandboxPolicy::WorkspaceWrite { .. } => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has_full_network_access(&self) -> bool {
|
||||
self.permissions
|
||||
.iter()
|
||||
.any(|perm| matches!(perm, SandboxPermission::NetworkFullAccess))
|
||||
match self {
|
||||
SandboxPolicy::DangerFullAccess => true,
|
||||
SandboxPolicy::ReadOnly => false,
|
||||
SandboxPolicy::WorkspaceWrite { network_access, .. } => *network_access,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_writable_roots_with_cwd(&self, cwd: &Path) -> Vec<PathBuf> {
|
||||
let mut writable_roots = Vec::<PathBuf>::new();
|
||||
for perm in &self.permissions {
|
||||
use SandboxPermission::*;
|
||||
match perm {
|
||||
DiskWritePlatformUserTempFolder => {
|
||||
/// Returns the list of writable roots (tailored to the current working
|
||||
/// directory) together with subpaths that should remain read‑only under
|
||||
/// each writable root.
|
||||
pub fn get_writable_roots_with_cwd(&self, cwd: &Path) -> Vec<WritableRoot> {
|
||||
match self {
|
||||
SandboxPolicy::DangerFullAccess => Vec::new(),
|
||||
SandboxPolicy::ReadOnly => Vec::new(),
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots,
|
||||
include_default_writable_roots,
|
||||
..
|
||||
} => {
|
||||
// Start from explicitly configured writable roots.
|
||||
let mut roots: Vec<PathBuf> = writable_roots.clone();
|
||||
|
||||
// Optionally include defaults (cwd and TMPDIR on macOS).
|
||||
if *include_default_writable_roots {
|
||||
roots.push(cwd.to_path_buf());
|
||||
|
||||
// Also include the per-user tmp dir on macOS.
|
||||
// Note this is added dynamically rather than storing it in
|
||||
// `writable_roots` because `writable_roots` contains only static
|
||||
// values deserialized from the config file.
|
||||
if cfg!(target_os = "macos") {
|
||||
if let Some(tempdir) = std::env::var_os("TMPDIR") {
|
||||
// Likely something that starts with /var/folders/...
|
||||
let tmpdir_path = PathBuf::from(&tempdir);
|
||||
if tmpdir_path.is_absolute() {
|
||||
writable_roots.push(tmpdir_path.clone());
|
||||
match tmpdir_path.canonicalize() {
|
||||
Ok(canonicalized) => {
|
||||
// Likely something that starts with /private/var/folders/...
|
||||
if canonicalized != tmpdir_path {
|
||||
writable_roots.push(canonicalized);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to canonicalize TMPDIR: {e}");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::error!("TMPDIR is not an absolute path: {tempdir:?}");
|
||||
}
|
||||
if let Some(tmpdir) = std::env::var_os("TMPDIR") {
|
||||
roots.push(PathBuf::from(tmpdir));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For Linux, should this be XDG_RUNTIME_DIR, /run/user/<uid>, or something else?
|
||||
}
|
||||
DiskWritePlatformGlobalTempFolder => {
|
||||
if cfg!(unix) {
|
||||
writable_roots.push(PathBuf::from("/tmp"));
|
||||
}
|
||||
}
|
||||
DiskWriteCwd => {
|
||||
writable_roots.push(cwd.to_path_buf());
|
||||
}
|
||||
DiskWriteFolder { folder } => {
|
||||
writable_roots.push(folder.clone());
|
||||
}
|
||||
DiskFullReadAccess | NetworkFullAccess => {}
|
||||
DiskFullWriteAccess => {
|
||||
// Currently, we expect callers to only invoke this method
|
||||
// after verifying has_full_disk_write_access() is false.
|
||||
}
|
||||
// For each root, compute subpaths that should remain read-only.
|
||||
roots
|
||||
.into_iter()
|
||||
.map(|writable_root| {
|
||||
let mut subpaths = Vec::new();
|
||||
let top_level_git = writable_root.join(".git");
|
||||
if top_level_git.is_dir() {
|
||||
subpaths.push(top_level_git);
|
||||
}
|
||||
WritableRoot {
|
||||
root: writable_root,
|
||||
read_only_subpaths: subpaths,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
writable_roots
|
||||
}
|
||||
|
||||
pub fn is_unrestricted(&self) -> bool {
|
||||
self.has_full_disk_read_access()
|
||||
&& self.has_full_disk_write_access()
|
||||
&& self.has_full_network_access()
|
||||
}
|
||||
}
|
||||
|
||||
/// Permissions that should be granted to the sandbox in which the agent
|
||||
/// operates.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum SandboxPermission {
|
||||
/// Is allowed to read all files on disk.
|
||||
DiskFullReadAccess,
|
||||
|
||||
/// Is allowed to write to the operating system's temp dir that
|
||||
/// is restricted to the user the agent is running as. For
|
||||
/// example, on macOS, this is generally something under
|
||||
/// `/var/folders` as opposed to `/tmp`.
|
||||
DiskWritePlatformUserTempFolder,
|
||||
|
||||
/// Is allowed to write to the operating system's shared temp
|
||||
/// dir. On UNIX, this is generally `/tmp`.
|
||||
DiskWritePlatformGlobalTempFolder,
|
||||
|
||||
/// Is allowed to write to the current working directory (in practice, this
|
||||
/// is the `cwd` where `codex` was spawned).
|
||||
DiskWriteCwd,
|
||||
|
||||
/// Is allowed to the specified folder. `PathBuf` must be an
|
||||
/// absolute path, though it is up to the caller to canonicalize
|
||||
/// it if the path contains symlinks.
|
||||
DiskWriteFolder { folder: PathBuf },
|
||||
|
||||
/// Is allowed to write to any file on disk.
|
||||
DiskFullWriteAccess,
|
||||
|
||||
/// Can make arbitrary network requests.
|
||||
NetworkFullAccess,
|
||||
}
|
||||
|
||||
/// User input
|
||||
@@ -317,8 +334,9 @@ pub struct Event {
|
||||
}
|
||||
|
||||
/// Response event from the agent
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, Display)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
#[strum(serialize_all = "snake_case")]
|
||||
pub enum EventMsg {
|
||||
/// Error while executing a submission
|
||||
Error(ErrorEvent),
|
||||
@@ -329,12 +347,28 @@ pub enum EventMsg {
|
||||
/// Agent has completed all actions
|
||||
TaskComplete(TaskCompleteEvent),
|
||||
|
||||
/// Token count event, sent periodically to report the number of tokens
|
||||
/// used in the current session.
|
||||
TokenCount(TokenUsage),
|
||||
|
||||
/// Agent text output message
|
||||
AgentMessage(AgentMessageEvent),
|
||||
|
||||
/// Agent text output delta message
|
||||
AgentMessageDelta(AgentMessageDeltaEvent),
|
||||
|
||||
/// Reasoning event from agent.
|
||||
AgentReasoning(AgentReasoningEvent),
|
||||
|
||||
/// Agent reasoning delta event from agent.
|
||||
AgentReasoningDelta(AgentReasoningDeltaEvent),
|
||||
|
||||
/// Raw chain-of-thought from agent.
|
||||
AgentReasoningRawContent(AgentReasoningRawContentEvent),
|
||||
|
||||
/// Agent reasoning content delta event from agent.
|
||||
AgentReasoningRawContentDelta(AgentReasoningRawContentDeltaEvent),
|
||||
|
||||
/// Ack the client's configure message.
|
||||
SessionConfigured(SessionConfiguredEvent),
|
||||
|
||||
@@ -345,6 +379,9 @@ pub enum EventMsg {
|
||||
/// Notification that the server is about to execute a command.
|
||||
ExecCommandBegin(ExecCommandBeginEvent),
|
||||
|
||||
/// Incremental chunk of output from a running command.
|
||||
ExecCommandOutputDelta(ExecCommandOutputDeltaEvent),
|
||||
|
||||
ExecCommandEnd(ExecCommandEndEvent),
|
||||
|
||||
ExecApprovalRequest(ExecApprovalRequestEvent),
|
||||
@@ -360,8 +397,15 @@ pub enum EventMsg {
|
||||
/// Notification that a patch application has finished.
|
||||
PatchApplyEnd(PatchApplyEndEvent),
|
||||
|
||||
TurnDiff(TurnDiffEvent),
|
||||
|
||||
/// Response to GetHistoryEntryRequest.
|
||||
GetHistoryEntryResponse(GetHistoryEntryResponseEvent),
|
||||
|
||||
PlanUpdate(UpdatePlanArgs),
|
||||
|
||||
/// Notification that the agent is shutting down.
|
||||
ShutdownComplete,
|
||||
}
|
||||
|
||||
// Individual event payload types matching each `EventMsg` variant.
|
||||
@@ -376,20 +420,83 @@ pub struct TaskCompleteEvent {
|
||||
pub last_agent_message: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
|
||||
pub struct TokenUsage {
|
||||
pub input_tokens: u64,
|
||||
pub cached_input_tokens: Option<u64>,
|
||||
pub output_tokens: u64,
|
||||
pub reasoning_output_tokens: Option<u64>,
|
||||
pub total_tokens: u64,
|
||||
}
|
||||
|
||||
impl TokenUsage {
|
||||
pub fn is_zero(&self) -> bool {
|
||||
self.total_tokens == 0
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct FinalOutput {
|
||||
pub token_usage: TokenUsage,
|
||||
}
|
||||
|
||||
impl From<TokenUsage> for FinalOutput {
|
||||
fn from(token_usage: TokenUsage) -> Self {
|
||||
Self { token_usage }
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for FinalOutput {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let u = &self.token_usage;
|
||||
write!(
|
||||
f,
|
||||
"Token usage: total={} input={}{} output={}{}",
|
||||
u.total_tokens,
|
||||
u.input_tokens,
|
||||
u.cached_input_tokens
|
||||
.map(|c| format!(" (cached {c})"))
|
||||
.unwrap_or_default(),
|
||||
u.output_tokens,
|
||||
u.reasoning_output_tokens
|
||||
.map(|r| format!(" (reasoning {r})"))
|
||||
.unwrap_or_default()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct AgentMessageEvent {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct AgentMessageDeltaEvent {
|
||||
pub delta: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct AgentReasoningEvent {
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct McpToolCallBeginEvent {
|
||||
/// Identifier so this can be paired with the McpToolCallEnd event.
|
||||
pub call_id: String,
|
||||
pub struct AgentReasoningRawContentEvent {
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct AgentReasoningRawContentDeltaEvent {
|
||||
pub delta: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct AgentReasoningDeltaEvent {
|
||||
pub delta: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct McpInvocation {
|
||||
/// Name of the MCP server as defined in the config.
|
||||
pub server: String,
|
||||
/// Name of the tool as given by the MCP server.
|
||||
@@ -398,10 +505,19 @@ pub struct McpToolCallBeginEvent {
|
||||
pub arguments: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct McpToolCallBeginEvent {
|
||||
/// Identifier so this can be paired with the McpToolCallEnd event.
|
||||
pub call_id: String,
|
||||
pub invocation: McpInvocation,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct McpToolCallEndEvent {
|
||||
/// Identifier for the corresponding McpToolCallBegin that finished.
|
||||
pub call_id: String,
|
||||
pub invocation: McpInvocation,
|
||||
pub duration: Duration,
|
||||
/// Result of the tool call. Note this could be an error.
|
||||
pub result: Result<CallToolResult, String>,
|
||||
}
|
||||
@@ -435,10 +551,32 @@ pub struct ExecCommandEndEvent {
|
||||
pub stderr: String,
|
||||
/// The command's exit code.
|
||||
pub exit_code: i32,
|
||||
/// The duration of the command execution.
|
||||
pub duration: Duration,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ExecOutputStream {
|
||||
Stdout,
|
||||
Stderr,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct ExecCommandOutputDeltaEvent {
|
||||
/// Identifier for the ExecCommandBegin that produced this chunk.
|
||||
pub call_id: String,
|
||||
/// Which stream produced this chunk.
|
||||
pub stream: ExecOutputStream,
|
||||
/// Raw bytes from the stream (may not be valid UTF-8).
|
||||
#[serde(with = "serde_bytes")]
|
||||
pub chunk: ByteBuf,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct ExecApprovalRequestEvent {
|
||||
/// Identifier for the associated exec call, if available.
|
||||
pub call_id: String,
|
||||
/// The command to be executed.
|
||||
pub command: Vec<String>,
|
||||
/// The command's working directory.
|
||||
@@ -450,6 +588,8 @@ pub struct ExecApprovalRequestEvent {
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct ApplyPatchApprovalRequestEvent {
|
||||
/// Responses API call id for the associated patch apply call, if available.
|
||||
pub call_id: String,
|
||||
pub changes: HashMap<PathBuf, FileChange>,
|
||||
/// Optional explanatory reason (e.g. request for extra write access).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
@@ -486,6 +626,11 @@ pub struct PatchApplyEndEvent {
|
||||
pub success: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct TurnDiffEvent {
|
||||
pub unified_diff: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct GetHistoryEntryResponseEvent {
|
||||
pub offset: usize,
|
||||
|
||||
@@ -1,33 +1,57 @@
|
||||
//! Functionality to persist a Codex conversation *rollout* – a linear list of
|
||||
//! [`ResponseItem`] objects exchanged during a session – to disk so that
|
||||
//! sessions can be replayed or inspected later (mirrors the behaviour of the
|
||||
//! upstream TypeScript implementation).
|
||||
//! Persist Codex session rollouts (.jsonl) so sessions can be replayed or inspected later.
|
||||
|
||||
use std::fs::File;
|
||||
use std::fs::{self};
|
||||
use std::io::Error as IoError;
|
||||
use std::path::Path;
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use time::OffsetDateTime;
|
||||
use time::format_description::FormatItem;
|
||||
use time::macros::format_description;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::sync::mpsc::Sender;
|
||||
use tokio::sync::mpsc::{self};
|
||||
use tokio::sync::oneshot;
|
||||
use tracing::info;
|
||||
use tracing::warn;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::git_info::GitInfo;
|
||||
use crate::git_info::collect_git_info;
|
||||
use crate::models::ResponseItem;
|
||||
|
||||
/// Folder inside `~/.codex` that holds saved rollouts.
|
||||
const SESSIONS_SUBDIR: &str = "sessions";
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Default)]
|
||||
pub struct SessionMeta {
|
||||
pub id: Uuid,
|
||||
pub timestamp: String,
|
||||
pub instructions: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct SessionMeta {
|
||||
id: String,
|
||||
timestamp: String,
|
||||
struct SessionMetaWithGit {
|
||||
#[serde(flatten)]
|
||||
meta: SessionMeta,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
instructions: Option<String>,
|
||||
git: Option<GitInfo>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Default, Clone)]
|
||||
pub struct SessionStateSnapshot {}
|
||||
|
||||
#[derive(Serialize, Deserialize, Default, Clone)]
|
||||
pub struct SavedSession {
|
||||
pub session: SessionMeta,
|
||||
#[serde(default)]
|
||||
pub items: Vec<ResponseItem>,
|
||||
#[serde(default)]
|
||||
pub state: SessionStateSnapshot,
|
||||
pub session_id: Uuid,
|
||||
}
|
||||
|
||||
/// Records all [`ResponseItem`]s for a session and flushes them to disk after
|
||||
@@ -41,7 +65,13 @@ struct SessionMeta {
|
||||
/// ```
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct RolloutRecorder {
|
||||
tx: Sender<String>,
|
||||
tx: Sender<RolloutCmd>,
|
||||
}
|
||||
|
||||
enum RolloutCmd {
|
||||
AddItems(Vec<ResponseItem>),
|
||||
UpdateState(SessionStateSnapshot),
|
||||
Shutdown { ack: oneshot::Sender<()> },
|
||||
}
|
||||
|
||||
impl RolloutRecorder {
|
||||
@@ -59,7 +89,6 @@ impl RolloutRecorder {
|
||||
timestamp,
|
||||
} = create_log_file(config, uuid)?;
|
||||
|
||||
// Build the static session metadata JSON first.
|
||||
let timestamp_format: &[FormatItem] = format_description!(
|
||||
"[year]-[month]-[day]T[hour]:[minute]:[second].[subsecond digits:3]Z"
|
||||
);
|
||||
@@ -67,48 +96,33 @@ impl RolloutRecorder {
|
||||
.format(timestamp_format)
|
||||
.map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;
|
||||
|
||||
let meta = SessionMeta {
|
||||
timestamp,
|
||||
id: session_id.to_string(),
|
||||
instructions,
|
||||
};
|
||||
// Clone the cwd for the spawned task to collect git info asynchronously
|
||||
let cwd = config.cwd.clone();
|
||||
|
||||
// A reasonably-sized bounded channel. If the buffer fills up the send
|
||||
// future will yield, which is fine – we only need to ensure we do not
|
||||
// perform *blocking* I/O on the caller’s thread.
|
||||
let (tx, mut rx) = mpsc::channel::<String>(256);
|
||||
// perform *blocking* I/O on the caller's thread.
|
||||
let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
|
||||
|
||||
// Spawn a Tokio task that owns the file handle and performs async
|
||||
// writes. Using `tokio::fs::File` keeps everything on the async I/O
|
||||
// driver instead of blocking the runtime.
|
||||
tokio::task::spawn(async move {
|
||||
let mut file = tokio::fs::File::from_std(file);
|
||||
tokio::task::spawn(rollout_writer(
|
||||
tokio::fs::File::from_std(file),
|
||||
rx,
|
||||
Some(SessionMeta {
|
||||
timestamp,
|
||||
id: session_id,
|
||||
instructions,
|
||||
}),
|
||||
cwd,
|
||||
));
|
||||
|
||||
while let Some(line) = rx.recv().await {
|
||||
// Write line + newline, then flush to disk.
|
||||
if let Err(e) = file.write_all(line.as_bytes()).await {
|
||||
tracing::warn!("rollout writer: failed to write line: {e}");
|
||||
break;
|
||||
}
|
||||
if let Err(e) = file.write_all(b"\n").await {
|
||||
tracing::warn!("rollout writer: failed to write newline: {e}");
|
||||
break;
|
||||
}
|
||||
if let Err(e) = file.flush().await {
|
||||
tracing::warn!("rollout writer: failed to flush: {e}");
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let recorder = Self { tx };
|
||||
// Ensure SessionMeta is the first item in the file.
|
||||
recorder.record_item(&meta).await?;
|
||||
Ok(recorder)
|
||||
Ok(Self { tx })
|
||||
}
|
||||
|
||||
/// Append `items` to the rollout file.
|
||||
pub(crate) async fn record_items(&self, items: &[ResponseItem]) -> std::io::Result<()> {
|
||||
let mut filtered = Vec::new();
|
||||
for item in items {
|
||||
match item {
|
||||
// Note that function calls may look a bit strange if they are
|
||||
@@ -117,27 +131,114 @@ impl RolloutRecorder {
|
||||
ResponseItem::Message { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::FunctionCallOutput { .. } => {}
|
||||
ResponseItem::Reasoning { .. } | ResponseItem::Other => {
|
||||
| ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::Reasoning { .. } => filtered.push(item.clone()),
|
||||
ResponseItem::Other => {
|
||||
// These should never be serialized.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
self.record_item(item).await?;
|
||||
}
|
||||
Ok(())
|
||||
if filtered.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
self.tx
|
||||
.send(RolloutCmd::AddItems(filtered))
|
||||
.await
|
||||
.map_err(|e| IoError::other(format!("failed to queue rollout items: {e}")))
|
||||
}
|
||||
|
||||
async fn record_item(&self, item: &impl Serialize) -> std::io::Result<()> {
|
||||
// Serialize the item to JSON first so that the writer thread only has
|
||||
// to perform the actual write.
|
||||
let json = serde_json::to_string(item)
|
||||
.map_err(|e| IoError::other(format!("failed to serialize response items: {e}")))?;
|
||||
|
||||
pub(crate) async fn record_state(&self, state: SessionStateSnapshot) -> std::io::Result<()> {
|
||||
self.tx
|
||||
.send(json)
|
||||
.send(RolloutCmd::UpdateState(state))
|
||||
.await
|
||||
.map_err(|e| IoError::other(format!("failed to queue rollout item: {e}")))
|
||||
.map_err(|e| IoError::other(format!("failed to queue rollout state: {e}")))
|
||||
}
|
||||
|
||||
pub async fn resume(
|
||||
path: &Path,
|
||||
cwd: std::path::PathBuf,
|
||||
) -> std::io::Result<(Self, SavedSession)> {
|
||||
info!("Resuming rollout from {path:?}");
|
||||
let text = tokio::fs::read_to_string(path).await?;
|
||||
let mut lines = text.lines();
|
||||
let meta_line = lines
|
||||
.next()
|
||||
.ok_or_else(|| IoError::other("empty session file"))?;
|
||||
let session: SessionMeta = serde_json::from_str(meta_line)
|
||||
.map_err(|e| IoError::other(format!("failed to parse session meta: {e}")))?;
|
||||
let mut items = Vec::new();
|
||||
let mut state = SessionStateSnapshot::default();
|
||||
|
||||
for line in lines {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let v: Value = match serde_json::from_str(line) {
|
||||
Ok(v) => v,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if v.get("record_type")
|
||||
.and_then(|rt| rt.as_str())
|
||||
.map(|s| s == "state")
|
||||
.unwrap_or(false)
|
||||
{
|
||||
if let Ok(s) = serde_json::from_value::<SessionStateSnapshot>(v.clone()) {
|
||||
state = s
|
||||
}
|
||||
continue;
|
||||
}
|
||||
match serde_json::from_value::<ResponseItem>(v.clone()) {
|
||||
Ok(item) => match item {
|
||||
ResponseItem::Message { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::Reasoning { .. } => items.push(item),
|
||||
ResponseItem::Other => {}
|
||||
},
|
||||
Err(e) => {
|
||||
warn!("failed to parse item: {v:?}, error: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let saved = SavedSession {
|
||||
session: session.clone(),
|
||||
items: items.clone(),
|
||||
state: state.clone(),
|
||||
session_id: session.id,
|
||||
};
|
||||
|
||||
let file = std::fs::OpenOptions::new()
|
||||
.append(true)
|
||||
.read(true)
|
||||
.open(path)?;
|
||||
|
||||
let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
|
||||
tokio::task::spawn(rollout_writer(
|
||||
tokio::fs::File::from_std(file),
|
||||
rx,
|
||||
None,
|
||||
cwd,
|
||||
));
|
||||
info!("Resumed rollout successfully from {path:?}");
|
||||
Ok((Self { tx }, saved))
|
||||
}
|
||||
|
||||
pub async fn shutdown(&self) -> std::io::Result<()> {
|
||||
let (tx_done, rx_done) = oneshot::channel();
|
||||
match self.tx.send(RolloutCmd::Shutdown { ack: tx_done }).await {
|
||||
Ok(_) => rx_done
|
||||
.await
|
||||
.map_err(|e| IoError::other(format!("failed waiting for rollout shutdown: {e}"))),
|
||||
Err(e) => {
|
||||
warn!("failed to send rollout shutdown command: {e}");
|
||||
Err(IoError::other(format!(
|
||||
"failed to send rollout shutdown command: {e}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -153,13 +254,15 @@ struct LogFileInfo {
|
||||
}
|
||||
|
||||
fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFileInfo> {
|
||||
// Resolve ~/.codex/sessions and create it if missing.
|
||||
let mut dir = config.codex_home.clone();
|
||||
dir.push(SESSIONS_SUBDIR);
|
||||
fs::create_dir_all(&dir)?;
|
||||
|
||||
// Resolve ~/.codex/sessions/YYYY/MM/DD and create it if missing.
|
||||
let timestamp = OffsetDateTime::now_local()
|
||||
.map_err(|e| IoError::other(format!("failed to get local time: {e}")))?;
|
||||
let mut dir = config.codex_home.clone();
|
||||
dir.push(SESSIONS_SUBDIR);
|
||||
dir.push(timestamp.year().to_string());
|
||||
dir.push(format!("{:02}", u8::from(timestamp.month())));
|
||||
dir.push(format!("{:02}", timestamp.day()));
|
||||
fs::create_dir_all(&dir)?;
|
||||
|
||||
// Custom format for YYYY-MM-DDThh-mm-ss. Use `-` instead of `:` for
|
||||
// compatibility with filesystems that do not allow colons in filenames.
|
||||
@@ -183,3 +286,77 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
|
||||
timestamp,
|
||||
})
|
||||
}
|
||||
|
||||
async fn rollout_writer(
|
||||
file: tokio::fs::File,
|
||||
mut rx: mpsc::Receiver<RolloutCmd>,
|
||||
mut meta: Option<SessionMeta>,
|
||||
cwd: std::path::PathBuf,
|
||||
) -> std::io::Result<()> {
|
||||
let mut writer = JsonlWriter { file };
|
||||
|
||||
// If we have a meta, collect git info asynchronously and write meta first
|
||||
if let Some(session_meta) = meta.take() {
|
||||
let git_info = collect_git_info(&cwd).await;
|
||||
let session_meta_with_git = SessionMetaWithGit {
|
||||
meta: session_meta,
|
||||
git: git_info,
|
||||
};
|
||||
|
||||
// Write the SessionMeta as the first item in the file
|
||||
writer.write_line(&session_meta_with_git).await?;
|
||||
}
|
||||
|
||||
// Process rollout commands
|
||||
while let Some(cmd) = rx.recv().await {
|
||||
match cmd {
|
||||
RolloutCmd::AddItems(items) => {
|
||||
for item in items {
|
||||
match item {
|
||||
ResponseItem::Message { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::Reasoning { .. } => {
|
||||
writer.write_line(&item).await?;
|
||||
}
|
||||
ResponseItem::Other => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
RolloutCmd::UpdateState(state) => {
|
||||
#[derive(Serialize)]
|
||||
struct StateLine<'a> {
|
||||
record_type: &'static str,
|
||||
#[serde(flatten)]
|
||||
state: &'a SessionStateSnapshot,
|
||||
}
|
||||
writer
|
||||
.write_line(&StateLine {
|
||||
record_type: "state",
|
||||
state: &state,
|
||||
})
|
||||
.await?;
|
||||
}
|
||||
RolloutCmd::Shutdown { ack } => {
|
||||
let _ = ack.send(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct JsonlWriter {
|
||||
file: tokio::fs::File,
|
||||
}
|
||||
|
||||
impl JsonlWriter {
|
||||
async fn write_line(&mut self, item: &impl serde::Serialize) -> std::io::Result<()> {
|
||||
let mut json = serde_json::to_string(item)?;
|
||||
json.push('\n');
|
||||
let _ = self.file.write_all(json.as_bytes()).await;
|
||||
self.file.flush().await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ use crate::is_safe_command::is_known_safe_command;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum SafetyCheck {
|
||||
AutoApprove { sandbox_type: SandboxType },
|
||||
AskUser,
|
||||
@@ -31,21 +31,23 @@ pub fn assess_patch_safety(
|
||||
}
|
||||
|
||||
match policy {
|
||||
AskForApproval::OnFailure | AskForApproval::AutoEdit | AskForApproval::Never => {
|
||||
AskForApproval::OnFailure | AskForApproval::Never | AskForApproval::OnRequest => {
|
||||
// Continue to see if this can be auto-approved.
|
||||
}
|
||||
// TODO(ragona): I'm not sure this is actually correct? I believe in this case
|
||||
// we want to continue to the writable paths check before asking the user.
|
||||
AskForApproval::UnlessAllowListed => {
|
||||
AskForApproval::UnlessTrusted => {
|
||||
return SafetyCheck::AskUser;
|
||||
}
|
||||
}
|
||||
|
||||
if is_write_patch_constrained_to_writable_paths(action, writable_roots, cwd) {
|
||||
SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
}
|
||||
} else if policy == AskForApproval::OnFailure {
|
||||
// Even though the patch *appears* to be constrained to writable paths, it
|
||||
// is possible that paths in the patch are hard links to files outside the
|
||||
// writable roots, so we should still run `apply_patch` in a sandbox in that
|
||||
// case.
|
||||
if is_write_patch_constrained_to_writable_paths(action, writable_roots, cwd)
|
||||
|| policy == AskForApproval::OnFailure
|
||||
{
|
||||
// Only auto‑approve when we can actually enforce a sandbox. Otherwise
|
||||
// fall back to asking the user because the patch may touch arbitrary
|
||||
// paths outside the project.
|
||||
@@ -63,40 +65,94 @@ pub fn assess_patch_safety(
|
||||
}
|
||||
}
|
||||
|
||||
/// For a command to be run _without_ a sandbox, one of the following must be
|
||||
/// true:
|
||||
///
|
||||
/// - the user has explicitly approved the command
|
||||
/// - the command is on the "known safe" list
|
||||
/// - `DangerFullAccess` was specified and `UnlessTrusted` was not
|
||||
pub fn assess_command_safety(
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
approved: &HashSet<Vec<String>>,
|
||||
with_escalated_permissions: bool,
|
||||
) -> SafetyCheck {
|
||||
let approve_without_sandbox = || SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
};
|
||||
|
||||
// Previously approved or allow-listed commands
|
||||
// All approval modes allow these commands to continue without sandboxing
|
||||
// A command is "trusted" because either:
|
||||
// - it belongs to a set of commands we consider "safe" by default, or
|
||||
// - the user has explicitly approved the command for this session
|
||||
//
|
||||
// Currently, whether a command is "trusted" is a simple boolean, but we
|
||||
// should include more metadata on this command test to indicate whether it
|
||||
// should be run inside a sandbox or not. (This could be something the user
|
||||
// defines as part of `execpolicy`.)
|
||||
//
|
||||
// For example, when `is_known_safe_command(command)` returns `true`, it
|
||||
// would probably be fine to run the command in a sandbox, but when
|
||||
// `approved.contains(command)` is `true`, the user may have approved it for
|
||||
// the session _because_ they know it needs to run outside a sandbox.
|
||||
if is_known_safe_command(command) || approved.contains(command) {
|
||||
// TODO(ragona): I think we should consider running even these inside the sandbox, but it's
|
||||
// a change in behavior so I'm keeping it at parity with upstream for now.
|
||||
return approve_without_sandbox();
|
||||
return SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
};
|
||||
}
|
||||
|
||||
// Command was not known-safe or allow-listed
|
||||
if sandbox_policy.is_unrestricted() {
|
||||
approve_without_sandbox()
|
||||
} else {
|
||||
match get_platform_sandbox() {
|
||||
// We have a sandbox, so we can approve the command in all modes
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove { sandbox_type },
|
||||
None => {
|
||||
// We do not have a sandbox, so we need to consider the approval policy
|
||||
match approval_policy {
|
||||
// Never is our "non-interactive" mode; it must automatically reject
|
||||
AskForApproval::Never => SafetyCheck::Reject {
|
||||
reason: "auto-rejected by user approval settings".to_string(),
|
||||
},
|
||||
// Otherwise, we ask the user for approval
|
||||
_ => SafetyCheck::AskUser,
|
||||
assess_safety_for_untrusted_command(approval_policy, sandbox_policy, with_escalated_permissions)
|
||||
}
|
||||
|
||||
pub(crate) fn assess_safety_for_untrusted_command(
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
with_escalated_permissions: bool,
|
||||
) -> SafetyCheck {
|
||||
use AskForApproval::*;
|
||||
use SandboxPolicy::*;
|
||||
|
||||
match (approval_policy, sandbox_policy) {
|
||||
(UnlessTrusted, _) => {
|
||||
// Even though the user may have opted into DangerFullAccess,
|
||||
// they also requested that we ask for approval for untrusted
|
||||
// commands.
|
||||
SafetyCheck::AskUser
|
||||
}
|
||||
(OnFailure, DangerFullAccess)
|
||||
| (Never, DangerFullAccess)
|
||||
| (OnRequest, DangerFullAccess) => SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
},
|
||||
(OnRequest, ReadOnly) | (OnRequest, WorkspaceWrite { .. }) => {
|
||||
if with_escalated_permissions {
|
||||
SafetyCheck::AskUser
|
||||
} else {
|
||||
match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove { sandbox_type },
|
||||
// Fall back to asking since the command is untrusted and
|
||||
// we do not have a sandbox available
|
||||
None => SafetyCheck::AskUser,
|
||||
}
|
||||
}
|
||||
}
|
||||
(Never, ReadOnly)
|
||||
| (Never, WorkspaceWrite { .. })
|
||||
| (OnFailure, ReadOnly)
|
||||
| (OnFailure, WorkspaceWrite { .. }) => {
|
||||
match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove { sandbox_type },
|
||||
None => {
|
||||
if matches!(approval_policy, OnFailure) {
|
||||
// Since the command is not trusted, even though the
|
||||
// user has requested to only ask for approval on
|
||||
// failure, we will ask the user because no sandbox is
|
||||
// available.
|
||||
SafetyCheck::AskUser
|
||||
} else {
|
||||
// We are in non-interactive mode and lack approval, so
|
||||
// all we can do is reject the command.
|
||||
SafetyCheck::Reject {
|
||||
reason: "auto-rejected because command is not on trusted list"
|
||||
.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -224,4 +280,47 @@ mod tests {
|
||||
&cwd,
|
||||
))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_request_escalated_privileges() {
|
||||
// Should not be a trusted command
|
||||
let command = vec!["git commit".to_string()];
|
||||
let approval_policy = AskForApproval::OnRequest;
|
||||
let sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
let approved: HashSet<Vec<String>> = HashSet::new();
|
||||
let request_escalated_privileges = true;
|
||||
|
||||
let safety_check = assess_command_safety(
|
||||
&command,
|
||||
approval_policy,
|
||||
&sandbox_policy,
|
||||
&approved,
|
||||
request_escalated_privileges,
|
||||
);
|
||||
|
||||
assert_eq!(safety_check, SafetyCheck::AskUser);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_request_escalated_privileges_no_sandbox_fallback() {
|
||||
let command = vec!["git".to_string(), "commit".to_string()];
|
||||
let approval_policy = AskForApproval::OnRequest;
|
||||
let sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
let approved: HashSet<Vec<String>> = HashSet::new();
|
||||
let request_escalated_privileges = false;
|
||||
|
||||
let safety_check = assess_command_safety(
|
||||
&command,
|
||||
approval_policy,
|
||||
&sandbox_policy,
|
||||
&approved,
|
||||
request_escalated_privileges,
|
||||
);
|
||||
|
||||
let expected = match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove { sandbox_type },
|
||||
None => SafetyCheck::AskUser,
|
||||
};
|
||||
assert_eq!(safety_check, expected);
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user