diff --git a/.codespellignore b/.codespellignore index 546a192701..d74f5ed86c 100644 --- a/.codespellignore +++ b/.codespellignore @@ -1 +1,2 @@ iTerm +psuedo \ No newline at end of file diff --git a/.github/actions/linux-code-sign/action.yml b/.github/actions/linux-code-sign/action.yml new file mode 100644 index 0000000000..5a117b0805 --- /dev/null +++ b/.github/actions/linux-code-sign/action.yml @@ -0,0 +1,44 @@ +name: linux-code-sign +description: Sign Linux artifacts with cosign. +inputs: + target: + description: Target triple for the artifacts to sign. + required: true + artifacts-dir: + description: Absolute path to the directory containing built binaries to sign. + required: true + +runs: + using: composite + steps: + - name: Install cosign + uses: sigstore/cosign-installer@v3.7.0 + + - name: Cosign Linux artifacts + shell: bash + env: + COSIGN_EXPERIMENTAL: "1" + COSIGN_YES: "true" + COSIGN_OIDC_CLIENT_ID: "sigstore" + COSIGN_OIDC_ISSUER: "https://oauth2.sigstore.dev/auth" + run: | + set -euo pipefail + + dest="${{ inputs.artifacts-dir }}" + if [[ ! -d "$dest" ]]; then + echo "Destination $dest does not exist" + exit 1 + fi + + for binary in codex codex-responses-api-proxy; do + artifact="${dest}/${binary}" + if [[ ! -f "$artifact" ]]; then + echo "Binary $artifact not found" + exit 1 + fi + + cosign sign-blob \ + --yes \ + --bundle "${artifact}.sigstore" \ + "$artifact" + done diff --git a/.github/actions/windows-code-sign/action.yml b/.github/actions/windows-code-sign/action.yml new file mode 100644 index 0000000000..2be64efc98 --- /dev/null +++ b/.github/actions/windows-code-sign/action.yml @@ -0,0 +1,55 @@ +name: windows-code-sign +description: Sign Windows binaries with Azure Trusted Signing. +inputs: + target: + description: Target triple for the artifacts to sign. + required: true + client-id: + description: Azure Trusted Signing client ID. + required: true + tenant-id: + description: Azure tenant ID for Trusted Signing. 
+ required: true + subscription-id: + description: Azure subscription ID for Trusted Signing. + required: true + endpoint: + description: Azure Trusted Signing endpoint. + required: true + account-name: + description: Azure Trusted Signing account name. + required: true + certificate-profile-name: + description: Certificate profile name for signing. + required: true + +runs: + using: composite + steps: + - name: Azure login for Trusted Signing (OIDC) + uses: azure/login@v2 + with: + client-id: ${{ inputs.client-id }} + tenant-id: ${{ inputs.tenant-id }} + subscription-id: ${{ inputs.subscription-id }} + + - name: Sign Windows binaries with Azure Trusted Signing + uses: azure/trusted-signing-action@v0 + with: + endpoint: ${{ inputs.endpoint }} + trusted-signing-account-name: ${{ inputs.account-name }} + certificate-profile-name: ${{ inputs.certificate-profile-name }} + exclude-environment-credential: true + exclude-workload-identity-credential: true + exclude-managed-identity-credential: true + exclude-shared-token-cache-credential: true + exclude-visual-studio-credential: true + exclude-visual-studio-code-credential: true + exclude-azure-cli-credential: false + exclude-azure-powershell-credential: true + exclude-azure-developer-cli-credential: true + exclude-interactive-browser-credential: true + cache-dependencies: false + files: | + ${{ github.workspace }}/codex-rs/target/${{ inputs.target }}/release/codex.exe + ${{ github.workspace }}/codex-rs/target/${{ inputs.target }}/release/codex-responses-api-proxy.exe diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 08c39db69d..13fcaacbbd 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -369,6 +369,49 @@ jobs: steps: - uses: actions/checkout@v6 + + # We have been running out of space when running this job on Linux for + # x86_64-unknown-linux-gnu, so remove some unnecessary dependencies. 
+ - name: Remove unnecessary dependencies to save space + if: ${{ startsWith(matrix.runner, 'ubuntu') }} + shell: bash + run: | + set -euo pipefail + sudo rm -rf \ + /usr/local/lib/android \ + /usr/share/dotnet \ + /usr/local/share/boost \ + /usr/local/lib/node_modules \ + /opt/ghc + sudo apt-get remove -y docker.io docker-compose podman buildah + + # Ensure brew includes this fix so that brew's shellenv.sh loads + # cleanly in the Codex sandbox (it is frequently eval'd via .zprofile + # for Brew users, including the macOS runners on GitHub): + # + # https://github.com/Homebrew/brew/pull/21157 + # + # Once brew 5.0.5 is released and is the default on macOS runners, this + # step can be removed. + - name: Upgrade brew + if: ${{ startsWith(matrix.runner, 'macos') }} + shell: bash + run: | + set -euo pipefail + brew --version + git -C "$(brew --repo)" fetch origin + git -C "$(brew --repo)" checkout main + git -C "$(brew --repo)" reset --hard origin/main + export HOMEBREW_UPDATE_TO_TAG=0 + brew update + brew upgrade + brew --version + + # Some integration tests rely on DotSlash being installed. + # See https://github.com/openai/codex/pull/7617. 
+ - name: Install DotSlash + uses: facebook/install-dotslash@v2 + - uses: dtolnay/rust-toolchain@1.90 with: targets: ${{ matrix.target }} diff --git a/.github/workflows/rust-release.yml b/.github/workflows/rust-release.yml index 14f8aa0327..b90f0027fa 100644 --- a/.github/workflows/rust-release.yml +++ b/.github/workflows/rust-release.yml @@ -50,6 +50,9 @@ jobs: name: Build - ${{ matrix.runner }} - ${{ matrix.target }} runs-on: ${{ matrix.runner }} timeout-minutes: 30 + permissions: + contents: read + id-token: write defaults: run: working-directory: codex-rs @@ -100,6 +103,25 @@ jobs: - name: Cargo build run: cargo build --target ${{ matrix.target }} --release --bin codex --bin codex-responses-api-proxy + - if: ${{ contains(matrix.target, 'linux') }} + name: Cosign Linux artifacts + uses: ./.github/actions/linux-code-sign + with: + target: ${{ matrix.target }} + artifacts-dir: ${{ github.workspace }}/codex-rs/target/${{ matrix.target }}/release + + - if: ${{ contains(matrix.target, 'windows') }} + name: Sign Windows binaries with Azure Trusted Signing + uses: ./.github/actions/windows-code-sign + with: + target: ${{ matrix.target }} + client-id: ${{ secrets.AZURE_TRUSTED_SIGNING_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TRUSTED_SIGNING_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_TRUSTED_SIGNING_SUBSCRIPTION_ID }} + endpoint: ${{ secrets.AZURE_TRUSTED_SIGNING_ENDPOINT }} + account-name: ${{ secrets.AZURE_TRUSTED_SIGNING_ACCOUNT_NAME }} + certificate-profile-name: ${{ secrets.AZURE_TRUSTED_SIGNING_CERTIFICATE_PROFILE_NAME }} + - if: ${{ matrix.runner == 'macos-15-xlarge' }} name: Configure Apple code signing shell: bash @@ -283,6 +305,11 @@ jobs: cp target/${{ matrix.target }}/release/codex-responses-api-proxy "$dest/codex-responses-api-proxy-${{ matrix.target }}" fi + if [[ "${{ matrix.target }}" == *linux* ]]; then + cp target/${{ matrix.target }}/release/codex.sigstore "$dest/codex-${{ matrix.target }}.sigstore" + cp target/${{ matrix.target 
}}/release/codex-responses-api-proxy.sigstore "$dest/codex-responses-api-proxy-${{ matrix.target }}.sigstore" + fi + - if: ${{ matrix.runner == 'windows-11-arm' }} name: Install zstd shell: powershell @@ -321,6 +348,11 @@ jobs: continue fi + # Don't try to compress signature bundles. + if [[ "$base" == *.sigstore ]]; then + continue + fi + # Create per-binary tar.gz tar -C "$dest" -czf "$dest/${base}.tar.gz" "$base" diff --git a/AGENTS.md b/AGENTS.md index aaebd0dfd3..f9f04c5b15 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -75,6 +75,7 @@ If you don’t have the tool: ### Test assertions - Tests should use pretty_assertions::assert_eq for clearer diffs. Import this at the top of the test module if it isn't already. +- Prefer deep equals comparisons whenever possible. Perform `assert_eq!()` on entire objects, rather than individual fields. ### Integration tests (core) diff --git a/codex-cli/bin/codex.js b/codex-cli/bin/codex.js index 805be85af8..138796e5d6 100644 --- a/codex-cli/bin/codex.js +++ b/codex-cli/bin/codex.js @@ -95,14 +95,6 @@ function detectPackageManager() { return "bun"; } - if ( - process.env.BUN_INSTALL || - process.env.BUN_INSTALL_GLOBAL_DIR || - process.env.BUN_INSTALL_BIN_DIR - ) { - return "bun"; - } - return userAgent ? 
"npm" : null; } diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 664f6849f1..9d41bc612e 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -238,48 +238,6 @@ dependencies = [ "term", ] -[[package]] -name = "askama" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f75363874b771be265f4ffe307ca705ef6f3baa19011c149da8674a87f1b75c4" -dependencies = [ - "askama_derive", - "itoa", - "percent-encoding", - "serde", - "serde_json", -] - -[[package]] -name = "askama_derive" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "129397200fe83088e8a68407a8e2b1f826cf0086b21ccdb866a722c8bcd3a94f" -dependencies = [ - "askama_parser", - "basic-toml", - "memchr", - "proc-macro2", - "quote", - "rustc-hash", - "serde", - "serde_derive", - "syn 2.0.104", -] - -[[package]] -name = "askama_parser" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6ab5630b3d5eaf232620167977f95eb51f3432fc76852328774afbd242d4358" -dependencies = [ - "memchr", - "serde", - "serde_derive", - "winnow", -] - [[package]] name = "assert-json-diff" version = "2.0.2" @@ -557,15 +515,6 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" -[[package]] -name = "basic-toml" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba62675e8242a4c4e806d12f11d136e626e6c8361d6b829310732241652a178a" -dependencies = [ - "serde", -] - [[package]] name = "beef" version = "0.5.2" @@ -858,6 +807,7 @@ dependencies = [ "http", "pretty_assertions", "regex-lite", + "reqwest", "serde", "serde_json", "thiserror 2.0.17", @@ -865,6 +815,7 @@ dependencies = [ "tokio-test", "tokio-util", "tracing", + "wiremock", ] [[package]] @@ -885,6 +836,7 @@ dependencies = [ "codex-file-search", "codex-login", "codex-protocol", + 
"codex-rmcp-client", "codex-utils-json-to-toml", "core_test_support", "mcp-types", @@ -898,7 +850,8 @@ dependencies = [ "shlex", "tempfile", "tokio", - "toml", + "toml 0.9.5", + "toml_edit", "tracing", "tracing-subscriber", "uuid", @@ -1038,6 +991,7 @@ dependencies = [ "codex-rmcp-client", "codex-stdio-to-uds", "codex-tui", + "codex-tui2", "codex-windows-sandbox", "ctor 0.5.0", "libc", @@ -1046,10 +1000,10 @@ dependencies = [ "pretty_assertions", "regex-lite", "serde_json", - "supports-color", + "supports-color 3.0.2", "tempfile", "tokio", - "toml", + "toml 0.9.5", "tracing", ] @@ -1086,10 +1040,13 @@ dependencies = [ "codex-login", "codex-tui", "crossterm", + "owo-colors", + "pretty_assertions", "ratatui", "reqwest", "serde", "serde_json", + "supports-color 3.0.2", "tokio", "tokio-stream", "tracing", @@ -1117,14 +1074,12 @@ name = "codex-common" version = "0.0.0" dependencies = [ "clap", - "codex-app-server-protocol", "codex-core", "codex-lmstudio", "codex-ollama", "codex-protocol", - "once_cell", "serde", - "toml", + "toml 0.9.5", ] [[package]] @@ -1132,7 +1087,6 @@ name = "codex-core" version = "0.0.0" dependencies = [ "anyhow", - "askama", "assert_cmd", "assert_matches", "async-channel", @@ -1145,6 +1099,7 @@ dependencies = [ "codex-apply-patch", "codex-arg0", "codex-async-utils", + "codex-client", "codex-core", "codex-execpolicy", "codex-file-search", @@ -1153,6 +1108,7 @@ dependencies = [ "codex-otel", "codex-protocol", "codex-rmcp-client", + "codex-utils-absolute-path", "codex-utils-pty", "codex-utils-readiness", "codex-utils-string", @@ -1202,7 +1158,7 @@ dependencies = [ "tokio", "tokio-test", "tokio-util", - "toml", + "toml 0.9.5", "toml_edit", "tracing", "tracing-test", @@ -1237,7 +1193,7 @@ dependencies = [ "serde", "serde_json", "shlex", - "supports-color", + "supports-color 3.0.2", "tempfile", "tokio", "tracing", @@ -1254,10 +1210,14 @@ name = "codex-exec-server" version = "0.0.0" dependencies = [ "anyhow", + "assert_cmd", "async-trait", "clap", 
"codex-core", + "codex-execpolicy", + "exec_server_test_support", "libc", + "maplit", "path-absolutize", "pretty_assertions", "rmcp", @@ -1270,6 +1230,7 @@ dependencies = [ "tokio-util", "tracing", "tracing-subscriber", + "which", ] [[package]] @@ -1295,7 +1256,7 @@ dependencies = [ "allocative", "anyhow", "clap", - "derive_more 2.0.1", + "derive_more 2.1.0", "env_logger", "log", "multimap", @@ -1456,6 +1417,7 @@ dependencies = [ "chrono", "codex-app-server-protocol", "codex-protocol", + "codex-utils-absolute-path", "eventsource-stream", "http", "opentelemetry", @@ -1476,6 +1438,7 @@ name = "codex-process-hardening" version = "0.0.0" dependencies = [ "libc", + "pretty_assertions", ] [[package]] @@ -1483,7 +1446,6 @@ name = "codex-protocol" version = "0.0.0" dependencies = [ "anyhow", - "base64", "codex-git", "codex-utils-image", "icu_decimal", @@ -1586,7 +1548,7 @@ dependencies = [ "codex-windows-sandbox", "color-eyre", "crossterm", - "derive_more 2.0.1", + "derive_more 2.1.0", "diffy", "dirs", "dunce", @@ -1611,13 +1573,13 @@ dependencies = [ "shlex", "strum 0.27.2", "strum_macros 0.27.2", - "supports-color", + "supports-color 3.0.2", "tempfile", "textwrap 0.16.2", "tokio", "tokio-stream", "tokio-util", - "toml", + "toml 0.9.5", "tracing", "tracing-appender", "tracing-subscriber", @@ -1626,9 +1588,89 @@ dependencies = [ "unicode-segmentation", "unicode-width 0.2.1", "url", + "uuid", "vt100", ] +[[package]] +name = "codex-tui2" +version = "0.0.0" +dependencies = [ + "anyhow", + "arboard", + "assert_matches", + "async-stream", + "base64", + "chrono", + "clap", + "codex-ansi-escape", + "codex-app-server-protocol", + "codex-arg0", + "codex-backend-client", + "codex-common", + "codex-core", + "codex-feedback", + "codex-file-search", + "codex-login", + "codex-protocol", + "codex-tui", + "codex-windows-sandbox", + "color-eyre", + "crossterm", + "derive_more 2.1.0", + "diffy", + "dirs", + "dunce", + "image", + "insta", + "itertools 0.14.0", + "lazy_static", + "libc", + 
"mcp-types", + "opentelemetry-appender-tracing", + "pathdiff", + "pretty_assertions", + "pulldown-cmark", + "rand 0.9.2", + "ratatui", + "ratatui-macros", + "regex-lite", + "reqwest", + "serde", + "serde_json", + "serial_test", + "shlex", + "strum 0.27.2", + "strum_macros 0.27.2", + "supports-color 3.0.2", + "tempfile", + "textwrap 0.16.2", + "tokio", + "tokio-stream", + "tokio-util", + "toml 0.9.5", + "tracing", + "tracing-appender", + "tracing-subscriber", + "tree-sitter-bash", + "tree-sitter-highlight", + "unicode-segmentation", + "unicode-width 0.2.1", + "url", + "uuid", + "vt100", +] + +[[package]] +name = "codex-utils-absolute-path" +version = "0.0.0" +dependencies = [ + "path-absolutize", + "serde", + "serde_json", + "tempfile", +] + [[package]] name = "codex-utils-cache" version = "0.0.0" @@ -1656,7 +1698,7 @@ version = "0.0.0" dependencies = [ "pretty_assertions", "serde_json", - "toml", + "toml 0.9.5", ] [[package]] @@ -1664,8 +1706,13 @@ name = "codex-utils-pty" version = "0.0.0" dependencies = [ "anyhow", + "filedescriptor", + "lazy_static", + "log", "portable-pty", + "shared_library", "tokio", + "winapi", ] [[package]] @@ -1688,6 +1735,8 @@ name = "codex-windows-sandbox" version = "0.0.0" dependencies = [ "anyhow", + "base64", + "chrono", "codex-protocol", "dirs-next", "dunce", @@ -1695,7 +1744,9 @@ dependencies = [ "serde", "serde_json", "tempfile", + "windows 0.58.0", "windows-sys 0.52.0", + "winres", ] [[package]] @@ -1787,9 +1838,9 @@ dependencies = [ [[package]] name = "convert_case" -version = "0.7.1" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb402b8d4c85569410425650ce3eddc7d698ed96d39a73f941b08fb63082f1e7" +checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9" dependencies = [ "unicode-segmentation", ] @@ -2128,11 +2179,11 @@ dependencies = [ [[package]] name = "derive_more" -version = "2.0.1" +version = "2.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" +checksum = "10b768e943bed7bf2cab53df09f4bc34bfd217cdb57d971e769874c9a6710618" dependencies = [ - "derive_more-impl 2.0.1", + "derive_more-impl 2.1.0", ] [[package]] @@ -2150,13 +2201,14 @@ dependencies = [ [[package]] name = "derive_more-impl" -version = "2.0.1" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" +checksum = "6d286bfdaf75e988b4a78e013ecd79c581e06399ab53fbacd2d916c2f904f30b" dependencies = [ - "convert_case 0.7.1", + "convert_case 0.10.0", "proc-macro2", "quote", + "rustc_version", "syn 2.0.104", "unicode-xid", ] @@ -2497,6 +2549,18 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "exec_server_test_support" +version = "0.0.0" +dependencies = [ + "anyhow", + "assert_cmd", + "codex-core", + "rmcp", + "serde_json", + "tokio", +] + [[package]] name = "eyre" version = "0.6.12" @@ -3115,7 +3179,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core", + "windows-core 0.61.2", ] [[package]] @@ -3379,9 +3443,9 @@ dependencies = [ [[package]] name = "insta" -version = "1.43.2" +version = "1.44.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fdb647ebde000f43b5b53f773c30cf9b0cb4300453208713fa38b2c70935a0" +checksum = "b5c943d4415edd8153251b6f197de5eb1640e56d84e8d9159bea190421c73698" dependencies = [ "console", "once_cell", @@ -4433,6 +4497,10 @@ name = "owo-colors" version = "4.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48dd4f4a2c8405440fd0462561f0e5806bd0f77e86f51c761481bdd4018b545e" +dependencies = [ + "supports-color 2.1.0", + "supports-color 3.0.2", +] [[package]] name = "parking" @@ -4750,7 +4818,7 @@ dependencies = [ "nix 0.30.1", "tokio", "tracing", - "windows", + "windows 0.61.3", ] [[package]] @@ -6169,6 
+6237,16 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "supports-color" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" +dependencies = [ + "is-terminal", + "is_ci", +] + [[package]] name = "supports-color" version = "3.0.2" @@ -6606,6 +6684,15 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + [[package]] name = "toml" version = "0.9.5" @@ -6905,9 +6992,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "ts-rs" -version = "11.0.1" +version = "11.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ef1b7a6d914a34127ed8e1fa927eb7088903787bcded4fa3eef8f85ee1568be" +checksum = "4994acea2522cd2b3b85c1d9529a55991e3ad5e25cdcd3de9d505972c4379424" dependencies = [ "serde_json", "thiserror 2.0.17", @@ -6917,9 +7004,9 @@ dependencies = [ [[package]] name = "ts-rs-macros" -version = "11.0.1" +version = "11.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9d4ed7b4c18cc150a6a0a1e9ea1ecfa688791220781af6e119f9599a8502a0a" +checksum = "ee6ff59666c9cbaec3533964505d39154dc4e0a56151fdea30a09ed0301f62e2" dependencies = [ "proc-macro2", "quote", @@ -7375,9 +7462,9 @@ dependencies = [ [[package]] name = "wildmatch" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39b7d07a236abaef6607536ccfaf19b396dbe3f5110ddb73d39f4562902ed382" +checksum = "29333c3ea1ba8b17211763463ff24ee84e41c78224c16b001cd907e663a38c68" [[package]] name = "winapi" @@ -7410,6 +7497,16 
@@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" +dependencies = [ + "windows-core 0.58.0", + "windows-targets 0.52.6", +] + [[package]] name = "windows" version = "0.61.3" @@ -7417,7 +7514,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" dependencies = [ "windows-collections", - "windows-core", + "windows-core 0.61.2", "windows-future", "windows-link 0.1.3", "windows-numerics", @@ -7429,7 +7526,20 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" dependencies = [ - "windows-core", + "windows-core 0.61.2", +] + +[[package]] +name = "windows-core" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" +dependencies = [ + "windows-implement 0.58.0", + "windows-interface 0.58.0", + "windows-result 0.2.0", + "windows-strings 0.1.0", + "windows-targets 0.52.6", ] [[package]] @@ -7438,11 +7548,11 @@ version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ - "windows-implement", - "windows-interface", + "windows-implement 0.60.0", + "windows-interface 0.59.1", "windows-link 0.1.3", - "windows-result", - "windows-strings", + "windows-result 0.3.4", + "windows-strings 0.4.2", ] [[package]] @@ -7451,11 +7561,22 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" dependencies = [ - "windows-core", + "windows-core 0.61.2", "windows-link 0.1.3", "windows-threading", ] +[[package]] +name = "windows-implement" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "windows-implement" version = "0.60.0" @@ -7467,6 +7588,17 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "windows-interface" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "windows-interface" version = "0.59.1" @@ -7496,7 +7628,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" dependencies = [ - "windows-core", + "windows-core 0.61.2", "windows-link 0.1.3", ] @@ -7507,8 +7639,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" dependencies = [ "windows-link 0.1.3", - "windows-result", - "windows-strings", + "windows-result 0.3.4", + "windows-strings 0.4.2", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] @@ -7520,6 +7661,16 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" 
+dependencies = [ + "windows-result 0.2.0", + "windows-targets 0.52.6", +] + [[package]] name = "windows-strings" version = "0.4.2" @@ -7843,6 +7994,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "winres" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b68db261ef59e9e52806f688020631e987592bd83619edccda9c47d42cde4f6c" +dependencies = [ + "toml 0.5.11", +] + [[package]] name = "winsafe" version = "0.0.19" diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml index 2339cd4e67..cdf55434fe 100644 --- a/codex-rs/Cargo.toml +++ b/codex-rs/Cargo.toml @@ -34,6 +34,8 @@ members = [ "stdio-to-uds", "otel", "tui", + "tui2", + "utils/absolute-path", "utils/git", "utils/cache", "utils/image", @@ -88,6 +90,8 @@ codex-responses-api-proxy = { path = "responses-api-proxy" } codex-rmcp-client = { path = "rmcp-client" } codex-stdio-to-uds = { path = "stdio-to-uds" } codex-tui = { path = "tui" } +codex-tui2 = { path = "tui2" } +codex-utils-absolute-path = { path = "utils/absolute-path" } codex-utils-cache = { path = "utils/cache" } codex-utils-image = { path = "utils/image" } codex-utils-json-to-toml = { path = "utils/json-to-toml" } @@ -96,6 +100,7 @@ codex-utils-readiness = { path = "utils/readiness" } codex-utils-string = { path = "utils/string" } codex-windows-sandbox = { path = "windows-sandbox-rs" } core_test_support = { path = "core/tests/common" } +exec_server_test_support = { path = "exec-server/tests/common" } mcp-types = { path = "mcp-types" } mcp_test_support = { path = "mcp-server/tests/common" } @@ -104,7 +109,6 @@ allocative = "0.3.3" ansi-to-tui = "7.0.0" anyhow = "1" arboard = { version = "3", features = ["wayland-data-control"] } -askama = "0.14" assert_cmd = "2" assert_matches = "1.5.0" async-channel = "2.3.1" @@ -138,7 +142,7 @@ icu_provider = { version = "2.1", features = ["sync"] } ignore = "0.4.23" image = { version = "^0.25.9", default-features = false } indexmap = "2.12.0" -insta = "1.43.2" +insta = 
"1.44.3" itertools = "0.14.0" keyring = { version = "3.6", default-features = false } landlock = "0.4.1" @@ -178,8 +182,8 @@ seccompiler = "0.5.0" sentry = "0.34.0" serde = "1" serde_json = "1" -serde_yaml = "0.9" serde_with = "3.16" +serde_yaml = "0.9" serial_test = "3.2.0" sha1 = "0.10.6" sha2 = "0.10" @@ -222,7 +226,7 @@ vt100 = "0.16.2" walkdir = "2.5.0" webbrowser = "1.0" which = "6" -wildmatch = "2.5.0" +wildmatch = "2.6.1" wiremock = "0.6" zeroize = "1.8.2" diff --git a/codex-rs/README.md b/codex-rs/README.md index 385b4c62e5..a3d1b82fb8 100644 --- a/codex-rs/README.md +++ b/codex-rs/README.md @@ -46,7 +46,7 @@ Use `codex mcp` to add/list/get/remove MCP server launchers defined in `config.t ### Notifications -You can enable notifications by configuring a script that is run whenever the agent finishes a turn. The [notify documentation](../docs/config.md#notify) includes a detailed example that explains how to get desktop notifications via [terminal-notifier](https://github.com/julienXX/terminal-notifier) on macOS. +You can enable notifications by configuring a script that is run whenever the agent finishes a turn. The [notify documentation](../docs/config.md#notify) includes a detailed example that explains how to get desktop notifications via [terminal-notifier](https://github.com/julienXX/terminal-notifier) on macOS. When Codex detects that it is running under WSL 2 inside Windows Terminal (`WT_SESSION` is set), the TUI automatically falls back to native Windows toast notifications so approval prompts and completed turns surface even though Windows Terminal does not implement OSC 9. 
### `codex exec` to run Codex programmatically/non-interactively diff --git a/codex-rs/app-server-protocol/src/protocol/common.rs b/codex-rs/app-server-protocol/src/protocol/common.rs index 2858366739..116a3c62dd 100644 --- a/codex-rs/app-server-protocol/src/protocol/common.rs +++ b/codex-rs/app-server-protocol/src/protocol/common.rs @@ -139,6 +139,11 @@ client_request_definitions! { response: v2::ModelListResponse, }, + McpServerOauthLogin => "mcpServer/oauth/login" { + params: v2::McpServerOauthLoginParams, + response: v2::McpServerOauthLoginResponse, + }, + McpServersList => "mcpServers/list" { params: v2::ListMcpServersParams, response: v2::ListMcpServersResponse, @@ -522,8 +527,10 @@ server_notification_definitions! { ItemCompleted => "item/completed" (v2::ItemCompletedNotification), AgentMessageDelta => "item/agentMessage/delta" (v2::AgentMessageDeltaNotification), CommandExecutionOutputDelta => "item/commandExecution/outputDelta" (v2::CommandExecutionOutputDeltaNotification), + TerminalInteraction => "item/commandExecution/terminalInteraction" (v2::TerminalInteractionNotification), FileChangeOutputDelta => "item/fileChange/outputDelta" (v2::FileChangeOutputDeltaNotification), McpToolCallProgress => "item/mcpToolCall/progress" (v2::McpToolCallProgressNotification), + McpServerOauthLoginCompleted => "mcpServer/oauthLogin/completed" (v2::McpServerOauthLoginCompletedNotification), AccountUpdated => "account/updated" (v2::AccountUpdatedNotification), AccountRateLimitsUpdated => "account/rateLimits/updated" (v2::AccountRateLimitsUpdatedNotification), ReasoningSummaryTextDelta => "item/reasoning/summaryTextDelta" (v2::ReasoningSummaryTextDeltaNotification), @@ -647,7 +654,6 @@ mod tests { command: vec!["echo".to_string(), "hello".to_string()], cwd: PathBuf::from("/tmp"), reason: Some("because tests".to_string()), - risk: None, parsed_cmd: vec![ParsedCommand::Unknown { cmd: "echo hello".to_string(), }], @@ -667,7 +673,6 @@ mod tests { "command": ["echo", "hello"], 
"cwd": "/tmp", "reason": "because tests", - "risk": null, "parsedCmd": [ { "type": "unknown", diff --git a/codex-rs/app-server-protocol/src/protocol/v1.rs b/codex-rs/app-server-protocol/src/protocol/v1.rs index 54f80c9fd4..853cb03b40 100644 --- a/codex-rs/app-server-protocol/src/protocol/v1.rs +++ b/codex-rs/app-server-protocol/src/protocol/v1.rs @@ -3,17 +3,16 @@ use std::path::PathBuf; use codex_protocol::ConversationId; use codex_protocol::config_types::ForcedLoginMethod; -use codex_protocol::config_types::ReasoningEffort; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::config_types::SandboxMode; use codex_protocol::config_types::Verbosity; use codex_protocol::models::ResponseItem; +use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::parse_command::ParsedCommand; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::FileChange; use codex_protocol::protocol::ReviewDecision; -use codex_protocol::protocol::SandboxCommandAssessment; use codex_protocol::protocol::SandboxPolicy; use codex_protocol::protocol::SessionSource; use codex_protocol::protocol::TurnAbortReason; @@ -226,7 +225,6 @@ pub struct ExecCommandApprovalParams { pub command: Vec, pub cwd: PathBuf, pub reason: Option, - pub risk: Option, pub parsed_cmd: Vec, } diff --git a/codex-rs/app-server-protocol/src/protocol/v2.rs b/codex-rs/app-server-protocol/src/protocol/v2.rs index e3990432f6..3429a4fc15 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2.rs @@ -2,17 +2,20 @@ use std::collections::HashMap; use std::path::PathBuf; use crate::protocol::common::AuthMode; -use codex_protocol::ConversationId; use codex_protocol::account::PlanType; -use codex_protocol::approvals::SandboxCommandAssessment as CoreSandboxCommandAssessment; -use codex_protocol::config_types::ReasoningEffort; +use codex_protocol::approvals::ExecPolicyAmendment as 
CoreExecPolicyAmendment; +use codex_protocol::config_types::ForcedLoginMethod; use codex_protocol::config_types::ReasoningSummary; +use codex_protocol::config_types::SandboxMode as CoreSandboxMode; +use codex_protocol::config_types::Verbosity; use codex_protocol::items::AgentMessageContent as CoreAgentMessageContent; use codex_protocol::items::TurnItem as CoreTurnItem; use codex_protocol::models::ResponseItem; +use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::parse_command::ParsedCommand as CoreParsedCommand; use codex_protocol::plan_tool::PlanItemArg as CorePlanItemArg; use codex_protocol::plan_tool::StepStatus as CorePlanStepStatus; +use codex_protocol::protocol::AskForApproval as CoreAskForApproval; use codex_protocol::protocol::CodexErrorInfo as CoreCodexErrorInfo; use codex_protocol::protocol::CreditsSnapshot as CoreCreditsSnapshot; use codex_protocol::protocol::RateLimitSnapshot as CoreRateLimitSnapshot; @@ -123,17 +126,68 @@ impl From for CodexErrorInfo { } } -v2_enum_from_core!( - pub enum AskForApproval from codex_protocol::protocol::AskForApproval { - UnlessTrusted, OnFailure, OnRequest, Never - } -); +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, JsonSchema, TS)] +#[serde(rename_all = "kebab-case")] +#[ts(rename_all = "kebab-case", export_to = "v2/")] +pub enum AskForApproval { + #[serde(rename = "untrusted")] + #[ts(rename = "untrusted")] + UnlessTrusted, + OnFailure, + OnRequest, + Never, +} -v2_enum_from_core!( - pub enum SandboxMode from codex_protocol::config_types::SandboxMode { - ReadOnly, WorkspaceWrite, DangerFullAccess +impl AskForApproval { + pub fn to_core(self) -> CoreAskForApproval { + match self { + AskForApproval::UnlessTrusted => CoreAskForApproval::UnlessTrusted, + AskForApproval::OnFailure => CoreAskForApproval::OnFailure, + AskForApproval::OnRequest => CoreAskForApproval::OnRequest, + AskForApproval::Never => CoreAskForApproval::Never, + } } -); +} + +impl From for AskForApproval { + fn 
from(value: CoreAskForApproval) -> Self { + match value { + CoreAskForApproval::UnlessTrusted => AskForApproval::UnlessTrusted, + CoreAskForApproval::OnFailure => AskForApproval::OnFailure, + CoreAskForApproval::OnRequest => AskForApproval::OnRequest, + CoreAskForApproval::Never => AskForApproval::Never, + } + } +} + +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, JsonSchema, TS)] +#[serde(rename_all = "kebab-case")] +#[ts(rename_all = "kebab-case", export_to = "v2/")] +pub enum SandboxMode { + ReadOnly, + WorkspaceWrite, + DangerFullAccess, +} + +impl SandboxMode { + pub fn to_core(self) -> CoreSandboxMode { + match self { + SandboxMode::ReadOnly => CoreSandboxMode::ReadOnly, + SandboxMode::WorkspaceWrite => CoreSandboxMode::WorkspaceWrite, + SandboxMode::DangerFullAccess => CoreSandboxMode::DangerFullAccess, + } + } +} + +impl From for SandboxMode { + fn from(value: CoreSandboxMode) -> Self { + match value { + CoreSandboxMode::ReadOnly => SandboxMode::ReadOnly, + CoreSandboxMode::WorkspaceWrite => SandboxMode::WorkspaceWrite, + CoreSandboxMode::DangerFullAccess => SandboxMode::DangerFullAccess, + } + } +} v2_enum_from_core!( pub enum ReviewDelivery from codex_protocol::protocol::ReviewDelivery { @@ -160,6 +214,72 @@ pub enum ConfigLayerName { User, } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default, JsonSchema, TS)] +#[serde(rename_all = "snake_case")] +#[ts(export_to = "v2/")] +pub struct SandboxWorkspaceWrite { + #[serde(default)] + pub writable_roots: Vec, + #[serde(default)] + pub network_access: bool, + #[serde(default)] + pub exclude_tmpdir_env_var: bool, + #[serde(default)] + pub exclude_slash_tmp: bool, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "snake_case")] +#[ts(export_to = "v2/")] +pub struct ToolsV2 { + #[serde(alias = "web_search_request")] + pub web_search: Option, + pub view_image: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, 
PartialEq, JsonSchema, TS)] +#[serde(rename_all = "snake_case")] +#[ts(export_to = "v2/")] +pub struct ProfileV2 { + pub model: Option, + pub model_provider: Option, + pub approval_policy: Option, + pub model_reasoning_effort: Option, + pub model_reasoning_summary: Option, + pub model_verbosity: Option, + pub chatgpt_base_url: Option, + #[serde(default, flatten)] + pub additional: HashMap, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "snake_case")] +#[ts(export_to = "v2/")] +pub struct Config { + pub model: Option, + pub review_model: Option, + pub model_context_window: Option, + pub model_auto_compact_token_limit: Option, + pub model_provider: Option, + pub approval_policy: Option, + pub sandbox_mode: Option, + pub sandbox_workspace_write: Option, + pub forced_chatgpt_workspace_id: Option, + pub forced_login_method: Option, + pub tools: Option, + pub profile: Option, + #[serde(default)] + pub profiles: HashMap, + pub instructions: Option, + pub developer_instructions: Option, + pub compact_prompt: Option, + pub model_reasoning_effort: Option, + pub model_reasoning_summary: Option, + pub model_verbosity: Option, + #[serde(default, flatten)] + pub additional: HashMap, +} + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] @@ -210,6 +330,8 @@ pub struct OverriddenMetadata { pub struct ConfigWriteResponse { pub status: WriteStatus, pub version: String, + /// Canonical path to the config file that was written. 
+ pub file_path: String, pub overridden_metadata: Option, } @@ -236,7 +358,7 @@ pub struct ConfigReadParams { #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] pub struct ConfigReadResponse { - pub config: JsonValue, + pub config: Config, pub origins: HashMap, #[serde(skip_serializing_if = "Option::is_none")] pub layers: Option>, @@ -246,10 +368,11 @@ pub struct ConfigReadResponse { #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] pub struct ConfigValueWriteParams { - pub file_path: String, pub key_path: String, pub value: JsonValue, pub merge_strategy: MergeStrategy, + /// Path to the config file to write; defaults to the user's `config.toml` when omitted. + pub file_path: Option, pub expected_version: Option, } @@ -257,8 +380,9 @@ pub struct ConfigValueWriteParams { #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] pub struct ConfigBatchWriteParams { - pub file_path: String, pub edits: Vec, + /// Path to the config file to write; defaults to the user's `config.toml` when omitted. + pub file_path: Option, pub expected_version: Option, } @@ -271,19 +395,16 @@ pub struct ConfigEdit { pub merge_strategy: MergeStrategy, } -v2_enum_from_core!( - pub enum CommandRiskLevel from codex_protocol::approvals::SandboxRiskLevel { - Low, - Medium, - High - } -); - #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] pub enum ApprovalDecision { Accept, + /// Approve and remember the approval for the session. 
+ AcceptForSession, + AcceptWithExecpolicyAmendment { + execpolicy_amendment: ExecPolicyAmendment, + }, Decline, Cancel, } @@ -353,28 +474,23 @@ impl From for SandboxPolicy { } } -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] -#[serde(rename_all = "camelCase")] -#[ts(export_to = "v2/")] -pub struct SandboxCommandAssessment { - pub description: String, - pub risk_level: CommandRiskLevel, +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)] +#[serde(transparent)] +#[ts(type = "Array", export_to = "v2/")] +pub struct ExecPolicyAmendment { + pub command: Vec, } -impl SandboxCommandAssessment { - pub fn into_core(self) -> CoreSandboxCommandAssessment { - CoreSandboxCommandAssessment { - description: self.description, - risk_level: self.risk_level.to_core(), - } +impl ExecPolicyAmendment { + pub fn into_core(self) -> CoreExecPolicyAmendment { + CoreExecPolicyAmendment::new(self.command) } } -impl From for SandboxCommandAssessment { - fn from(value: CoreSandboxCommandAssessment) -> Self { +impl From for ExecPolicyAmendment { + fn from(value: CoreExecPolicyAmendment) -> Self { Self { - description: value.description, - risk_level: CommandRiskLevel::from(value.risk_level), + command: value.command().to_vec(), } } } @@ -552,10 +668,21 @@ pub struct CancelLoginAccountParams { pub login_id: String, } -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub enum CancelLoginAccountStatus { + Canceled, + NotFound, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] -pub struct CancelLoginAccountResponse {} +pub struct CancelLoginAccountResponse { + pub status: CancelLoginAccountStatus, +} #[derive(Serialize, Deserialize, Debug, Clone, 
PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] @@ -658,13 +785,33 @@ pub struct ListMcpServersResponse { pub next_cursor: Option, } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub struct McpServerOauthLoginParams { + pub name: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + #[ts(optional)] + pub scopes: Option>, + #[serde(default, skip_serializing_if = "Option::is_none")] + #[ts(optional)] + pub timeout_secs: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub struct McpServerOauthLoginResponse { + pub authorization_url: String, +} + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] pub struct FeedbackUploadParams { pub classification: String, pub reason: Option, - pub conversation_id: Option, + pub thread_id: Option, pub include_logs: bool, } @@ -939,6 +1086,9 @@ pub struct TurnError { #[ts(export_to = "v2/")] pub struct ErrorNotification { pub error: TurnError, + // Set to true if the error is transient and the app-server process will automatically retry. + // If true, this will not interrupt a turn. + pub will_retry: bool, pub thread_id: String, pub turn_id: String, } @@ -1138,15 +1288,15 @@ pub enum ThreadItem { arguments: JsonValue, result: Option, error: Option, + /// The duration of the MCP tool call in milliseconds. 
+ #[ts(type = "number | null")] + duration_ms: Option, }, #[serde(rename_all = "camelCase")] #[ts(rename_all = "camelCase")] WebSearch { id: String, query: String }, #[serde(rename_all = "camelCase")] #[ts(rename_all = "camelCase")] - TodoList { id: String, items: Vec }, - #[serde(rename_all = "camelCase")] - #[ts(rename_all = "camelCase")] ImageView { id: String, path: String }, #[serde(rename_all = "camelCase")] #[ts(rename_all = "camelCase")] @@ -1249,15 +1399,6 @@ pub struct McpToolCallError { pub message: String, } -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] -#[serde(rename_all = "camelCase")] -#[ts(export_to = "v2/")] -pub struct TodoItem { - pub id: String, - pub text: String, - pub completed: bool, -} - // === Server Notifications === // Thread/Turn lifecycle notifications and item progress events #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] @@ -1307,6 +1448,7 @@ pub struct TurnDiffUpdatedNotification { #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] pub struct TurnPlanUpdatedNotification { + pub thread_id: String, pub turn_id: String, pub explanation: Option, pub plan: Vec, @@ -1412,6 +1554,17 @@ pub struct ReasoningTextDeltaNotification { pub content_index: i64, } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub struct TerminalInteractionNotification { + pub thread_id: String, + pub turn_id: String, + pub item_id: String, + pub process_id: String, + pub stdin: String, +} + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] @@ -1442,6 +1595,17 @@ pub struct McpToolCallProgressNotification { pub message: String, } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub struct McpServerOauthLoginCompletedNotification { + pub 
name: String, + pub success: bool, + #[serde(default, skip_serializing_if = "Option::is_none")] + #[ts(optional)] + pub error: Option, +} + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] @@ -1468,17 +1632,8 @@ pub struct CommandExecutionRequestApprovalParams { pub item_id: String, /// Optional explanatory reason (e.g. request for network access). pub reason: Option, - /// Optional model-provided risk assessment describing the blocked command. - pub risk: Option, -} - -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] -#[serde(rename_all = "camelCase")] -#[ts(export_to = "v2/")] -pub struct CommandExecutionRequestAcceptSettings { - /// If true, automatically approve this command for the duration of the session. - #[serde(default)] - pub for_session: bool, + /// Optional proposed execpolicy amendment to allow similar commands without prompting. + pub proposed_execpolicy_amendment: Option, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] @@ -1486,10 +1641,6 @@ pub struct CommandExecutionRequestAcceptSettings { #[ts(export_to = "v2/")] pub struct CommandExecutionRequestApprovalResponse { pub decision: ApprovalDecision, - /// Optional approval settings for when the decision is `accept`. - /// Ignored if the decision is `decline` or `cancel`. 
- #[serde(default)] - pub accept_settings: Option, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] @@ -1526,6 +1677,7 @@ pub struct RateLimitSnapshot { pub primary: Option, pub secondary: Option, pub credits: Option, + pub plan_type: Option, } impl From for RateLimitSnapshot { @@ -1534,6 +1686,7 @@ impl From for RateLimitSnapshot { primary: value.primary.map(RateLimitWindow::from), secondary: value.secondary.map(RateLimitWindow::from), credits: value.credits.map(CreditsSnapshot::from), + plan_type: value.plan_type, } } } diff --git a/codex-rs/app-server-test-client/src/main.rs b/codex-rs/app-server-test-client/src/main.rs index 8c2a38e46c..b66c59d55a 100644 --- a/codex-rs/app-server-test-client/src/main.rs +++ b/codex-rs/app-server-test-client/src/main.rs @@ -21,7 +21,6 @@ use codex_app_server_protocol::ApprovalDecision; use codex_app_server_protocol::AskForApproval; use codex_app_server_protocol::ClientInfo; use codex_app_server_protocol::ClientRequest; -use codex_app_server_protocol::CommandExecutionRequestAcceptSettings; use codex_app_server_protocol::CommandExecutionRequestApprovalParams; use codex_app_server_protocol::CommandExecutionRequestApprovalResponse; use codex_app_server_protocol::FileChangeRequestApprovalParams; @@ -554,6 +553,10 @@ impl CodexClient { print!("{}", delta.delta); std::io::stdout().flush().ok(); } + ServerNotification::TerminalInteraction(delta) => { + println!("[stdin sent: {}]", delta.stdin); + std::io::stdout().flush().ok(); + } ServerNotification::ItemStarted(payload) => { println!("\n< item started: {:?}", payload.item); } @@ -753,7 +756,7 @@ impl CodexClient { turn_id, item_id, reason, - risk, + proposed_execpolicy_amendment, } = params; println!( @@ -762,13 +765,12 @@ impl CodexClient { if let Some(reason) = reason.as_deref() { println!("< reason: {reason}"); } - if let Some(risk) = risk.as_ref() { - println!("< risk assessment: {risk:?}"); + if let Some(execpolicy_amendment) = 
proposed_execpolicy_amendment.as_ref() { + println!("< proposed execpolicy amendment: {execpolicy_amendment:?}"); } let response = CommandExecutionRequestApprovalResponse { decision: ApprovalDecision::Accept, - accept_settings: Some(CommandExecutionRequestAcceptSettings { for_session: false }), }; self.send_server_request_response(request_id, &response)?; println!("< approved commandExecution request for item {item_id}"); diff --git a/codex-rs/app-server/Cargo.toml b/codex-rs/app-server/Cargo.toml index 99d5a7a141..948facdea6 100644 --- a/codex-rs/app-server/Cargo.toml +++ b/codex-rs/app-server/Cargo.toml @@ -26,6 +26,7 @@ codex-login = { workspace = true } codex-protocol = { workspace = true } codex-app-server-protocol = { workspace = true } codex-feedback = { workspace = true } +codex-rmcp-client = { workspace = true } codex-utils-json-to-toml = { workspace = true } chrono = { workspace = true } serde = { workspace = true, features = ["derive"] } @@ -34,6 +35,7 @@ sha2 = { workspace = true } mcp-types = { workspace = true } tempfile = { workspace = true } toml = { workspace = true } +toml_edit = { workspace = true } tokio = { workspace = true, features = [ "io-std", "macros", diff --git a/codex-rs/app-server/README.md b/codex-rs/app-server/README.md index 4e94a2c133..e9dee71271 100644 --- a/codex-rs/app-server/README.md +++ b/codex-rs/app-server/README.md @@ -5,11 +5,11 @@ ## Table of Contents - [Protocol](#protocol) - [Message Schema](#message-schema) +- [Core Primitives](#core-primitives) - [Lifecycle Overview](#lifecycle-overview) - [Initialization](#initialization) -- [Core primitives](#core-primitives) -- [Thread & turn endpoints](#thread--turn-endpoints) -- [Events (work-in-progress)](#events-work-in-progress) +- [API Overview](#api-overview) +- [Events](#events) - [Auth endpoints](#auth-endpoints) ## Protocol @@ -25,6 +25,15 @@ codex app-server generate-ts --out DIR codex app-server generate-json-schema --out DIR ``` +## Core Primitives + +The API exposes 
three top level primitives representing an interaction between a user and Codex: +- **Thread**: A conversation between a user and the Codex agent. Each thread contains multiple turns. +- **Turn**: One turn of the conversation, typically starting with a user message and finishing with an agent message. Each turn contains multiple items. +- **Item**: Represents user inputs and agent outputs as part of the turn, persisted and used as the context for future conversations. Example items include user message, agent reasoning, agent message, shell command, file edit, etc. + +Use the thread APIs to create, list, or archive conversations. Drive a conversation with turn APIs and stream progress via turn notifications. + ## Lifecycle Overview - Initialize once: Immediately after launching the codex app-server process, send an `initialize` request with your client metadata, then emit an `initialized` notification. Any other request before this handshake gets rejected. @@ -37,28 +46,16 @@ codex app-server generate-json-schema --out DIR Clients must send a single `initialize` request before invoking any other method, then acknowledge with an `initialized` notification. The server returns the user agent string it will present to upstream services; subsequent requests issued before initialization receive a `"Not initialized"` error, and repeated `initialize` calls receive an `"Already initialized"` error. -Example: +Applications building on top of `codex app-server` should identify themselves via the `clientInfo` parameter. +Example (from OpenAI's official VSCode extension): ```json { "method": "initialize", "id": 0, "params": { "clientInfo": { "name": "codex-vscode", "title": "Codex VS Code Extension", "version": "0.1.0" } } } -{ "id": 0, "result": { "userAgent": "codex-app-server/0.1.0 codex-vscode/0.1.0" } } -{ "method": "initialized" } ``` -## Core primitives - -We have 3 top level primitives: -- Thread - a conversation between the Codex agent and a user. 
Each thread contains multiple turns. -- Turn - one turn of the conversation, typically starting with a user message and finishing with an agent message. Each turn contains multiple items. -- Item - represents user inputs and agent outputs as part of the turn, persisted and used as the context for future conversations. - -## Thread & turn endpoints - -The JSON-RPC API exposes dedicated methods for managing Codex conversations. Threads store long-lived conversation metadata, and turns store the per-message exchange (input → Codex output, including streamed items). Use the thread APIs to create, list, or archive sessions, then drive the conversation with turn APIs and notifications. - -### Quick reference +## API Overview - `thread/start` — create a new thread; emits `thread/started` and auto-subscribes you to turn/item events for that thread. - `thread/resume` — reopen an existing thread by id so subsequent `turn/start` calls append to it. - `thread/list` — page through stored rollouts; supports cursor-based pagination and optional `modelProviders` filtering. @@ -67,8 +64,16 @@ The JSON-RPC API exposes dedicated methods for managing Codex conversations. Thr - `turn/interrupt` — request cancellation of an in-flight turn by `(thread_id, turn_id)`; success is an empty `{}` response and the turn finishes with `status: "interrupted"`. - `review/start` — kick off Codex’s automated reviewer for a thread; responds like `turn/start` and emits `item/started`/`item/completed` notifications with `enteredReviewMode` and `exitedReviewMode` items, plus a final assistant `agentMessage` containing the review. - `command/exec` — run a single command under the server sandbox without starting a thread/turn (handy for utilities and validation). +- `model/list` — list available models (with reasoning effort options). 
+- `mcpServer/oauth/login` — start an OAuth login for a configured MCP server; returns an `authorization_url` and later emits `mcpServer/oauthLogin/completed` once the browser flow finishes. +- `mcpServers/list` — enumerate configured MCP servers with their tools, resources, resource templates, and auth status; supports cursor+limit pagination. +- `feedback/upload` — submit a feedback report (classification + optional reason/logs and conversation_id); returns the tracking thread id. +- `command/exec` — run a single command under the server sandbox without starting a thread/turn (handy for utilities and validation). +- `config/read` — fetch the effective config on disk after resolving config layering. +- `config/value/write` — write a single config key/value to the user's config.toml on disk. +- `config/batchWrite` — apply multiple config edits atomically to the user's config.toml on disk. -### 1) Start or resume a thread +### Example: Start or resume a thread Start a fresh thread when you need a new Codex conversation. @@ -99,7 +104,7 @@ To continue a stored session, call `thread/resume` with the `thread.id` you prev { "id": 11, "result": { "thread": { "id": "thr_123", … } } } ``` -### 2) List threads (pagination & filters) +### Example: List threads (with pagination & filters) `thread/list` lets you render a history UI. Pass any combination of: - `cursor` — opaque string from a prior response; omit for the first page. @@ -124,7 +129,7 @@ Example: When `nextCursor` is `null`, you’ve reached the final page. -### 3) Archive a thread +### Example: Archive a thread Use `thread/archive` to move the persisted rollout (stored as a JSONL file on disk) into the archived sessions directory. @@ -135,7 +140,7 @@ Use `thread/archive` to move the persisted rollout (stored as a JSONL file on di An archived thread will not appear in future calls to `thread/list`. 
-### 4) Start a turn (send user input) +### Example: Start a turn (send user input) Turns attach user input (text or images) to a thread and trigger Codex generation. The `input` field is a list of discriminated unions: @@ -169,7 +174,7 @@ You can optionally specify config overrides on the new turn. If specified, these } } } ``` -### 5) Interrupt an active turn +### Example: Interrupt an active turn You can cancel a running Turn with `turn/interrupt`. @@ -183,7 +188,7 @@ You can cancel a running Turn with `turn/interrupt`. The server requests cancellations for running subprocesses, then emits a `turn/completed` event with `status: "interrupted"`. Rely on the `turn/completed` to know when Codex-side cleanup is done. -### 6) Request a code review +### Example: Request a code review Use `review/start` to run Codex’s reviewer on the currently checked-out project. The request takes the thread id plus a `target` describing what should be reviewed: @@ -242,7 +247,7 @@ containing an `exitedReviewMode` item with the final review text: The `review` string is plain text that already bundles the overall explanation plus a bullet list for each structured finding (matching `ThreadItem::ExitedReviewMode` in the generated schema). Use this notification to render the reviewer output in your client. -### 7) One-off command execution +### Example: One-off command execution Run a standalone command (argv vector) in the server’s sandbox without creating a thread or turn: @@ -261,7 +266,7 @@ Notes: - `sandboxPolicy` accepts the same shape used by `turn/start` (e.g., `dangerFullAccess`, `readOnly`, `workspaceWrite` with flags). - When omitted, `timeoutMs` falls back to the server default. -## Events (work-in-progress) +## Events Event notifications are the server-initiated event stream for thread lifecycles, turn lifecycles, and the items within them. After you start or resume a thread, keep reading stdout for `thread/started`, `turn/*`, and `item/*` notifications. 
@@ -271,11 +276,12 @@ The app-server streams JSON-RPC notifications while a turn is running. Each turn - `turn/started` — `{ turn }` with the turn id, empty `items`, and `status: "inProgress"`. - `turn/completed` — `{ turn }` where `turn.status` is `completed`, `interrupted`, or `failed`; failures carry `{ error: { message, codexErrorInfo? } }`. +- `turn/diff/updated` — `{ threadId, turnId, diff }` represents the up-to-date snapshot of the turn-level unified diff, emitted after every FileChange item. `diff` is the latest aggregated unified diff across every file change in the turn. UIs can render this to show the full "what changed" view without stitching individual `fileChange` items. - `turn/plan/updated` — `{ turnId, explanation?, plan }` whenever the agent shares or changes its plan; each `plan` entry is `{ step, status }` with `status` in `pending`, `inProgress`, or `completed`. Today both notifications carry an empty `items` array even when item events were streamed; rely on `item/*` notifications for the canonical item list until this is fixed. -#### Thread items +#### Items `ThreadItem` is the tagged union carried in turn responses and `item/*` notifications. Currently we support events for the following items: - `userMessage` — `{id, content}` where `content` is a list of user inputs (`text`, `image`, or `localImage`). @@ -285,6 +291,9 @@ Today both notifications carry an empty `items` array even when item events were - `fileChange` — `{id, changes, status}` describing proposed edits; `changes` list `{path, kind, diff}` and `status` is `inProgress`, `completed`, `failed`, or `declined`. - `mcpToolCall` — `{id, server, tool, status, arguments, result?, error?}` describing MCP calls; `status` is `inProgress`, `completed`, or `failed`. - `webSearch` — `{id, query}` for a web search request issued by the agent. +- `imageView` — `{id, path}` emitted when the agent invokes the image viewer tool. 
+- `enteredReviewMode` — `{id, review}` sent when the reviewer starts; `review` is a short user-facing label such as `"current changes"` or the requested target description. +- `exitedReviewMode` — `{id, review}` emitted when the reviewer finishes; `review` is the full plain-text review (usually, overall notes plus bullet point findings). - `compacted` - `{threadId, turnId}` when codex compacts the conversation history. This can happen automatically. All items emit two shared lifecycle events: @@ -302,7 +311,7 @@ There are additional item-specific events: - `item/commandExecution/outputDelta` — streams stdout/stderr for the command; append deltas in order to render live output alongside `aggregatedOutput` in the final item. Final `commandExecution` items include parsed `commandActions`, `status`, `exitCode`, and `durationMs` so the UI can summarize what ran and whether it succeeded. #### fileChange -`fileChange` items contain a `changes` list with `{path, kind, diff}` entries (`kind` is `add`, `delete`, or `update` with an optional `movePath`). The `status` tracks whether apply succeeded (`completed`), failed, or was `declined`. +- `item/fileChange/outputDelta` - contains the tool call response of the underlying `apply_patch` tool call. ### Errors `error` event is emitted whenever the server hits an error mid-turn (for example, upstream model errors or quota limits). Carries the same `{ error: { message, codexErrorInfo? } }` payload as `turn.status: "failed"` and may precede that terminal notification. @@ -351,7 +360,7 @@ UI guidance for IDEs: surface an approval dialog as soon as the request arrives. The JSON-RPC auth/account surface exposes request/response methods plus server-initiated notifications (no `id`). Use these to determine auth state, start or cancel logins, logout, and inspect ChatGPT rate limits. -### Quick reference +### API Overview - `account/read` — fetch current account info; optionally refresh tokens. 
- `account/login/start` — begin login (`apiKey` or `chatgpt`). - `account/login/completed` (notify) — emitted when a login attempt finishes (success or error). @@ -359,6 +368,8 @@ The JSON-RPC auth/account surface exposes request/response methods plus server-i - `account/logout` — sign out; triggers `account/updated`. - `account/updated` (notify) — emitted whenever auth mode changes (`authMode`: `apikey`, `chatgpt`, or `null`). - `account/rateLimits/read` — fetch ChatGPT rate limits; updates arrive via `account/rateLimits/updated` (notify). +- `account/rateLimits/updated` (notify) — emitted whenever a user's ChatGPT rate limits change. +- `mcpServer/oauthLogin/completed` (notify) — emitted after a `mcpServer/oauth/login` flow finishes for a server; payload includes `{ name, success, error? }`. ### 1) Check auth state @@ -436,9 +447,3 @@ Field notes: - `usedPercent` is current usage within the OpenAI quota window. - `windowDurationMins` is the quota window length. - `resetsAt` is a Unix timestamp (seconds) for the next reset. - -### Dev notes - -- `codex app-server generate-ts --out ` emits v2 types under `v2/`. -- `codex app-server generate-json-schema --out ` outputs `codex_app_server_protocol.schemas.json`. -- See [“Authentication and authorization” in the config docs](../../docs/config.md#authentication-and-authorization) for configuration knobs. 
diff --git a/codex-rs/app-server/src/bespoke_event_handling.rs b/codex-rs/app-server/src/bespoke_event_handling.rs index b4dd16b9a6..b0161cd9fd 100644 --- a/codex-rs/app-server/src/bespoke_event_handling.rs +++ b/codex-rs/app-server/src/bespoke_event_handling.rs @@ -18,6 +18,7 @@ use codex_app_server_protocol::ContextCompactedNotification; use codex_app_server_protocol::ErrorNotification; use codex_app_server_protocol::ExecCommandApprovalParams; use codex_app_server_protocol::ExecCommandApprovalResponse; +use codex_app_server_protocol::ExecPolicyAmendment as V2ExecPolicyAmendment; use codex_app_server_protocol::FileChangeOutputDeltaNotification; use codex_app_server_protocol::FileChangeRequestApprovalParams; use codex_app_server_protocol::FileChangeRequestApprovalResponse; @@ -33,9 +34,9 @@ use codex_app_server_protocol::PatchChangeKind as V2PatchChangeKind; use codex_app_server_protocol::ReasoningSummaryPartAddedNotification; use codex_app_server_protocol::ReasoningSummaryTextDeltaNotification; use codex_app_server_protocol::ReasoningTextDeltaNotification; -use codex_app_server_protocol::SandboxCommandAssessment as V2SandboxCommandAssessment; use codex_app_server_protocol::ServerNotification; use codex_app_server_protocol::ServerRequestPayload; +use codex_app_server_protocol::TerminalInteractionNotification; use codex_app_server_protocol::ThreadItem; use codex_app_server_protocol::ThreadTokenUsage; use codex_app_server_protocol::ThreadTokenUsageUpdatedNotification; @@ -178,7 +179,7 @@ pub(crate) async fn apply_bespoke_event_handling( command, cwd, reason, - risk, + proposed_execpolicy_amendment, parsed_cmd, }) => match api_version { ApiVersion::V1 => { @@ -188,7 +189,6 @@ pub(crate) async fn apply_bespoke_event_handling( command, cwd, reason, - risk, parsed_cmd, }; let rx = outgoing @@ -206,6 +206,8 @@ pub(crate) async fn apply_bespoke_event_handling( .map(V2ParsedCommand::from) .collect::>(); let command_string = shlex_join(&command); + let 
proposed_execpolicy_amendment_v2 = + proposed_execpolicy_amendment.map(V2ExecPolicyAmendment::from); let params = CommandExecutionRequestApprovalParams { thread_id: conversation_id.to_string(), @@ -214,7 +216,7 @@ pub(crate) async fn apply_bespoke_event_handling( // and emit the corresponding EventMsg, we repurpose the call_id as the item_id. item_id: item_id.clone(), reason, - risk: risk.map(V2SandboxCommandAssessment::from), + proposed_execpolicy_amendment: proposed_execpolicy_amendment_v2, }; let rx = outgoing .send_request(ServerRequestPayload::CommandExecutionRequestApproval( @@ -332,6 +334,7 @@ pub(crate) async fn apply_bespoke_event_handling( outgoing .send_server_notification(ServerNotification::Error(ErrorNotification { error: turn_error, + will_retry: false, thread_id: conversation_id.to_string(), turn_id: event_turn_id.clone(), })) @@ -347,6 +350,7 @@ pub(crate) async fn apply_bespoke_event_handling( outgoing .send_server_notification(ServerNotification::Error(ErrorNotification { error: turn_error, + will_retry: true, thread_id: conversation_id.to_string(), turn_id: event_turn_id.clone(), })) @@ -566,6 +570,20 @@ pub(crate) async fn apply_bespoke_event_handling( .await; } } + EventMsg::TerminalInteraction(terminal_event) => { + let item_id = terminal_event.call_id.clone(); + + let notification = TerminalInteractionNotification { + thread_id: conversation_id.to_string(), + turn_id: event_turn_id.clone(), + item_id, + process_id: terminal_event.process_id, + stdin: terminal_event.stdin, + }; + outgoing + .send_server_notification(ServerNotification::TerminalInteraction(notification)) + .await; + } EventMsg::ExecCommandEnd(exec_command_end_event) => { let ExecCommandEndEvent { call_id, @@ -661,6 +679,7 @@ pub(crate) async fn apply_bespoke_event_handling( } EventMsg::PlanUpdate(plan_update_event) => { handle_turn_plan_update( + conversation_id, &event_turn_id, plan_update_event, api_version, @@ -693,6 +712,7 @@ async fn handle_turn_diff( } async fn 
handle_turn_plan_update( + conversation_id: ConversationId, event_turn_id: &str, plan_update_event: UpdatePlanArgs, api_version: ApiVersion, @@ -700,6 +720,7 @@ async fn handle_turn_plan_update( ) { if let ApiVersion::V2 = api_version { let notification = TurnPlanUpdatedNotification { + thread_id: conversation_id.to_string(), turn_id: event_turn_id.to_string(), explanation: plan_update_event.explanation, plan: plan_update_event @@ -1041,7 +1062,11 @@ async fn on_file_change_request_approval_response( }); let (decision, completion_status) = match response.decision { - ApprovalDecision::Accept => (ReviewDecision::Approved, None), + ApprovalDecision::Accept + | ApprovalDecision::AcceptForSession + | ApprovalDecision::AcceptWithExecpolicyAmendment { .. } => { + (ReviewDecision::Approved, None) + } ApprovalDecision::Decline => { (ReviewDecision::Denied, Some(PatchApplyStatus::Declined)) } @@ -1103,25 +1128,27 @@ async fn on_command_execution_request_approval_response( error!("failed to deserialize CommandExecutionRequestApprovalResponse: {err}"); CommandExecutionRequestApprovalResponse { decision: ApprovalDecision::Decline, - accept_settings: None, } }); - let CommandExecutionRequestApprovalResponse { - decision, - accept_settings, - } = response; + let decision = response.decision; - let (decision, completion_status) = match (decision, accept_settings) { - (ApprovalDecision::Accept, Some(settings)) if settings.for_session => { - (ReviewDecision::ApprovedForSession, None) - } - (ApprovalDecision::Accept, _) => (ReviewDecision::Approved, None), - (ApprovalDecision::Decline, _) => ( + let (decision, completion_status) = match decision { + ApprovalDecision::Accept => (ReviewDecision::Approved, None), + ApprovalDecision::AcceptForSession => (ReviewDecision::ApprovedForSession, None), + ApprovalDecision::AcceptWithExecpolicyAmendment { + execpolicy_amendment, + } => ( + ReviewDecision::ApprovedExecpolicyAmendment { + proposed_execpolicy_amendment: 
execpolicy_amendment.into_core(), + }, + None, + ), + ApprovalDecision::Decline => ( ReviewDecision::Denied, Some(CommandExecutionStatus::Declined), ), - (ApprovalDecision::Cancel, _) => ( + ApprovalDecision::Cancel => ( ReviewDecision::Abort, Some(CommandExecutionStatus::Declined), ), @@ -1174,6 +1201,7 @@ async fn construct_mcp_tool_call_notification( arguments: begin_event.invocation.arguments.unwrap_or(JsonValue::Null), result: None, error: None, + duration_ms: None, }; ItemStartedNotification { thread_id, @@ -1182,7 +1210,7 @@ async fn construct_mcp_tool_call_notification( } } -/// simiilar to handle_mcp_tool_call_end in exec +/// similar to handle_mcp_tool_call_end in exec async fn construct_mcp_tool_call_end_notification( end_event: McpToolCallEndEvent, thread_id: String, @@ -1193,6 +1221,7 @@ async fn construct_mcp_tool_call_end_notification( } else { McpToolCallStatus::Failed }; + let duration_ms = i64::try_from(end_event.duration.as_millis()).ok(); let (result, error) = match &end_event.result { Ok(value) => ( @@ -1218,6 +1247,7 @@ async fn construct_mcp_tool_call_end_notification( arguments: end_event.invocation.arguments.unwrap_or(JsonValue::Null), result, error, + duration_ms, }; ItemCompletedNotification { thread_id, @@ -1422,7 +1452,16 @@ mod tests { ], }; - handle_turn_plan_update("turn-123", update, ApiVersion::V2, &outgoing).await; + let conversation_id = ConversationId::new(); + + handle_turn_plan_update( + conversation_id, + "turn-123", + update, + ApiVersion::V2, + &outgoing, + ) + .await; let msg = rx .recv() @@ -1430,6 +1469,7 @@ mod tests { .ok_or_else(|| anyhow!("should send one notification"))?; match msg { OutgoingMessage::AppServerNotification(ServerNotification::TurnPlanUpdated(n)) => { + assert_eq!(n.thread_id, conversation_id.to_string()); assert_eq!(n.turn_id, "turn-123"); assert_eq!(n.explanation.as_deref(), Some("need plan")); assert_eq!(n.plan.len(), 2); @@ -1480,6 +1520,7 @@ mod tests { unlimited: false, balance: 
Some("5".to_string()), }), + plan_type: None, }; handle_token_count_event( @@ -1584,6 +1625,7 @@ mod tests { arguments: serde_json::json!({"server": ""}), result: None, error: None, + duration_ms: None, }, }; @@ -1737,6 +1779,7 @@ mod tests { arguments: JsonValue::Null, result: None, error: None, + duration_ms: None, }, }; @@ -1790,6 +1833,7 @@ mod tests { structured_content: None, }), error: None, + duration_ms: Some(0), }, }; @@ -1831,6 +1875,7 @@ mod tests { error: Some(McpToolCallError { message: "boom".to_string(), }), + duration_ms: Some(1), }, }; diff --git a/codex-rs/app-server/src/codex_message_processor.rs b/codex-rs/app-server/src/codex_message_processor.rs index 245486e482..c62734f24e 100644 --- a/codex-rs/app-server/src/codex_message_processor.rs +++ b/codex-rs/app-server/src/codex_message_processor.rs @@ -19,6 +19,7 @@ use codex_app_server_protocol::AuthMode; use codex_app_server_protocol::AuthStatusChangeNotification; use codex_app_server_protocol::CancelLoginAccountParams; use codex_app_server_protocol::CancelLoginAccountResponse; +use codex_app_server_protocol::CancelLoginAccountStatus; use codex_app_server_protocol::CancelLoginChatGptResponse; use codex_app_server_protocol::ClientRequest; use codex_app_server_protocol::CommandExecParams; @@ -55,6 +56,9 @@ use codex_app_server_protocol::LoginChatGptResponse; use codex_app_server_protocol::LogoutAccountResponse; use codex_app_server_protocol::LogoutChatGptResponse; use codex_app_server_protocol::McpServer; +use codex_app_server_protocol::McpServerOauthLoginCompletedNotification; +use codex_app_server_protocol::McpServerOauthLoginParams; +use codex_app_server_protocol::McpServerOauthLoginResponse; use codex_app_server_protocol::ModelListParams; use codex_app_server_protocol::ModelListResponse; use codex_app_server_protocol::NewConversationParams; @@ -115,6 +119,7 @@ use codex_core::config::Config; use codex_core::config::ConfigOverrides; use codex_core::config::ConfigToml; use 
codex_core::config::edit::ConfigEditsBuilder; +use codex_core::config::types::McpServerTransportConfig; use codex_core::config_loader::load_config_as_toml; use codex_core::default_client::get_codex_user_agent; use codex_core::exec::ExecParams; @@ -132,6 +137,7 @@ use codex_core::protocol::ReviewRequest; use codex_core::protocol::ReviewTarget as CoreReviewTarget; use codex_core::protocol::SessionConfiguredEvent; use codex_core::read_head_for_summary; +use codex_core::sandboxing::SandboxPermissions; use codex_feedback::CodexFeedback; use codex_login::ServerOptions as LoginServerOptions; use codex_login::ShutdownHandle; @@ -147,6 +153,7 @@ use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::SessionMetaLine; use codex_protocol::protocol::USER_MESSAGE_BEGIN; use codex_protocol::user_input::UserInput as CoreInputItem; +use codex_rmcp_client::perform_oauth_login_return_url; use codex_utils_json_to_toml::json_to_toml; use std::collections::HashMap; use std::collections::HashSet; @@ -161,6 +168,7 @@ use std::time::Duration; use tokio::select; use tokio::sync::Mutex; use tokio::sync::oneshot; +use toml::Value as TomlValue; use tracing::error; use tracing::info; use tracing::warn; @@ -178,6 +186,9 @@ pub(crate) struct TurnSummary { pub(crate) type TurnSummaryStore = Arc>>; +const THREAD_LIST_DEFAULT_LIMIT: usize = 25; +const THREAD_LIST_MAX_LIMIT: usize = 100; + // Duration before a ChatGPT login attempt is abandoned. 
const LOGIN_CHATGPT_TIMEOUT: Duration = Duration::from_secs(10 * 60); struct ActiveLogin { @@ -185,6 +196,11 @@ struct ActiveLogin { login_id: Uuid, } +#[derive(Clone, Copy, Debug)] +enum CancelLoginError { + NotFound(Uuid), +} + impl Drop for ActiveLogin { fn drop(&mut self) { self.shutdown_handle.shutdown(); @@ -198,6 +214,7 @@ pub(crate) struct CodexMessageProcessor { outgoing: Arc, codex_linux_sandbox_exe: Option, config: Arc, + cli_overrides: Vec<(String, TomlValue)>, conversation_listeners: HashMap>, active_login: Arc>>, // Queue of pending interrupt requests per conversation. We reply when TurnAborted arrives. @@ -244,6 +261,7 @@ impl CodexMessageProcessor { outgoing: Arc, codex_linux_sandbox_exe: Option, config: Arc, + cli_overrides: Vec<(String, TomlValue)>, feedback: CodexFeedback, ) -> Self { Self { @@ -252,6 +270,7 @@ impl CodexMessageProcessor { outgoing, codex_linux_sandbox_exe, config, + cli_overrides, conversation_listeners: HashMap::new(), active_login: Arc::new(Mutex::new(None)), pending_interrupts: Arc::new(Mutex::new(HashMap::new())), @@ -261,6 +280,16 @@ impl CodexMessageProcessor { } } + async fn load_latest_config(&self) -> Result { + Config::load_with_cli_overrides(self.cli_overrides.clone(), ConfigOverrides::default()) + .await + .map_err(|err| JSONRPCErrorError { + code: INTERNAL_ERROR_CODE, + message: format!("failed to reload config: {err}"), + data: None, + }) + } + fn review_request_from_target( target: ApiReviewTarget, ) -> Result<(ReviewRequest, String), JSONRPCErrorError> { @@ -369,6 +398,9 @@ impl CodexMessageProcessor { ClientRequest::ModelList { request_id, params } => { self.list_models(request_id, params).await; } + ClientRequest::McpServerOauthLogin { request_id, params } => { + self.mcp_server_oauth_login(request_id, params).await; + } ClientRequest::McpServersList { request_id, params } => { self.list_mcp_servers(request_id, params).await; } @@ -802,7 +834,7 @@ impl CodexMessageProcessor { async fn 
cancel_login_chatgpt_common( &mut self, login_id: Uuid, - ) -> std::result::Result<(), JSONRPCErrorError> { + ) -> std::result::Result<(), CancelLoginError> { let mut guard = self.active_login.lock().await; if guard.as_ref().map(|l| l.login_id) == Some(login_id) { if let Some(active) = guard.take() { @@ -810,11 +842,7 @@ impl CodexMessageProcessor { } Ok(()) } else { - Err(JSONRPCErrorError { - code: INVALID_REQUEST_ERROR_CODE, - message: format!("login id not found: {login_id}"), - data: None, - }) + Err(CancelLoginError::NotFound(login_id)) } } @@ -825,7 +853,12 @@ impl CodexMessageProcessor { .send_response(request_id, CancelLoginChatGptResponse {}) .await; } - Err(error) => { + Err(CancelLoginError::NotFound(missing_login_id)) => { + let error = JSONRPCErrorError { + code: INVALID_REQUEST_ERROR_CODE, + message: format!("login id not found: {missing_login_id}"), + data: None, + }; self.outgoing.send_error(request_id, error).await; } } @@ -834,16 +867,14 @@ impl CodexMessageProcessor { async fn cancel_login_v2(&mut self, request_id: RequestId, params: CancelLoginAccountParams) { let login_id = params.login_id; match Uuid::parse_str(&login_id) { - Ok(uuid) => match self.cancel_login_chatgpt_common(uuid).await { - Ok(()) => { - self.outgoing - .send_response(request_id, CancelLoginAccountResponse {}) - .await; - } - Err(error) => { - self.outgoing.send_error(request_id, error).await; - } - }, + Ok(uuid) => { + let status = match self.cancel_login_chatgpt_common(uuid).await { + Ok(()) => CancelLoginAccountStatus::Canceled, + Err(CancelLoginError::NotFound(_)) => CancelLoginAccountStatus::NotFound, + }; + let response = CancelLoginAccountResponse { status }; + self.outgoing.send_response(request_id, response).await; + } Err(_) => { let error = JSONRPCErrorError { code: INVALID_REQUEST_ERROR_CODE, @@ -1169,7 +1200,7 @@ impl CodexMessageProcessor { cwd, expiration: timeout_ms.into(), env, - with_escalated_permissions: None, + sandbox_permissions: 
SandboxPermissions::UseDefault, justification: None, arg0: None, }; @@ -1485,10 +1516,12 @@ impl CodexMessageProcessor { model_providers, } = params; - let page_size = limit.unwrap_or(25).max(1) as usize; - + let requested_page_size = limit + .map(|value| value as usize) + .unwrap_or(THREAD_LIST_DEFAULT_LIMIT) + .clamp(1, THREAD_LIST_MAX_LIMIT); let (summaries, next_cursor) = match self - .list_conversations_common(page_size, cursor, model_providers) + .list_conversations_common(requested_page_size, cursor, model_providers) .await { Ok(r) => r, @@ -1499,7 +1532,6 @@ impl CodexMessageProcessor { }; let data = summaries.into_iter().map(summary_to_thread).collect(); - let response = ThreadListResponse { data, next_cursor }; self.outgoing.send_response(request_id, response).await; } @@ -1777,10 +1809,12 @@ impl CodexMessageProcessor { cursor, model_providers, } = params; - let page_size = page_size.unwrap_or(25).max(1); + let requested_page_size = page_size + .unwrap_or(THREAD_LIST_DEFAULT_LIMIT) + .clamp(1, THREAD_LIST_MAX_LIMIT); match self - .list_conversations_common(page_size, cursor, model_providers) + .list_conversations_common(requested_page_size, cursor, model_providers) .await { Ok((items, next_cursor)) => { @@ -1795,12 +1829,15 @@ impl CodexMessageProcessor { async fn list_conversations_common( &self, - page_size: usize, + requested_page_size: usize, cursor: Option, model_providers: Option>, ) -> Result<(Vec, Option), JSONRPCErrorError> { - let cursor_obj: Option = cursor.as_ref().and_then(|s| parse_cursor(s)); - let cursor_ref = cursor_obj.as_ref(); + let mut cursor_obj: Option = cursor.as_ref().and_then(|s| parse_cursor(s)); + let mut last_cursor = cursor_obj.clone(); + let mut remaining = requested_page_size; + let mut items = Vec::with_capacity(requested_page_size); + let mut next_cursor: Option = None; let model_provider_filter = match model_providers { Some(providers) => { @@ -1814,56 +1851,76 @@ impl CodexMessageProcessor { }; let fallback_provider = 
self.config.model_provider_id.clone(); - let page = match RolloutRecorder::list_conversations( - &self.config.codex_home, - page_size, - cursor_ref, - INTERACTIVE_SESSION_SOURCES, - model_provider_filter.as_deref(), - fallback_provider.as_str(), - ) - .await - { - Ok(p) => p, - Err(err) => { - return Err(JSONRPCErrorError { - code: INTERNAL_ERROR_CODE, - message: format!("failed to list conversations: {err}"), - data: None, - }); + while remaining > 0 { + let page_size = remaining.min(THREAD_LIST_MAX_LIMIT); + let page = RolloutRecorder::list_conversations( + &self.config.codex_home, + page_size, + cursor_obj.as_ref(), + INTERACTIVE_SESSION_SOURCES, + model_provider_filter.as_deref(), + fallback_provider.as_str(), + ) + .await + .map_err(|err| JSONRPCErrorError { + code: INTERNAL_ERROR_CODE, + message: format!("failed to list conversations: {err}"), + data: None, + })?; + + let mut filtered = page + .items + .into_iter() + .filter_map(|it| { + let session_meta_line = it.head.first().and_then(|first| { + serde_json::from_value::(first.clone()).ok() + })?; + extract_conversation_summary( + it.path, + &it.head, + &session_meta_line.meta, + session_meta_line.git.as_ref(), + fallback_provider.as_str(), + ) + }) + .collect::>(); + if filtered.len() > remaining { + filtered.truncate(remaining); } - }; + items.extend(filtered); + remaining = requested_page_size.saturating_sub(items.len()); - let items = page - .items - .into_iter() - .filter_map(|it| { - let session_meta_line = it.head.first().and_then(|first| { - serde_json::from_value::(first.clone()).ok() - })?; - extract_conversation_summary( - it.path, - &it.head, - &session_meta_line.meta, - session_meta_line.git.as_ref(), - fallback_provider.as_str(), - ) - }) - .collect::>(); + // Encode RolloutCursor into the JSON-RPC string form returned to clients. 
+ let next_cursor_value = page.next_cursor.clone(); + next_cursor = next_cursor_value + .as_ref() + .and_then(|cursor| serde_json::to_value(cursor).ok()) + .and_then(|value| value.as_str().map(str::to_owned)); + if remaining == 0 { + break; + } - // Encode next_cursor as a plain string - let next_cursor = page - .next_cursor - .and_then(|cursor| serde_json::to_value(&cursor).ok()) - .and_then(|value| value.as_str().map(str::to_owned)); + match next_cursor_value { + Some(cursor_val) if remaining > 0 => { + // Break if our pagination would reuse the same cursor again; this avoids + // an infinite loop when filtering drops everything on the page. + if last_cursor.as_ref() == Some(&cursor_val) { + next_cursor = None; + break; + } + last_cursor = Some(cursor_val.clone()); + cursor_obj = Some(cursor_val); + } + _ => break, + } + } Ok((items, next_cursor)) } async fn list_models(&self, request_id: RequestId, params: ModelListParams) { let ModelListParams { limit, cursor } = params; - let auth_mode = self.auth_manager.auth().map(|auth| auth.mode); - let models = supported_models(auth_mode); + let models = supported_models(self.conversation_manager.clone(), &self.config).await; let total = models.len(); if total == 0 { @@ -1917,6 +1974,110 @@ impl CodexMessageProcessor { self.outgoing.send_response(request_id, response).await; } + async fn mcp_server_oauth_login( + &self, + request_id: RequestId, + params: McpServerOauthLoginParams, + ) { + let config = match self.load_latest_config().await { + Ok(config) => config, + Err(error) => { + self.outgoing.send_error(request_id, error).await; + return; + } + }; + + if !config.features.enabled(Feature::RmcpClient) { + let error = JSONRPCErrorError { + code: INVALID_REQUEST_ERROR_CODE, + message: "OAuth login is only supported when [features].rmcp_client is true in config.toml".to_string(), + data: None, + }; + self.outgoing.send_error(request_id, error).await; + return; + } + + let McpServerOauthLoginParams { + name, + scopes, + 
timeout_secs, + } = params; + + let Some(server) = config.mcp_servers.get(&name) else { + let error = JSONRPCErrorError { + code: INVALID_REQUEST_ERROR_CODE, + message: format!("No MCP server named '{name}' found."), + data: None, + }; + self.outgoing.send_error(request_id, error).await; + return; + }; + + let (url, http_headers, env_http_headers) = match &server.transport { + McpServerTransportConfig::StreamableHttp { + url, + http_headers, + env_http_headers, + .. + } => (url.clone(), http_headers.clone(), env_http_headers.clone()), + _ => { + let error = JSONRPCErrorError { + code: INVALID_REQUEST_ERROR_CODE, + message: "OAuth login is only supported for streamable HTTP servers." + .to_string(), + data: None, + }; + self.outgoing.send_error(request_id, error).await; + return; + } + }; + + match perform_oauth_login_return_url( + &name, + &url, + config.mcp_oauth_credentials_store_mode, + http_headers, + env_http_headers, + scopes.as_deref().unwrap_or_default(), + timeout_secs, + ) + .await + { + Ok(handle) => { + let authorization_url = handle.authorization_url().to_string(); + let notification_name = name.clone(); + let outgoing = Arc::clone(&self.outgoing); + + tokio::spawn(async move { + let (success, error) = match handle.wait().await { + Ok(()) => (true, None), + Err(err) => (false, Some(err.to_string())), + }; + + let notification = ServerNotification::McpServerOauthLoginCompleted( + McpServerOauthLoginCompletedNotification { + name: notification_name, + success, + error, + }, + ); + outgoing.send_server_notification(notification).await; + }); + + let response = McpServerOauthLoginResponse { authorization_url }; + self.outgoing.send_response(request_id, response).await; + } + Err(err) => { + let error = JSONRPCErrorError { + code: INTERNAL_ERROR_CODE, + message: format!("failed to login to MCP server '{name}': {err}"), + data: None, + }; + self.outgoing.send_error(request_id, error).await; + } + } + } + async fn list_mcp_servers(&self, request_id: 
RequestId, params: ListMcpServersParams) { let snapshot = collect_mcp_snapshot(self.config.as_ref()).await; @@ -2670,7 +2831,7 @@ impl CodexMessageProcessor { })?; let mut config = self.config.as_ref().clone(); - config.model = self.config.review_model.clone(); + config.model = Some(self.config.review_model.clone()); let NewConversation { conversation_id, @@ -3021,10 +3182,26 @@ impl CodexMessageProcessor { let FeedbackUploadParams { classification, reason, - conversation_id, + thread_id, include_logs, } = params; + let conversation_id = match thread_id.as_deref() { + Some(thread_id) => match ConversationId::from_string(thread_id) { + Ok(conversation_id) => Some(conversation_id), + Err(err) => { + let error = JSONRPCErrorError { + code: INVALID_REQUEST_ERROR_CODE, + message: format!("invalid thread id: {err}"), + data: None, + }; + self.outgoing.send_error(request_id, error).await; + return; + } + }, + None => None, + }; + let snapshot = self.feedback.snapshot(conversation_id); let thread_id = snapshot.thread_id.clone(); diff --git a/codex-rs/app-server/src/config_api.rs b/codex-rs/app-server/src/config_api.rs index 68bbdd8c66..c1eaf62d26 100644 --- a/codex-rs/app-server/src/config_api.rs +++ b/codex-rs/app-server/src/config_api.rs @@ -1,6 +1,6 @@ use crate::error_code::INTERNAL_ERROR_CODE; use crate::error_code::INVALID_REQUEST_ERROR_CODE; -use anyhow::anyhow; +use codex_app_server_protocol::Config; use codex_app_server_protocol::ConfigBatchWriteParams; use codex_app_server_protocol::ConfigLayer; use codex_app_server_protocol::ConfigLayerMetadata; @@ -15,6 +15,8 @@ use codex_app_server_protocol::MergeStrategy; use codex_app_server_protocol::OverriddenMetadata; use codex_app_server_protocol::WriteStatus; use codex_core::config::ConfigToml; +use codex_core::config::edit::ConfigEdit; +use codex_core::config::edit::ConfigEditsBuilder; use codex_core::config_loader::LoadedConfigLayers; use codex_core::config_loader::LoaderOverrides; use 
codex_core::config_loader::load_config_layers_with_overrides; @@ -26,9 +28,8 @@ use sha2::Sha256; use std::collections::HashMap; use std::path::Path; use std::path::PathBuf; -use tempfile::NamedTempFile; -use tokio::task; use toml::Value as TomlValue; +use toml_edit::Item as TomlItem; const SESSION_FLAGS_SOURCE: &str = "--config"; const MDM_SOURCE: &str = "com.openai.codex/config_toml_base64"; @@ -75,8 +76,10 @@ impl ConfigApi { let effective = layers.effective_config(); validate_config(&effective).map_err(|err| internal_error("invalid configuration", err))?; + let config: Config = serde_json::from_value(to_json_value(&effective)) + .map_err(|err| internal_error("failed to deserialize configuration", err))?; let response = ConfigReadResponse { - config: to_json_value(&effective), + config, origins: layers.origins(), layers: params.include_layers.then(|| layers.layers_high_to_low()), }; @@ -109,12 +112,17 @@ impl ConfigApi { async fn apply_edits( &self, - file_path: String, + file_path: Option, expected_version: Option, edits: Vec<(String, JsonValue, MergeStrategy)>, ) -> Result { let allowed_path = self.codex_home.join(CONFIG_FILE_NAME); - if !paths_match(&allowed_path, &file_path) { + let provided_path = file_path + .as_ref() + .map(PathBuf::from) + .unwrap_or_else(|| allowed_path.clone()); + + if !paths_match(&allowed_path, &provided_path) { return Err(config_write_error( ConfigWriteErrorCode::ConfigLayerReadonly, "Only writes to the user config are allowed", @@ -136,19 +144,20 @@ impl ConfigApi { } let mut user_config = layers.user.config.clone(); - let mut mutated = false; let mut parsed_segments = Vec::new(); + let mut config_edits = Vec::new(); for (key_path, value, strategy) in edits.into_iter() { let segments = parse_key_path(&key_path).map_err(|message| { config_write_error(ConfigWriteErrorCode::ConfigValidationError, message) })?; + let original_value = value_at_path(&user_config, &segments).cloned(); let parsed_value = 
parse_value(value).map_err(|message| { config_write_error(ConfigWriteErrorCode::ConfigValidationError, message) })?; - let changed = apply_merge(&mut user_config, &segments, parsed_value.as_ref(), strategy) - .map_err(|err| match err { + apply_merge(&mut user_config, &segments, parsed_value.as_ref(), strategy).map_err( + |err| match err { MergeError::PathNotFound => config_write_error( ConfigWriteErrorCode::ConfigPathNotFound, "Path not found", @@ -156,9 +165,24 @@ impl ConfigApi { MergeError::Validation(message) => { config_write_error(ConfigWriteErrorCode::ConfigValidationError, message) } - })?; + }, + )?; + + let updated_value = value_at_path(&user_config, &segments).cloned(); + if original_value != updated_value { + let edit = match updated_value { + Some(value) => ConfigEdit::SetPath { + segments: segments.clone(), + value: toml_value_to_item(&value) + .map_err(|err| internal_error("failed to build config edits", err))?, + }, + None => ConfigEdit::ClearPath { + segments: segments.clone(), + }, + }; + config_edits.push(edit); + } - mutated |= changed; parsed_segments.push(segments); } @@ -178,8 +202,10 @@ impl ConfigApi { ) })?; - if mutated { - self.persist_user_config(&user_config) + if !config_edits.is_empty() { + ConfigEditsBuilder::new(&self.codex_home) + .with_edits(config_edits) + .apply() .await .map_err(|err| internal_error("failed to persist config.toml", err))?; } @@ -190,9 +216,16 @@ impl ConfigApi { .map(|_| WriteStatus::OkOverridden) .unwrap_or(WriteStatus::Ok); + let file_path = provided_path + .canonicalize() + .unwrap_or(provided_path.clone()) + .display() + .to_string(); + Ok(ConfigWriteResponse { status, version: updated_layers.user.version.clone(), + file_path, overridden_metadata: overridden, }) } @@ -241,25 +274,6 @@ impl ConfigApi { mdm, }) } - - async fn persist_user_config(&self, user_config: &TomlValue) -> anyhow::Result<()> { - let codex_home = self.codex_home.clone(); - let serialized = toml::to_string_pretty(user_config)?; - - 
task::spawn_blocking(move || -> anyhow::Result<()> { - std::fs::create_dir_all(&codex_home)?; - - let target = codex_home.join(CONFIG_FILE_NAME); - let tmp = NamedTempFile::new_in(&codex_home)?; - std::fs::write(tmp.path(), serialized.as_bytes())?; - tmp.persist(&target)?; - Ok(()) - }) - .await - .map_err(|err| anyhow!("config persistence task panicked: {err}"))??; - - Ok(()) - } } fn parse_value(value: JsonValue) -> Result, String> { @@ -410,6 +424,44 @@ fn clear_path(root: &mut TomlValue, segments: &[String]) -> Result anyhow::Result { + match value { + TomlValue::Table(table) => { + let mut table_item = toml_edit::Table::new(); + table_item.set_implicit(false); + for (key, val) in table { + table_item.insert(key, toml_value_to_item(val)?); + } + Ok(TomlItem::Table(table_item)) + } + other => Ok(TomlItem::Value(toml_value_to_value(other)?)), + } +} + +fn toml_value_to_value(value: &TomlValue) -> anyhow::Result { + match value { + TomlValue::String(val) => Ok(toml_edit::Value::from(val.clone())), + TomlValue::Integer(val) => Ok(toml_edit::Value::from(*val)), + TomlValue::Float(val) => Ok(toml_edit::Value::from(*val)), + TomlValue::Boolean(val) => Ok(toml_edit::Value::from(*val)), + TomlValue::Datetime(val) => Ok(toml_edit::Value::from(*val)), + TomlValue::Array(items) => { + let mut array = toml_edit::Array::new(); + for item in items { + array.push(toml_value_to_value(item)?); + } + Ok(toml_edit::Value::Array(array)) + } + TomlValue::Table(table) => { + let mut inline = toml_edit::InlineTable::new(); + for (key, val) in table { + inline.insert(key, toml_value_to_value(val)?); + } + Ok(toml_edit::Value::InlineTable(inline)) + } + } +} + #[derive(Clone)] struct LayerState { name: ConfigLayerName, @@ -587,15 +639,14 @@ fn canonical_json(value: &JsonValue) -> JsonValue { } } -fn paths_match(expected: &Path, provided: &str) -> bool { - let provided_path = PathBuf::from(provided); +fn paths_match(expected: &Path, provided: &Path) -> bool { if let 
(Ok(expanded_expected), Ok(expanded_provided)) = - (expected.canonicalize(), provided_path.canonicalize()) + (expected.canonicalize(), provided.canonicalize()) { return expanded_expected == expanded_provided; } - expected == provided_path + expected == provided } fn value_at_path<'a>(root: &'a TomlValue, segments: &[String]) -> Option<&'a TomlValue> { @@ -724,9 +775,105 @@ fn config_write_error(code: ConfigWriteErrorCode, message: impl Into) -> #[cfg(test)] mod tests { use super::*; + use anyhow::Result; + use codex_app_server_protocol::AskForApproval; use pretty_assertions::assert_eq; use tempfile::tempdir; + #[test] + fn toml_value_to_item_handles_nested_config_tables() { + let config = r#" +[mcp_servers.docs] +command = "docs-server" + +[mcp_servers.docs.http_headers] +X-Doc = "42" +"#; + + let value: TomlValue = toml::from_str(config).expect("parse config example"); + let item = toml_value_to_item(&value).expect("convert to toml_edit item"); + + let root = item.as_table().expect("root table"); + assert!(!root.is_implicit(), "root table should be explicit"); + + let mcp_servers = root + .get("mcp_servers") + .and_then(TomlItem::as_table) + .expect("mcp_servers table"); + assert!( + !mcp_servers.is_implicit(), + "mcp_servers table should be explicit" + ); + + let docs = mcp_servers + .get("docs") + .and_then(TomlItem::as_table) + .expect("docs table"); + assert_eq!( + docs.get("command") + .and_then(TomlItem::as_value) + .and_then(toml_edit::Value::as_str), + Some("docs-server") + ); + + let http_headers = docs + .get("http_headers") + .and_then(TomlItem::as_table) + .expect("http_headers table"); + assert_eq!( + http_headers + .get("X-Doc") + .and_then(TomlItem::as_value) + .and_then(toml_edit::Value::as_str), + Some("42") + ); + } + + #[tokio::test] + async fn write_value_preserves_comments_and_order() -> Result<()> { + let tmp = tempdir().expect("tempdir"); + let original = r#"# Codex user configuration +model = "gpt-5" +approval_policy = "on-request" + 
+[notice] +# Preserve this comment +hide_full_access_warning = true + +[features] +unified_exec = true +"#; + std::fs::write(tmp.path().join(CONFIG_FILE_NAME), original)?; + + let api = ConfigApi::new(tmp.path().to_path_buf(), vec![]); + api.write_value(ConfigValueWriteParams { + file_path: Some(tmp.path().join(CONFIG_FILE_NAME).display().to_string()), + key_path: "features.remote_compaction".to_string(), + value: json!(true), + merge_strategy: MergeStrategy::Replace, + expected_version: None, + }) + .await + .expect("write succeeds"); + + let updated = + std::fs::read_to_string(tmp.path().join(CONFIG_FILE_NAME)).expect("read config"); + let expected = r#"# Codex user configuration +model = "gpt-5" +approval_policy = "on-request" + +[notice] +# Preserve this comment +hide_full_access_warning = true + +[features] +unified_exec = true +remote_compaction = true +"#; + assert_eq!(updated, expected); + Ok(()) + } + #[tokio::test] async fn read_includes_origins_and_layers() { let tmp = tempdir().expect("tempdir"); @@ -752,10 +899,7 @@ mod tests { .await .expect("response"); - assert_eq!( - response.config.get("approval_policy"), - Some(&json!("never")) - ); + assert_eq!(response.config.approval_policy, Some(AskForApproval::Never)); assert_eq!( response @@ -795,7 +939,7 @@ mod tests { let result = api .write_value(ConfigValueWriteParams { - file_path: tmp.path().join(CONFIG_FILE_NAME).display().to_string(), + file_path: Some(tmp.path().join(CONFIG_FILE_NAME).display().to_string()), key_path: "approval_policy".to_string(), value: json!("never"), merge_strategy: MergeStrategy::Replace, @@ -810,8 +954,10 @@ mod tests { }) .await .expect("read"); - let config_object = read_after.config.as_object().expect("object"); - assert_eq!(config_object.get("approval_policy"), Some(&json!("never"))); + assert_eq!( + read_after.config.approval_policy, + Some(AskForApproval::Never) + ); assert_eq!( read_after .origins @@ -832,7 +978,7 @@ mod tests { let api = 
ConfigApi::new(tmp.path().to_path_buf(), vec![]); let error = api .write_value(ConfigValueWriteParams { - file_path: tmp.path().join(CONFIG_FILE_NAME).display().to_string(), + file_path: Some(tmp.path().join(CONFIG_FILE_NAME).display().to_string()), key_path: "model".to_string(), value: json!("gpt-5"), merge_strategy: MergeStrategy::Replace, @@ -852,6 +998,30 @@ mod tests { ); } + #[tokio::test] + async fn write_value_defaults_to_user_config_path() { + let tmp = tempdir().expect("tempdir"); + std::fs::write(tmp.path().join(CONFIG_FILE_NAME), "").unwrap(); + + let api = ConfigApi::new(tmp.path().to_path_buf(), vec![]); + api.write_value(ConfigValueWriteParams { + file_path: None, + key_path: "model".to_string(), + value: json!("gpt-new"), + merge_strategy: MergeStrategy::Replace, + expected_version: None, + }) + .await + .expect("write succeeds"); + + let contents = + std::fs::read_to_string(tmp.path().join(CONFIG_FILE_NAME)).expect("read config"); + assert!( + contents.contains("model = \"gpt-new\""), + "config.toml should be updated even when file_path is omitted" + ); + } + #[tokio::test] async fn invalid_user_value_rejected_even_if_overridden_by_managed() { let tmp = tempdir().expect("tempdir"); @@ -872,7 +1042,7 @@ mod tests { let error = api .write_value(ConfigValueWriteParams { - file_path: tmp.path().join(CONFIG_FILE_NAME).display().to_string(), + file_path: Some(tmp.path().join(CONFIG_FILE_NAME).display().to_string()), key_path: "approval_policy".to_string(), value: json!("bogus"), merge_strategy: MergeStrategy::Replace, @@ -926,7 +1096,7 @@ mod tests { .await .expect("response"); - assert_eq!(response.config.get("model"), Some(&json!("system"))); + assert_eq!(response.config.model.as_deref(), Some("system")); assert_eq!( response.origins.get("model").expect("origin").name, ConfigLayerName::System @@ -957,7 +1127,7 @@ mod tests { let result = api .write_value(ConfigValueWriteParams { - file_path: tmp.path().join(CONFIG_FILE_NAME).display().to_string(), + 
file_path: Some(tmp.path().join(CONFIG_FILE_NAME).display().to_string()), key_path: "approval_policy".to_string(), value: json!("on-request"), merge_strategy: MergeStrategy::Replace, diff --git a/codex-rs/app-server/src/message_processor.rs b/codex-rs/app-server/src/message_processor.rs index 90560e9b3c..6a6cf5edb2 100644 --- a/codex-rs/app-server/src/message_processor.rs +++ b/codex-rs/app-server/src/message_processor.rs @@ -59,6 +59,7 @@ impl MessageProcessor { outgoing.clone(), codex_linux_sandbox_exe, Arc::clone(&config), + cli_overrides.clone(), feedback, ); let config_api = ConfigApi::new(config.codex_home.clone(), cli_overrides); diff --git a/codex-rs/app-server/src/models.rs b/codex-rs/app-server/src/models.rs index d03795c2d4..2141160354 100644 --- a/codex-rs/app-server/src/models.rs +++ b/codex-rs/app-server/src/models.rs @@ -1,12 +1,19 @@ -use codex_app_server_protocol::AuthMode; +use std::sync::Arc; + use codex_app_server_protocol::Model; use codex_app_server_protocol::ReasoningEffortOption; -use codex_common::model_presets::ModelPreset; -use codex_common::model_presets::ReasoningEffortPreset; -use codex_common::model_presets::builtin_model_presets; +use codex_core::ConversationManager; +use codex_core::config::Config; +use codex_protocol::openai_models::ModelPreset; +use codex_protocol::openai_models::ReasoningEffortPreset; -pub fn supported_models(auth_mode: Option) -> Vec { - builtin_model_presets(auth_mode) +pub async fn supported_models( + conversation_manager: Arc, + config: &Config, +) -> Vec { + conversation_manager + .list_models(config) + .await .into_iter() .map(model_from_preset) .collect() @@ -27,7 +34,7 @@ fn model_from_preset(preset: ModelPreset) -> Model { } fn reasoning_efforts_from_preset( - efforts: &'static [ReasoningEffortPreset], + efforts: Vec, ) -> Vec { efforts .iter() diff --git a/codex-rs/app-server/src/outgoing_message.rs b/codex-rs/app-server/src/outgoing_message.rs index b7f331c9d4..83ac26fd48 100644 --- 
a/codex-rs/app-server/src/outgoing_message.rs +++ b/codex-rs/app-server/src/outgoing_message.rs @@ -16,6 +16,9 @@ use tracing::warn; use crate::error_code::INTERNAL_ERROR_CODE; +#[cfg(test)] +use codex_protocol::account::PlanType; + /// Sends messages to the client and manages request callbacks. pub(crate) struct OutgoingMessageSender { next_request_id: AtomicI64, @@ -230,6 +233,7 @@ mod tests { }), secondary: None, credits: None, + plan_type: Some(PlanType::Plus), }, }); @@ -245,7 +249,8 @@ mod tests { "resetsAt": 123 }, "secondary": null, - "credits": null + "credits": null, + "planType": "plus" } }, }), diff --git a/codex-rs/app-server/tests/common/lib.rs b/codex-rs/app-server/tests/common/lib.rs index 6fd54a66dc..a095a713a0 100644 --- a/codex-rs/app-server/tests/common/lib.rs +++ b/codex-rs/app-server/tests/common/lib.rs @@ -1,6 +1,7 @@ mod auth_fixtures; mod mcp_process; mod mock_model_server; +mod models_cache; mod responses; mod rollout; @@ -11,9 +12,13 @@ pub use auth_fixtures::write_chatgpt_auth; use codex_app_server_protocol::JSONRPCResponse; pub use core_test_support::format_with_current_shell; pub use core_test_support::format_with_current_shell_display; +pub use core_test_support::format_with_current_shell_display_non_login; +pub use core_test_support::format_with_current_shell_non_login; pub use mcp_process::McpProcess; pub use mock_model_server::create_mock_chat_completions_server; pub use mock_model_server::create_mock_chat_completions_server_unchecked; +pub use models_cache::write_models_cache; +pub use models_cache::write_models_cache_with_models; pub use responses::create_apply_patch_sse_response; pub use responses::create_exec_command_sse_response; pub use responses::create_final_assistant_message_sse_response; diff --git a/codex-rs/app-server/tests/common/models_cache.rs b/codex-rs/app-server/tests/common/models_cache.rs new file mode 100644 index 0000000000..f1ff8c5a31 --- /dev/null +++ b/codex-rs/app-server/tests/common/models_cache.rs @@ 
-0,0 +1,85 @@ +use chrono::DateTime; +use chrono::Utc; +use codex_core::openai_models::model_presets::all_model_presets; +use codex_protocol::openai_models::ClientVersion; +use codex_protocol::openai_models::ConfigShellToolType; +use codex_protocol::openai_models::ModelInfo; +use codex_protocol::openai_models::ModelPreset; +use codex_protocol::openai_models::ModelVisibility; +use codex_protocol::openai_models::ReasoningSummaryFormat; +use codex_protocol::openai_models::TruncationPolicyConfig; +use serde_json::json; +use std::path::Path; + +/// Convert a ModelPreset to ModelInfo for cache storage. +fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo { + ModelInfo { + slug: preset.id.clone(), + display_name: preset.display_name.clone(), + description: Some(preset.description.clone()), + default_reasoning_level: preset.default_reasoning_effort, + supported_reasoning_levels: preset.supported_reasoning_efforts.clone(), + shell_type: ConfigShellToolType::ShellCommand, + visibility: if preset.show_in_picker { + ModelVisibility::List + } else { + ModelVisibility::Hide + }, + minimal_client_version: ClientVersion(0, 1, 0), + supported_in_api: true, + priority, + upgrade: preset.upgrade.as_ref().map(|u| u.id.clone()), + base_instructions: None, + supports_reasoning_summaries: false, + support_verbosity: false, + default_verbosity: None, + apply_patch_tool_type: None, + truncation_policy: TruncationPolicyConfig::bytes(10_000), + supports_parallel_tool_calls: false, + context_window: None, + reasoning_summary_format: ReasoningSummaryFormat::None, + experimental_supported_tools: Vec::new(), + } +} + +/// Write a models_cache.json file to the codex home directory. +/// This prevents ModelsManager from making network requests to refresh models. +/// The cache will be treated as fresh (within TTL) and used instead of fetching from the network. +/// Uses the built-in model presets from ModelsManager, converted to ModelInfo format. 
+pub fn write_models_cache(codex_home: &Path) -> std::io::Result<()> { + // Get all presets and filter for show_in_picker (same as builtin_model_presets does) + let presets: Vec<&ModelPreset> = all_model_presets() + .iter() + .filter(|preset| preset.show_in_picker) + .collect(); + // Convert presets to ModelInfo, assigning priorities (higher = earlier in list) + // Priority is used for sorting, so first model gets highest priority + let models: Vec = presets + .iter() + .enumerate() + .map(|(idx, preset)| { + // Higher priority = earlier in list, so reverse the index + let priority = (presets.len() - idx) as i32; + preset_to_info(preset, priority) + }) + .collect(); + + write_models_cache_with_models(codex_home, models) +} + +/// Write a models_cache.json file with specific models. +/// Useful when tests need specific models to be available. +pub fn write_models_cache_with_models( + codex_home: &Path, + models: Vec, +) -> std::io::Result<()> { + let cache_path = codex_home.join("models_cache.json"); + // DateTime serializes to RFC3339 format by default with serde + let fetched_at: DateTime = Utc::now(); + let cache = json!({ + "fetched_at": fetched_at, + "etag": null, + "models": models + }); + std::fs::write(cache_path, serde_json::to_string_pretty(&cache)?) 
+} diff --git a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs index a64aca8051..e417198994 100644 --- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs +++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs @@ -23,10 +23,10 @@ use codex_app_server_protocol::SendUserTurnResponse; use codex_app_server_protocol::ServerRequest; use codex_core::protocol::AskForApproval; use codex_core::protocol::SandboxPolicy; -use codex_core::protocol_config_types::ReasoningEffort; use codex_core::protocol_config_types::ReasoningSummary; use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use codex_protocol::config_types::SandboxMode; +use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::parse_command::ParsedCommand; use codex_protocol::protocol::Event; use codex_protocol::protocol::EventMsg; @@ -271,7 +271,6 @@ async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> { command: format_with_current_shell("python3 -c 'print(42)'"), cwd: working_directory.clone(), reason: None, - risk: None, parsed_cmd: vec![ParsedCommand::Unknown { cmd: "python3 -c 'print(42)'".to_string() }], diff --git a/codex-rs/app-server/tests/suite/config.rs b/codex-rs/app-server/tests/suite/config.rs index 75dba57229..88e74a6fb4 100644 --- a/codex-rs/app-server/tests/suite/config.rs +++ b/codex-rs/app-server/tests/suite/config.rs @@ -10,10 +10,10 @@ use codex_app_server_protocol::Tools; use codex_app_server_protocol::UserSavedConfig; use codex_core::protocol::AskForApproval; use codex_protocol::config_types::ForcedLoginMethod; -use codex_protocol::config_types::ReasoningEffort; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::config_types::SandboxMode; use codex_protocol::config_types::Verbosity; +use codex_protocol::openai_models::ReasoningEffort; use pretty_assertions::assert_eq; use std::collections::HashMap; use 
std::path::Path; diff --git a/codex-rs/app-server/tests/suite/list_resume.rs b/codex-rs/app-server/tests/suite/list_resume.rs index 1e89c06848..34e737437c 100644 --- a/codex-rs/app-server/tests/suite/list_resume.rs +++ b/codex-rs/app-server/tests/suite/list_resume.rs @@ -358,3 +358,81 @@ async fn test_list_and_resume_conversations() -> Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn list_conversations_fetches_through_filtered_pages() -> Result<()> { + let codex_home = TempDir::new()?; + + // Only the last 3 conversations match the provider filter; request 3 and + // ensure pagination keeps fetching past non-matching pages. + let cases = [ + ( + "2025-03-04T12-00-00", + "2025-03-04T12:00:00Z", + "skip_provider", + ), + ( + "2025-03-03T12-00-00", + "2025-03-03T12:00:00Z", + "skip_provider", + ), + ( + "2025-03-02T12-00-00", + "2025-03-02T12:00:00Z", + "target_provider", + ), + ( + "2025-03-01T12-00-00", + "2025-03-01T12:00:00Z", + "target_provider", + ), + ( + "2025-02-28T12-00-00", + "2025-02-28T12:00:00Z", + "target_provider", + ), + ]; + + for (ts_file, ts_rfc, provider) in cases { + create_fake_rollout( + codex_home.path(), + ts_file, + ts_rfc, + "Hello", + Some(provider), + None, + )?; + } + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let req_id = mcp + .send_list_conversations_request(ListConversationsParams { + page_size: Some(3), + cursor: None, + model_providers: Some(vec!["target_provider".to_string()]), + }) + .await?; + let resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(req_id)), + ) + .await??; + let ListConversationsResponse { items, next_cursor } = + to_response::(resp)?; + + assert_eq!( + items.len(), + 3, + "should fetch across pages to satisfy the limit" + ); + assert!( + items + .iter() + .all(|item| item.model_provider == "target_provider") + ); + 
assert_eq!(next_cursor, None); + + Ok(()) +} diff --git a/codex-rs/app-server/tests/suite/login.rs b/codex-rs/app-server/tests/suite/login.rs index c5470c3ec4..e252bcb0c0 100644 --- a/codex-rs/app-server/tests/suite/login.rs +++ b/codex-rs/app-server/tests/suite/login.rs @@ -1,8 +1,6 @@ use anyhow::Result; use app_test_support::McpProcess; use app_test_support::to_response; -use codex_app_server_protocol::CancelLoginChatGptParams; -use codex_app_server_protocol::CancelLoginChatGptResponse; use codex_app_server_protocol::GetAuthStatusParams; use codex_app_server_protocol::GetAuthStatusResponse; use codex_app_server_protocol::JSONRPCError; @@ -14,7 +12,6 @@ use codex_core::auth::AuthCredentialsStoreMode; use codex_login::login_with_api_key; use serial_test::serial; use std::path::Path; -use std::time::Duration; use tempfile::TempDir; use tokio::time::timeout; @@ -87,48 +84,6 @@ async fn logout_chatgpt_removes_auth() -> Result<()> { Ok(()) } -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -// Serialize tests that launch the login server since it binds to a fixed port. 
-#[serial(login_port)] -async fn login_and_cancel_chatgpt() -> Result<()> { - let codex_home = TempDir::new()?; - create_config_toml(codex_home.path())?; - - let mut mcp = McpProcess::new(codex_home.path()).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; - - let login_id = mcp.send_login_chat_gpt_request().await?; - let login_resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, - mcp.read_stream_until_response_message(RequestId::Integer(login_id)), - ) - .await??; - let login: LoginChatGptResponse = to_response(login_resp)?; - - let cancel_id = mcp - .send_cancel_login_chat_gpt_request(CancelLoginChatGptParams { - login_id: login.login_id, - }) - .await?; - let cancel_resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, - mcp.read_stream_until_response_message(RequestId::Integer(cancel_id)), - ) - .await??; - let _ok: CancelLoginChatGptResponse = to_response(cancel_resp)?; - - // Optionally observe the completion notification; do not fail if it races. - let maybe_note = timeout( - Duration::from_secs(2), - mcp.read_stream_until_notification_message("codex/event/login_chat_gpt_complete"), - ) - .await; - if maybe_note.is_err() { - eprintln!("warning: did not observe login_chat_gpt_complete notification after cancel"); - } - Ok(()) -} - fn create_config_toml_forced_login(codex_home: &Path, forced_method: &str) -> std::io::Result<()> { let config_toml = codex_home.join("config.toml"); let contents = format!( diff --git a/codex-rs/app-server/tests/suite/v2/account.rs b/codex-rs/app-server/tests/suite/v2/account.rs index dd59270739..4d481f3959 100644 --- a/codex-rs/app-server/tests/suite/v2/account.rs +++ b/codex-rs/app-server/tests/suite/v2/account.rs @@ -241,7 +241,7 @@ async fn login_account_chatgpt_rejected_when_forced_api() -> Result<()> { #[tokio::test] // Serialize tests that launch the login server since it binds to a fixed port. 
#[serial(login_port)] -async fn login_account_chatgpt_start() -> Result<()> { +async fn login_account_chatgpt_start_can_be_cancelled() -> Result<()> { let codex_home = TempDir::new()?; create_config_toml(codex_home.path(), CreateConfigTomlParams::default())?; diff --git a/codex-rs/app-server/tests/suite/v2/config_rpc.rs b/codex-rs/app-server/tests/suite/v2/config_rpc.rs index 343a13c3c4..b6615ef667 100644 --- a/codex-rs/app-server/tests/suite/v2/config_rpc.rs +++ b/codex-rs/app-server/tests/suite/v2/config_rpc.rs @@ -1,6 +1,7 @@ use anyhow::Result; use app_test_support::McpProcess; use app_test_support::to_response; +use codex_app_server_protocol::AskForApproval; use codex_app_server_protocol::ConfigBatchWriteParams; use codex_app_server_protocol::ConfigEdit; use codex_app_server_protocol::ConfigLayerName; @@ -12,9 +13,12 @@ use codex_app_server_protocol::JSONRPCError; use codex_app_server_protocol::JSONRPCResponse; use codex_app_server_protocol::MergeStrategy; use codex_app_server_protocol::RequestId; +use codex_app_server_protocol::SandboxMode; +use codex_app_server_protocol::ToolsV2; use codex_app_server_protocol::WriteStatus; use pretty_assertions::assert_eq; use serde_json::json; +use std::path::PathBuf; use tempfile::TempDir; use tokio::time::timeout; @@ -57,7 +61,7 @@ sandbox_mode = "workspace-write" layers, } = to_response(resp)?; - assert_eq!(config.get("model"), Some(&json!("gpt-user"))); + assert_eq!(config.model.as_deref(), Some("gpt-user")); assert_eq!( origins.get("model").expect("origin").name, ConfigLayerName::User @@ -70,6 +74,64 @@ sandbox_mode = "workspace-write" Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn config_read_includes_tools() -> Result<()> { + let codex_home = TempDir::new()?; + write_config( + &codex_home, + r#" +model = "gpt-user" + +[tools] +web_search = true +view_image = false +"#, + )?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, 
mcp.initialize()).await??; + + let request_id = mcp + .send_config_read_request(ConfigReadParams { + include_layers: true, + }) + .await?; + let resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(request_id)), + ) + .await??; + let ConfigReadResponse { + config, + origins, + layers, + } = to_response(resp)?; + + let tools = config.tools.expect("tools present"); + assert_eq!( + tools, + ToolsV2 { + web_search: Some(true), + view_image: Some(false), + } + ); + assert_eq!( + origins.get("tools.web_search").expect("origin").name, + ConfigLayerName::User + ); + assert_eq!( + origins.get("tools.view_image").expect("origin").name, + ConfigLayerName::User + ); + + let layers = layers.expect("layers present"); + assert_eq!(layers.len(), 2); + assert_eq!(layers[0].name, ConfigLayerName::SessionFlags); + assert_eq!(layers[1].name, ConfigLayerName::User); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn config_read_includes_system_layer_and_overrides() -> Result<()> { let codex_home = TempDir::new()?; @@ -123,30 +185,29 @@ writable_roots = ["/system"] layers, } = to_response(resp)?; - assert_eq!(config.get("model"), Some(&json!("gpt-system"))); + assert_eq!(config.model.as_deref(), Some("gpt-system")); assert_eq!( origins.get("model").expect("origin").name, ConfigLayerName::System ); - assert_eq!(config.get("approval_policy"), Some(&json!("never"))); + assert_eq!(config.approval_policy, Some(AskForApproval::Never)); assert_eq!( origins.get("approval_policy").expect("origin").name, ConfigLayerName::System ); - assert_eq!(config.get("sandbox_mode"), Some(&json!("workspace-write"))); + assert_eq!(config.sandbox_mode, Some(SandboxMode::WorkspaceWrite)); assert_eq!( origins.get("sandbox_mode").expect("origin").name, ConfigLayerName::User ); - assert_eq!( - config - .get("sandbox_workspace_write") - .and_then(|v| v.get("writable_roots")), - Some(&json!(["/system"])) - ); + let 
sandbox = config + .sandbox_workspace_write + .as_ref() + .expect("sandbox workspace write"); + assert_eq!(sandbox.writable_roots, vec![PathBuf::from("/system")]); assert_eq!( origins .get("sandbox_workspace_write.writable_roots.0") @@ -155,12 +216,7 @@ writable_roots = ["/system"] ConfigLayerName::System ); - assert_eq!( - config - .get("sandbox_workspace_write") - .and_then(|v| v.get("network_access")), - Some(&json!(true)) - ); + assert!(sandbox.network_access); assert_eq!( origins .get("sandbox_workspace_write.network_access") @@ -206,7 +262,7 @@ model = "gpt-old" let write_id = mcp .send_config_value_write_request(ConfigValueWriteParams { - file_path: codex_home.path().join("config.toml").display().to_string(), + file_path: None, key_path: "model".to_string(), value: json!("gpt-new"), merge_strategy: MergeStrategy::Replace, @@ -219,8 +275,16 @@ model = "gpt-old" ) .await??; let write: ConfigWriteResponse = to_response(write_resp)?; + let expected_file_path = codex_home + .path() + .join("config.toml") + .canonicalize() + .unwrap() + .display() + .to_string(); assert_eq!(write.status, WriteStatus::Ok); + assert_eq!(write.file_path, expected_file_path); assert!(write.overridden_metadata.is_none()); let verify_id = mcp @@ -234,7 +298,7 @@ model = "gpt-old" ) .await??; let verify: ConfigReadResponse = to_response(verify_resp)?; - assert_eq!(verify.config.get("model"), Some(&json!("gpt-new"))); + assert_eq!(verify.config.model.as_deref(), Some("gpt-new")); Ok(()) } @@ -254,7 +318,7 @@ model = "gpt-old" let write_id = mcp .send_config_value_write_request(ConfigValueWriteParams { - file_path: codex_home.path().join("config.toml").display().to_string(), + file_path: Some(codex_home.path().join("config.toml").display().to_string()), key_path: "model".to_string(), value: json!("gpt-new"), merge_strategy: MergeStrategy::Replace, @@ -288,7 +352,7 @@ async fn config_batch_write_applies_multiple_edits() -> Result<()> { let batch_id = mcp 
.send_config_batch_write_request(ConfigBatchWriteParams { - file_path: codex_home.path().join("config.toml").display().to_string(), + file_path: Some(codex_home.path().join("config.toml").display().to_string()), edits: vec![ ConfigEdit { key_path: "sandbox_mode".to_string(), @@ -314,6 +378,14 @@ async fn config_batch_write_applies_multiple_edits() -> Result<()> { .await??; let batch_write: ConfigWriteResponse = to_response(batch_resp)?; assert_eq!(batch_write.status, WriteStatus::Ok); + let expected_file_path = codex_home + .path() + .join("config.toml") + .canonicalize() + .unwrap() + .display() + .to_string(); + assert_eq!(batch_write.file_path, expected_file_path); let read_id = mcp .send_config_read_request(ConfigReadParams { @@ -326,22 +398,14 @@ async fn config_batch_write_applies_multiple_edits() -> Result<()> { ) .await??; let read: ConfigReadResponse = to_response(read_resp)?; - assert_eq!( - read.config.get("sandbox_mode"), - Some(&json!("workspace-write")) - ); - assert_eq!( - read.config - .get("sandbox_workspace_write") - .and_then(|v| v.get("writable_roots")), - Some(&json!(["/tmp"])) - ); - assert_eq!( - read.config - .get("sandbox_workspace_write") - .and_then(|v| v.get("network_access")), - Some(&json!(false)) - ); + assert_eq!(read.config.sandbox_mode, Some(SandboxMode::WorkspaceWrite)); + let sandbox = read + .config + .sandbox_workspace_write + .as_ref() + .expect("sandbox workspace write"); + assert_eq!(sandbox.writable_roots, vec![PathBuf::from("/tmp")]); + assert!(!sandbox.network_access); Ok(()) } diff --git a/codex-rs/app-server/tests/suite/v2/model_list.rs b/codex-rs/app-server/tests/suite/v2/model_list.rs index 3c4844fed9..eb9d0c32c1 100644 --- a/codex-rs/app-server/tests/suite/v2/model_list.rs +++ b/codex-rs/app-server/tests/suite/v2/model_list.rs @@ -4,6 +4,7 @@ use anyhow::Result; use anyhow::anyhow; use app_test_support::McpProcess; use app_test_support::to_response; +use app_test_support::write_models_cache; use 
codex_app_server_protocol::JSONRPCError; use codex_app_server_protocol::JSONRPCResponse; use codex_app_server_protocol::Model; @@ -11,7 +12,7 @@ use codex_app_server_protocol::ModelListParams; use codex_app_server_protocol::ModelListResponse; use codex_app_server_protocol::ReasoningEffortOption; use codex_app_server_protocol::RequestId; -use codex_protocol::config_types::ReasoningEffort; +use codex_protocol::openai_models::ReasoningEffort; use pretty_assertions::assert_eq; use tempfile::TempDir; use tokio::time::timeout; @@ -22,6 +23,7 @@ const INVALID_REQUEST_ERROR_CODE: i64 = -32600; #[tokio::test] async fn list_models_returns_all_models_with_large_limit() -> Result<()> { let codex_home = TempDir::new()?; + write_models_cache(codex_home.path())?; let mut mcp = McpProcess::new(codex_home.path()).await?; timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??; @@ -114,6 +116,39 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> { default_reasoning_effort: ReasoningEffort::Medium, is_default: false, }, + Model { + id: "gpt-5.2".to_string(), + model: "gpt-5.2".to_string(), + display_name: "gpt-5.2".to_string(), + description: + "Latest frontier model with improvements across knowledge, reasoning and coding" + .to_string(), + supported_reasoning_efforts: vec![ + ReasoningEffortOption { + reasoning_effort: ReasoningEffort::Low, + description: "Balances speed with some reasoning; useful for straightforward \ + queries and short explanations" + .to_string(), + }, + ReasoningEffortOption { + reasoning_effort: ReasoningEffort::Medium, + description: "Provides a solid balance of reasoning depth and latency for \ + general-purpose tasks" + .to_string(), + }, + ReasoningEffortOption { + reasoning_effort: ReasoningEffort::High, + description: "Maximizes reasoning depth for complex or ambiguous problems" + .to_string(), + }, + ReasoningEffortOption { + reasoning_effort: ReasoningEffort::XHigh, + description: "Extra high reasoning for complex 
problems".to_string(), + }, + ], + default_reasoning_effort: ReasoningEffort::Medium, + is_default: false, + }, Model { id: "gpt-5.1".to_string(), model: "gpt-5.1".to_string(), @@ -151,6 +186,7 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> { #[tokio::test] async fn list_models_pagination_works() -> Result<()> { let codex_home = TempDir::new()?; + write_models_cache(codex_home.path())?; let mut mcp = McpProcess::new(codex_home.path()).await?; timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??; @@ -240,14 +276,37 @@ async fn list_models_pagination_works() -> Result<()> { } = to_response::(fourth_response)?; assert_eq!(fourth_items.len(), 1); - assert_eq!(fourth_items[0].id, "gpt-5.1"); - assert!(fourth_cursor.is_none()); + assert_eq!(fourth_items[0].id, "gpt-5.2"); + let fifth_cursor = fourth_cursor.ok_or_else(|| anyhow!("cursor for fifth page"))?; + + let fifth_request = mcp + .send_list_models_request(ModelListParams { + limit: Some(1), + cursor: Some(fifth_cursor.clone()), + }) + .await?; + + let fifth_response: JSONRPCResponse = timeout( + DEFAULT_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(fifth_request)), + ) + .await??; + + let ModelListResponse { + data: fifth_items, + next_cursor: fifth_cursor, + } = to_response::(fifth_response)?; + + assert_eq!(fifth_items.len(), 1); + assert_eq!(fifth_items[0].id, "gpt-5.1"); + assert!(fifth_cursor.is_none()); Ok(()) } #[tokio::test] async fn list_models_rejects_invalid_cursor() -> Result<()> { let codex_home = TempDir::new()?; + write_models_cache(codex_home.path())?; let mut mcp = McpProcess::new(codex_home.path()).await?; timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??; diff --git a/codex-rs/app-server/tests/suite/v2/rate_limits.rs b/codex-rs/app-server/tests/suite/v2/rate_limits.rs index 7ddccf7a74..e4e670310a 100644 --- a/codex-rs/app-server/tests/suite/v2/rate_limits.rs +++ b/codex-rs/app-server/tests/suite/v2/rate_limits.rs @@ -11,6 +11,7 @@ use 
codex_app_server_protocol::RateLimitSnapshot; use codex_app_server_protocol::RateLimitWindow; use codex_app_server_protocol::RequestId; use codex_core::auth::AuthCredentialsStoreMode; +use codex_protocol::account::PlanType as AccountPlanType; use pretty_assertions::assert_eq; use serde_json::json; use std::path::Path; @@ -153,6 +154,7 @@ async fn get_account_rate_limits_returns_snapshot() -> Result<()> { resets_at: Some(secondary_reset_timestamp), }), credits: None, + plan_type: Some(AccountPlanType::Pro), }, }; assert_eq!(received, expected); diff --git a/codex-rs/app-server/tests/suite/v2/thread_list.rs b/codex-rs/app-server/tests/suite/v2/thread_list.rs index 57299ef97e..0132651df8 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_list.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_list.rs @@ -6,37 +6,96 @@ use codex_app_server_protocol::GitInfo as ApiGitInfo; use codex_app_server_protocol::JSONRPCResponse; use codex_app_server_protocol::RequestId; use codex_app_server_protocol::SessionSource; -use codex_app_server_protocol::ThreadListParams; use codex_app_server_protocol::ThreadListResponse; use codex_protocol::protocol::GitInfo as CoreGitInfo; +use std::path::Path; use std::path::PathBuf; use tempfile::TempDir; use tokio::time::timeout; const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); +async fn init_mcp(codex_home: &Path) -> Result { + let mut mcp = McpProcess::new(codex_home).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + Ok(mcp) +} + +async fn list_threads( + mcp: &mut McpProcess, + cursor: Option, + limit: Option, + providers: Option>, +) -> Result { + let request_id = mcp + .send_thread_list_request(codex_app_server_protocol::ThreadListParams { + cursor, + limit, + model_providers: providers, + }) + .await?; + let resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(request_id)), + ) + .await??; + to_response::(resp) +} + +fn 
create_fake_rollouts( + codex_home: &Path, + count: usize, + provider_for_index: F, + timestamp_for_index: G, + preview: &str, +) -> Result> +where + F: Fn(usize) -> &'static str, + G: Fn(usize) -> (String, String), +{ + let mut ids = Vec::with_capacity(count); + for i in 0..count { + let (ts_file, ts_rfc) = timestamp_for_index(i); + ids.push(create_fake_rollout( + codex_home, + &ts_file, + &ts_rfc, + preview, + Some(provider_for_index(i)), + None, + )?); + } + Ok(ids) +} + +fn timestamp_at( + year: i32, + month: u32, + day: u32, + hour: u32, + minute: u32, + second: u32, +) -> (String, String) { + ( + format!("{year:04}-{month:02}-{day:02}T{hour:02}-{minute:02}-{second:02}"), + format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z"), + ) +} + #[tokio::test] async fn thread_list_basic_empty() -> Result<()> { let codex_home = TempDir::new()?; create_minimal_config(codex_home.path())?; - let mut mcp = McpProcess::new(codex_home.path()).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + let mut mcp = init_mcp(codex_home.path()).await?; - // List threads in an empty CODEX_HOME; should return an empty page with nextCursor: null. 
- let list_id = mcp - .send_thread_list_request(ThreadListParams { - cursor: None, - limit: Some(10), - model_providers: Some(vec!["mock_provider".to_string()]), - }) - .await?; - let list_resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, - mcp.read_stream_until_response_message(RequestId::Integer(list_id)), + let ThreadListResponse { data, next_cursor } = list_threads( + &mut mcp, + None, + Some(10), + Some(vec!["mock_provider".to_string()]), ) - .await??; - let ThreadListResponse { data, next_cursor } = to_response::(list_resp)?; + .await?; assert!(data.is_empty()); assert_eq!(next_cursor, None); @@ -86,26 +145,19 @@ async fn thread_list_pagination_next_cursor_none_on_last_page() -> Result<()> { None, )?; - let mut mcp = McpProcess::new(codex_home.path()).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + let mut mcp = init_mcp(codex_home.path()).await?; // Page 1: limit 2 → expect next_cursor Some. - let page1_id = mcp - .send_thread_list_request(ThreadListParams { - cursor: None, - limit: Some(2), - model_providers: Some(vec!["mock_provider".to_string()]), - }) - .await?; - let page1_resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, - mcp.read_stream_until_response_message(RequestId::Integer(page1_id)), - ) - .await??; let ThreadListResponse { data: data1, next_cursor: cursor1, - } = to_response::(page1_resp)?; + } = list_threads( + &mut mcp, + None, + Some(2), + Some(vec!["mock_provider".to_string()]), + ) + .await?; assert_eq!(data1.len(), 2); for thread in &data1 { assert_eq!(thread.preview, "Hello"); @@ -119,22 +171,16 @@ async fn thread_list_pagination_next_cursor_none_on_last_page() -> Result<()> { let cursor1 = cursor1.expect("expected nextCursor on first page"); // Page 2: with cursor → expect next_cursor None when no more results. 
- let page2_id = mcp - .send_thread_list_request(ThreadListParams { - cursor: Some(cursor1), - limit: Some(2), - model_providers: Some(vec!["mock_provider".to_string()]), - }) - .await?; - let page2_resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, - mcp.read_stream_until_response_message(RequestId::Integer(page2_id)), - ) - .await??; let ThreadListResponse { data: data2, next_cursor: cursor2, - } = to_response::(page2_resp)?; + } = list_threads( + &mut mcp, + Some(cursor1), + Some(2), + Some(vec!["mock_provider".to_string()]), + ) + .await?; assert!(data2.len() <= 2); for thread in &data2 { assert_eq!(thread.preview, "Hello"); @@ -173,23 +219,16 @@ async fn thread_list_respects_provider_filter() -> Result<()> { None, )?; - let mut mcp = McpProcess::new(codex_home.path()).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + let mut mcp = init_mcp(codex_home.path()).await?; // Filter to only other_provider; expect 1 item, nextCursor None. - let list_id = mcp - .send_thread_list_request(ThreadListParams { - cursor: None, - limit: Some(10), - model_providers: Some(vec!["other_provider".to_string()]), - }) - .await?; - let resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, - mcp.read_stream_until_response_message(RequestId::Integer(list_id)), + let ThreadListResponse { data, next_cursor } = list_threads( + &mut mcp, + None, + Some(10), + Some(vec!["other_provider".to_string()]), ) - .await??; - let ThreadListResponse { data, next_cursor } = to_response::(resp)?; + .await?; assert_eq!(data.len(), 1); assert_eq!(next_cursor, None); let thread = &data[0]; @@ -205,6 +244,146 @@ async fn thread_list_respects_provider_filter() -> Result<()> { Ok(()) } +#[tokio::test] +async fn thread_list_fetches_until_limit_or_exhausted() -> Result<()> { + let codex_home = TempDir::new()?; + create_minimal_config(codex_home.path())?; + + // Newest 16 conversations belong to a different provider; the older 8 are the + // only ones that match the filter. 
We request 8 so the server must keep + // paging past the first two pages to reach the desired count. + create_fake_rollouts( + codex_home.path(), + 24, + |i| { + if i < 16 { + "skip_provider" + } else { + "target_provider" + } + }, + |i| timestamp_at(2025, 3, 30 - i as u32, 12, 0, 0), + "Hello", + )?; + + let mut mcp = init_mcp(codex_home.path()).await?; + + // Request 8 threads for the target provider; the matches only start on the + // third page so we rely on pagination to reach the limit. + let ThreadListResponse { data, next_cursor } = list_threads( + &mut mcp, + None, + Some(8), + Some(vec!["target_provider".to_string()]), + ) + .await?; + assert_eq!( + data.len(), + 8, + "should keep paging until the requested count is filled" + ); + assert!( + data.iter() + .all(|thread| thread.model_provider == "target_provider"), + "all returned threads must match the requested provider" + ); + assert_eq!( + next_cursor, None, + "once the requested count is satisfied on the final page, nextCursor should be None" + ); + + Ok(()) +} + +#[tokio::test] +async fn thread_list_enforces_max_limit() -> Result<()> { + let codex_home = TempDir::new()?; + create_minimal_config(codex_home.path())?; + + create_fake_rollouts( + codex_home.path(), + 105, + |_| "mock_provider", + |i| { + let month = 5 + (i / 28); + let day = (i % 28) + 1; + timestamp_at(2025, month as u32, day as u32, 0, 0, 0) + }, + "Hello", + )?; + + let mut mcp = init_mcp(codex_home.path()).await?; + + let ThreadListResponse { data, next_cursor } = list_threads( + &mut mcp, + None, + Some(200), + Some(vec!["mock_provider".to_string()]), + ) + .await?; + assert_eq!( + data.len(), + 100, + "limit should be clamped to the maximum page size" + ); + assert!( + next_cursor.is_some(), + "when more than the maximum exist, nextCursor should continue pagination" + ); + + Ok(()) +} + +#[tokio::test] +async fn thread_list_stops_when_not_enough_filtered_results_exist() -> Result<()> { + let codex_home = TempDir::new()?; + 
create_minimal_config(codex_home.path())?; + + // Only the last 7 conversations match the provider filter; we ask for 10 to + // ensure the server exhausts pagination without looping forever. + create_fake_rollouts( + codex_home.path(), + 22, + |i| { + if i < 15 { + "skip_provider" + } else { + "target_provider" + } + }, + |i| timestamp_at(2025, 4, 28 - i as u32, 8, 0, 0), + "Hello", + )?; + + let mut mcp = init_mcp(codex_home.path()).await?; + + // Request more threads than exist after filtering; expect all matches to be + // returned with nextCursor None. + let ThreadListResponse { data, next_cursor } = list_threads( + &mut mcp, + None, + Some(10), + Some(vec!["target_provider".to_string()]), + ) + .await?; + assert_eq!( + data.len(), + 7, + "all available filtered threads should be returned" + ); + assert!( + data.iter() + .all(|thread| thread.model_provider == "target_provider"), + "results should still respect the provider filter" + ); + assert_eq!( + next_cursor, None, + "when results are exhausted before reaching the limit, nextCursor should be None" + ); + + Ok(()) +} + #[tokio::test] async fn thread_list_includes_git_info() -> Result<()> { let codex_home = TempDir::new()?; @@ -224,22 +403,15 @@ async fn thread_list_includes_git_info() -> Result<()> { Some(git_info), )?; - let mut mcp = McpProcess::new(codex_home.path()).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + let mut mcp = init_mcp(codex_home.path()).await?; - let list_id = mcp - .send_thread_list_request(ThreadListParams { - cursor: None, - limit: Some(10), - model_providers: Some(vec!["mock_provider".to_string()]), - }) - .await?; - let resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, - mcp.read_stream_until_response_message(RequestId::Integer(list_id)), + let ThreadListResponse { data, .. } = list_threads( + &mut mcp, + None, + Some(10), + Some(vec!["mock_provider".to_string()]), ) - .await??; - let ThreadListResponse { data, .. 
} = to_response::(resp)?; + .await?; let thread = data .iter() .find(|t| t.id == conversation_id) diff --git a/codex-rs/app-server/tests/suite/v2/turn_start.rs b/codex-rs/app-server/tests/suite/v2/turn_start.rs index 03ee279e51..afc22c7072 100644 --- a/codex-rs/app-server/tests/suite/v2/turn_start.rs +++ b/codex-rs/app-server/tests/suite/v2/turn_start.rs @@ -30,8 +30,8 @@ use codex_app_server_protocol::TurnStartResponse; use codex_app_server_protocol::TurnStartedNotification; use codex_app_server_protocol::TurnStatus; use codex_app_server_protocol::UserInput as V2UserInput; -use codex_core::protocol_config_types::ReasoningEffort; use codex_core::protocol_config_types::ReasoningSummary; +use codex_protocol::openai_models::ReasoningEffort; use core_test_support::skip_if_no_network; use pretty_assertions::assert_eq; use std::path::Path; @@ -427,7 +427,6 @@ async fn turn_start_exec_approval_decline_v2() -> Result<()> { request_id, serde_json::to_value(CommandExecutionRequestApprovalResponse { decision: ApprovalDecision::Decline, - accept_settings: None, })?, ) .await?; diff --git a/codex-rs/apply-patch/src/lib.rs b/codex-rs/apply-patch/src/lib.rs index 867d19a2e8..fe4fe584dc 100644 --- a/codex-rs/apply-patch/src/lib.rs +++ b/codex-rs/apply-patch/src/lib.rs @@ -112,7 +112,7 @@ fn classify_shell_name(shell: &str) -> Option { fn classify_shell(shell: &str, flag: &str) -> Option { classify_shell_name(shell).and_then(|name| match name.as_str() { - "bash" | "zsh" | "sh" if flag == "-lc" => Some(ApplyPatchShell::Unix), + "bash" | "zsh" | "sh" if matches!(flag, "-lc" | "-c") => Some(ApplyPatchShell::Unix), "pwsh" | "powershell" if flag.eq_ignore_ascii_case("-command") => { Some(ApplyPatchShell::PowerShell) } @@ -699,13 +699,7 @@ fn derive_new_contents_from_chunks( } }; - let mut original_lines: Vec = original_contents.split('\n').map(String::from).collect(); - - // Drop the trailing empty element that results from the final newline so - // that line counts match the behaviour 
of standard `diff`. - if original_lines.last().is_some_and(String::is_empty) { - original_lines.pop(); - } + let original_lines: Vec = build_lines_from_contents(&original_contents); let replacements = compute_replacements(&original_lines, path, chunks)?; let new_lines = apply_replacements(original_lines, &replacements); @@ -713,13 +707,67 @@ fn derive_new_contents_from_chunks( if !new_lines.last().is_some_and(String::is_empty) { new_lines.push(String::new()); } - let new_contents = new_lines.join("\n"); + let new_contents = build_contents_from_lines(&original_contents, &new_lines); Ok(AppliedPatch { original_contents, new_contents, }) } +// TODO(dylan-hurd-oai): I think we can migrate to just use `contents.lines()` +// across all platforms. +fn build_lines_from_contents(contents: &str) -> Vec { + if cfg!(windows) { + contents.lines().map(String::from).collect() + } else { + let mut lines: Vec = contents.split('\n').map(String::from).collect(); + + // Drop the trailing empty element that results from the final newline so + // that line counts match the behaviour of standard `diff`. + if lines.last().is_some_and(String::is_empty) { + lines.pop(); + } + + lines + } +} + +fn build_contents_from_lines(original_contents: &str, lines: &[String]) -> String { + if cfg!(windows) { + // for now, only compute this if we're on Windows. + let uses_crlf = contents_uses_crlf(original_contents); + if uses_crlf { + lines.join("\r\n") + } else { + lines.join("\n") + } + } else { + lines.join("\n") + } +} + +/// Detects whether the source file uses Windows CRLF line endings consistently. +/// We only consider a file CRLF-formatted if every newline is part of a +/// CRLF sequence. This avoids rewriting an LF-formatted file that merely +/// contains embedded sequences of "\r\n". +/// +/// Returns `true` if the file uses CRLF line endings, `false` otherwise. 
+fn contents_uses_crlf(contents: &str) -> bool { + let bytes = contents.as_bytes(); + let mut n_newlines = 0usize; + let mut n_crlf = 0usize; + for i in 0..bytes.len() { + if bytes[i] == b'\n' { + n_newlines += 1; + if i > 0 && bytes[i - 1] == b'\r' { + n_crlf += 1; + } + } + } + + n_newlines > 0 && n_crlf == n_newlines +} + /// Compute a list of replacements needed to transform `original_lines` into the /// new lines, given the patch `chunks`. Each replacement is returned as /// `(start_index, old_len, new_lines)`. @@ -1049,6 +1097,13 @@ mod tests { assert_match(&heredoc_script(""), None); } + #[test] + fn test_heredoc_non_login_shell() { + let script = heredoc_script(""); + let args = strs_to_strings(&["bash", "-c", &script]); + assert_match_args(args, None); + } + #[test] fn test_heredoc_applypatch() { let args = strs_to_strings(&[ @@ -1359,6 +1414,72 @@ PATCH"#, assert_eq!(contents, "a\nB\nc\nd\nE\nf\ng\n"); } + /// Ensure CRLF line endings are preserved for updated files on Windows‑style inputs. + #[cfg(windows)] + #[test] + fn test_preserve_crlf_line_endings_on_update() { + let dir = tempdir().unwrap(); + let path = dir.path().join("crlf.txt"); + + // Original file uses CRLF (\r\n) endings. + std::fs::write(&path, b"a\r\nb\r\nc\r\n").unwrap(); + + // Replace `b` -> `B` and append `d`. + let patch = wrap_patch(&format!( + r#"*** Update File: {} +@@ + a +-b ++B +@@ + c ++d +*** End of File"#, + path.display() + )); + + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + apply_patch(&patch, &mut stdout, &mut stderr).unwrap(); + + let out = std::fs::read(&path).unwrap(); + // Expect all CRLF endings; count occurrences of CRLF and ensure there are 4 lines. + let content = String::from_utf8_lossy(&out); + assert!(content.contains("\r\n")); + // No bare LF occurrences immediately preceding a non-CR: the text should not contain "a\nb". + assert!(!content.contains("a\nb")); + // Validate exact content sequence with CRLF delimiters. 
+ assert_eq!(content, "a\r\nB\r\nc\r\nd\r\n"); + } + + /// Ensure CRLF inputs with embedded carriage returns in the content are preserved. + #[cfg(windows)] + #[test] + fn test_preserve_crlf_embedded_carriage_returns_on_append() { + let dir = tempdir().unwrap(); + let path = dir.path().join("crlf_cr_content.txt"); + + // Original file: first line has a literal '\r' in the content before the CRLF terminator. + std::fs::write(&path, b"foo\r\r\nbar\r\n").unwrap(); + + // Append a new line without modifying existing ones. + let patch = wrap_patch(&format!( + r#"*** Update File: {} +@@ ++BAZ +*** End of File"#, + path.display() + )); + + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + apply_patch(&patch, &mut stdout, &mut stderr).unwrap(); + + let out = std::fs::read(&path).unwrap(); + // CRLF endings must be preserved and the extra CR in "foo\r\r" must not be collapsed. + assert_eq!(out.as_slice(), b"foo\r\r\nbar\r\nBAZ\r\n"); + } + #[test] fn test_pure_addition_chunk_followed_by_removal() { let dir = tempdir().unwrap(); @@ -1544,6 +1665,37 @@ PATCH"#, assert_eq!(expected, diff); } + /// For LF-only inputs with a trailing newline ensure that the helper used + /// on Windows-style builds drops the synthetic trailing empty element so + /// replacements behave like standard `diff` line numbering. + #[test] + fn test_derive_new_contents_lf_trailing_newline() { + let dir = tempdir().unwrap(); + let path = dir.path().join("lf_trailing_newline.txt"); + fs::write(&path, "foo\nbar\n").unwrap(); + + let patch = wrap_patch(&format!( + r#"*** Update File: {} +@@ + foo +-bar ++BAR +"#, + path.display() + )); + + let patch = parse_patch(&patch).unwrap(); + let chunks = match patch.hunks.as_slice() { + [Hunk::UpdateFile { chunks, .. }] => chunks, + _ => panic!("Expected a single UpdateFile hunk"), + }; + + let AppliedPatch { new_contents, .. 
} = + derive_new_contents_from_chunks(&path, chunks).unwrap(); + + assert_eq!(new_contents, "foo\nBAR\n"); + } + #[test] fn test_unified_diff_insert_at_eof() { // Insert a new line at end‑of‑file. diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/.gitattributes b/codex-rs/apply-patch/tests/fixtures/scenarios/.gitattributes new file mode 100644 index 0000000000..a42a20ddc5 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/.gitattributes @@ -0,0 +1 @@ +** text eol=lf diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/001_add_file/expected/bar.md b/codex-rs/apply-patch/tests/fixtures/scenarios/001_add_file/expected/bar.md new file mode 100644 index 0000000000..6dfa057f0d --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/001_add_file/expected/bar.md @@ -0,0 +1 @@ +This is a new file diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/001_add_file/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/001_add_file/patch.txt new file mode 100644 index 0000000000..37735b2a46 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/001_add_file/patch.txt @@ -0,0 +1,4 @@ +*** Begin Patch +*** Add File: bar.md ++This is a new file +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/expected/modify.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/expected/modify.txt new file mode 100644 index 0000000000..1b2ee3e566 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/expected/modify.txt @@ -0,0 +1,2 @@ +line1 +changed diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/expected/nested/new.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/expected/nested/new.txt new file mode 100644 index 0000000000..3151666398 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/expected/nested/new.txt @@ -0,0 +1 @@ +created 
diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/input/delete.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/input/delete.txt new file mode 100644 index 0000000000..6e263abce1 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/input/delete.txt @@ -0,0 +1 @@ +obsolete diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/input/modify.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/input/modify.txt new file mode 100644 index 0000000000..c0d0fb45c3 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/input/modify.txt @@ -0,0 +1,2 @@ +line1 +line2 diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/patch.txt new file mode 100644 index 0000000000..673dec2f78 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/patch.txt @@ -0,0 +1,9 @@ +*** Begin Patch +*** Add File: nested/new.txt ++created +*** Delete File: delete.txt +*** Update File: modify.txt +@@ +-line2 ++changed +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/expected/multi.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/expected/multi.txt new file mode 100644 index 0000000000..9054a72916 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/expected/multi.txt @@ -0,0 +1,4 @@ +line1 +changed2 +line3 +changed4 diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/input/multi.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/input/multi.txt new file mode 100644 index 0000000000..84275f9939 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/input/multi.txt @@ -0,0 +1,4 @@ +line1 +line2 +line3 
+line4 diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/patch.txt new file mode 100644 index 0000000000..45733c714b --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/patch.txt @@ -0,0 +1,9 @@ +*** Begin Patch +*** Update File: multi.txt +@@ +-line2 ++changed2 +@@ +-line4 ++changed4 +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/expected/old/other.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/expected/old/other.txt new file mode 100644 index 0000000000..b61039d3df --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/expected/old/other.txt @@ -0,0 +1 @@ +unrelated file diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/expected/renamed/dir/name.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/expected/renamed/dir/name.txt new file mode 100644 index 0000000000..b66ba06d31 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/expected/renamed/dir/name.txt @@ -0,0 +1 @@ +new content diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/input/old/name.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/input/old/name.txt new file mode 100644 index 0000000000..33194a0a6f --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/input/old/name.txt @@ -0,0 +1 @@ +old content diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/input/old/other.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/input/old/other.txt new file mode 100644 index 0000000000..b61039d3df --- /dev/null +++ 
b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/input/old/other.txt @@ -0,0 +1 @@ +unrelated file diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/patch.txt new file mode 100644 index 0000000000..5e2d723a2b --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/patch.txt @@ -0,0 +1,7 @@ +*** Begin Patch +*** Update File: old/name.txt +*** Move to: renamed/dir/name.txt +@@ +-old content ++new content +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/005_rejects_empty_patch/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/005_rejects_empty_patch/patch.txt new file mode 100644 index 0000000000..4fcfecbbc7 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/005_rejects_empty_patch/patch.txt @@ -0,0 +1,2 @@ +*** Begin Patch +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/expected/modify.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/expected/modify.txt new file mode 100644 index 0000000000..c0d0fb45c3 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/expected/modify.txt @@ -0,0 +1,2 @@ +line1 +line2 diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/input/modify.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/input/modify.txt new file mode 100644 index 0000000000..c0d0fb45c3 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/input/modify.txt @@ -0,0 +1,2 @@ +line1 +line2 diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/patch.txt new file mode 100644 index 0000000000..488438b12b --- 
/dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/patch.txt @@ -0,0 +1,6 @@ +*** Begin Patch +*** Update File: modify.txt +@@ +-missing ++changed +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/007_rejects_missing_file_delete/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/007_rejects_missing_file_delete/patch.txt new file mode 100644 index 0000000000..6f95531db3 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/007_rejects_missing_file_delete/patch.txt @@ -0,0 +1,3 @@ +*** Begin Patch +*** Delete File: missing.txt +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/008_rejects_empty_update_hunk/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/008_rejects_empty_update_hunk/patch.txt new file mode 100644 index 0000000000..d7596a362b --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/008_rejects_empty_update_hunk/patch.txt @@ -0,0 +1,3 @@ +*** Begin Patch +*** Update File: foo.txt +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/009_requires_existing_file_for_update/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/009_requires_existing_file_for_update/patch.txt new file mode 100644 index 0000000000..a7de4f24c5 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/009_requires_existing_file_for_update/patch.txt @@ -0,0 +1,6 @@ +*** Begin Patch +*** Update File: missing.txt +@@ +-old ++new +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/expected/old/other.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/expected/old/other.txt new file mode 100644 index 0000000000..b61039d3df --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/expected/old/other.txt @@ -0,0 +1 @@ +unrelated file diff --git 
a/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/expected/renamed/dir/name.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/expected/renamed/dir/name.txt new file mode 100644 index 0000000000..3e757656cf --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/expected/renamed/dir/name.txt @@ -0,0 +1 @@ +new diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/old/name.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/old/name.txt new file mode 100644 index 0000000000..3940df7cd8 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/old/name.txt @@ -0,0 +1 @@ +from diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/old/other.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/old/other.txt new file mode 100644 index 0000000000..b61039d3df --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/old/other.txt @@ -0,0 +1 @@ +unrelated file diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/renamed/dir/name.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/renamed/dir/name.txt new file mode 100644 index 0000000000..cbaf024e5e --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/renamed/dir/name.txt @@ -0,0 +1 @@ +existing diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/patch.txt new file mode 100644 index 
0000000000..c45ce6d782 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/patch.txt @@ -0,0 +1,7 @@ +*** Begin Patch +*** Update File: old/name.txt +*** Move to: renamed/dir/name.txt +@@ +-from ++new +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/expected/duplicate.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/expected/duplicate.txt new file mode 100644 index 0000000000..b66ba06d31 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/expected/duplicate.txt @@ -0,0 +1 @@ +new content diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/input/duplicate.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/input/duplicate.txt new file mode 100644 index 0000000000..33194a0a6f --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/input/duplicate.txt @@ -0,0 +1 @@ +old content diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/patch.txt new file mode 100644 index 0000000000..bad9cf3fde --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/patch.txt @@ -0,0 +1,4 @@ +*** Begin Patch +*** Add File: duplicate.txt ++new content +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/012_delete_directory_fails/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/012_delete_directory_fails/patch.txt new file mode 100644 index 0000000000..a10bcd9ea9 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/012_delete_directory_fails/patch.txt @@ -0,0 +1,3 @@ +*** Begin Patch +*** Delete File: dir +*** End Patch diff --git 
a/codex-rs/apply-patch/tests/fixtures/scenarios/013_rejects_invalid_hunk_header/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/013_rejects_invalid_hunk_header/patch.txt new file mode 100644 index 0000000000..b35d7207d7 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/013_rejects_invalid_hunk_header/patch.txt @@ -0,0 +1,3 @@ +*** Begin Patch +*** Frobnicate File: foo +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/expected/no_newline.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/expected/no_newline.txt new file mode 100644 index 0000000000..06fcdd77c9 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/expected/no_newline.txt @@ -0,0 +1,2 @@ +first line +second line diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/input/no_newline.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/input/no_newline.txt new file mode 100644 index 0000000000..a6e09874b5 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/input/no_newline.txt @@ -0,0 +1 @@ +no newline at end diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/patch.txt new file mode 100644 index 0000000000..4ed5818eb1 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/patch.txt @@ -0,0 +1,7 @@ +*** Begin Patch +*** Update File: no_newline.txt +@@ +-no newline at end ++first line ++second line +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/015_failure_after_partial_success_leaves_changes/expected/created.txt 
b/codex-rs/apply-patch/tests/fixtures/scenarios/015_failure_after_partial_success_leaves_changes/expected/created.txt new file mode 100644 index 0000000000..ce01362503 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/015_failure_after_partial_success_leaves_changes/expected/created.txt @@ -0,0 +1 @@ +hello diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/015_failure_after_partial_success_leaves_changes/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/015_failure_after_partial_success_leaves_changes/patch.txt new file mode 100644 index 0000000000..a6e9709d1f --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/015_failure_after_partial_success_leaves_changes/patch.txt @@ -0,0 +1,8 @@ +*** Begin Patch +*** Add File: created.txt ++hello +*** Update File: missing.txt +@@ +-old ++new +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/016_pure_addition_update_chunk/expected/input.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/016_pure_addition_update_chunk/expected/input.txt new file mode 100644 index 0000000000..f6d6f0bef8 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/016_pure_addition_update_chunk/expected/input.txt @@ -0,0 +1,4 @@ +line1 +line2 +added line 1 +added line 2 diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/016_pure_addition_update_chunk/input/input.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/016_pure_addition_update_chunk/input/input.txt new file mode 100644 index 0000000000..c0d0fb45c3 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/016_pure_addition_update_chunk/input/input.txt @@ -0,0 +1,2 @@ +line1 +line2 diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/016_pure_addition_update_chunk/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/016_pure_addition_update_chunk/patch.txt new file mode 100644 index 0000000000..56337549f9 --- /dev/null +++ 
b/codex-rs/apply-patch/tests/fixtures/scenarios/016_pure_addition_update_chunk/patch.txt @@ -0,0 +1,6 @@ +*** Begin Patch +*** Update File: input.txt +@@ ++added line 1 ++added line 2 +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/017_whitespace_padded_hunk_header/expected/foo.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/017_whitespace_padded_hunk_header/expected/foo.txt new file mode 100644 index 0000000000..3e757656cf --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/017_whitespace_padded_hunk_header/expected/foo.txt @@ -0,0 +1 @@ +new diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/017_whitespace_padded_hunk_header/input/foo.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/017_whitespace_padded_hunk_header/input/foo.txt new file mode 100644 index 0000000000..3367afdbbf --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/017_whitespace_padded_hunk_header/input/foo.txt @@ -0,0 +1 @@ +old diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/017_whitespace_padded_hunk_header/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/017_whitespace_padded_hunk_header/patch.txt new file mode 100644 index 0000000000..21e6c1958d --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/017_whitespace_padded_hunk_header/patch.txt @@ -0,0 +1,6 @@ +*** Begin Patch + *** Update File: foo.txt +@@ +-old ++new +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/018_whitespace_padded_patch_markers/expected/file.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/018_whitespace_padded_patch_markers/expected/file.txt new file mode 100644 index 0000000000..f719efd430 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/018_whitespace_padded_patch_markers/expected/file.txt @@ -0,0 +1 @@ +two diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/018_whitespace_padded_patch_markers/input/file.txt 
b/codex-rs/apply-patch/tests/fixtures/scenarios/018_whitespace_padded_patch_markers/input/file.txt new file mode 100644 index 0000000000..5626abf0f7 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/018_whitespace_padded_patch_markers/input/file.txt @@ -0,0 +1 @@ +one diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/018_whitespace_padded_patch_markers/patch.txt b/codex-rs/apply-patch/tests/fixtures/scenarios/018_whitespace_padded_patch_markers/patch.txt new file mode 100644 index 0000000000..2648721797 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/018_whitespace_padded_patch_markers/patch.txt @@ -0,0 +1,6 @@ + *** Begin Patch +*** Update File: file.txt +@@ +-one ++two +*** End Patch diff --git a/codex-rs/apply-patch/tests/fixtures/scenarios/README.md b/codex-rs/apply-patch/tests/fixtures/scenarios/README.md new file mode 100644 index 0000000000..65d1fbe2e4 --- /dev/null +++ b/codex-rs/apply-patch/tests/fixtures/scenarios/README.md @@ -0,0 +1,18 @@ +# Overview +This directory is a collection of end to end tests for the apply-patch specification, meant to be easily portable to other languages or platforms. + + +# Specification +Each test case is one directory, composed of input state (input/), the patch operation (patch.txt), and the expected final state (expected/). This structure is designed to keep tests simple (i.e. test exactly one patch at a time) while still providing enough flexibility to test any given operation across files. 
+ +Here's what this would look like for a simple apply-patch test case to create a new file: + +``` +001_add/ + input/ + foo.md + expected/ + foo.md + bar.md + patch.txt +``` diff --git a/codex-rs/apply-patch/tests/suite/mod.rs b/codex-rs/apply-patch/tests/suite/mod.rs index 882c5a6ffd..7d54de85ad 100644 --- a/codex-rs/apply-patch/tests/suite/mod.rs +++ b/codex-rs/apply-patch/tests/suite/mod.rs @@ -1,3 +1,4 @@ mod cli; +mod scenarios; #[cfg(not(target_os = "windows"))] mod tool; diff --git a/codex-rs/apply-patch/tests/suite/scenarios.rs b/codex-rs/apply-patch/tests/suite/scenarios.rs new file mode 100644 index 0000000000..4b3eb3c84a --- /dev/null +++ b/codex-rs/apply-patch/tests/suite/scenarios.rs @@ -0,0 +1,114 @@ +use assert_cmd::prelude::*; +use pretty_assertions::assert_eq; +use std::collections::BTreeMap; +use std::fs; +use std::path::Path; +use std::path::PathBuf; +use std::process::Command; +use tempfile::tempdir; + +#[test] +fn test_apply_patch_scenarios() -> anyhow::Result<()> { + for scenario in fs::read_dir("tests/fixtures/scenarios")? { + let scenario = scenario?; + let path = scenario.path(); + if path.is_dir() { + run_apply_patch_scenario(&path)?; + } + } + Ok(()) + } + +/// Reads a scenario directory, copies the input files to a temporary directory, runs apply-patch, +/// and asserts that the final state matches the expected state exactly. +fn run_apply_patch_scenario(dir: &Path) -> anyhow::Result<()> { + let tmp = tempdir()?; + + // Copy the input files to the temporary directory + let input_dir = dir.join("input"); + if input_dir.is_dir() { + copy_dir_recursive(&input_dir, tmp.path())?; + } + + // Read the patch.txt file + let patch = fs::read_to_string(dir.join("patch.txt"))?; + + // Run apply_patch in the temporary directory. We intentionally do not assert + // on the exit status here; the scenarios are specified purely in terms of + // final filesystem state, which we compare below. + Command::cargo_bin("apply_patch")?
+ .arg(patch) + .current_dir(tmp.path()) + .output()?; + + // Assert that the final state matches the expected state exactly + let expected_dir = dir.join("expected"); + let expected_snapshot = snapshot_dir(&expected_dir)?; + let actual_snapshot = snapshot_dir(tmp.path())?; + + assert_eq!( + actual_snapshot, + expected_snapshot, + "Scenario {} did not match expected final state", + dir.display() + ); + + Ok(()) +} + +#[derive(Debug, Clone, PartialEq, Eq)] +enum Entry { + File(Vec), + Dir, +} + +fn snapshot_dir(root: &Path) -> anyhow::Result> { + let mut entries = BTreeMap::new(); + if root.is_dir() { + snapshot_dir_recursive(root, root, &mut entries)?; + } + Ok(entries) +} + +fn snapshot_dir_recursive( + base: &Path, + dir: &Path, + entries: &mut BTreeMap, +) -> anyhow::Result<()> { + for entry in fs::read_dir(dir)? { + let entry = entry?; + let path = entry.path(); + let Some(stripped) = path.strip_prefix(base).ok() else { + continue; + }; + let rel = stripped.to_path_buf(); + let file_type = entry.file_type()?; + if file_type.is_dir() { + entries.insert(rel.clone(), Entry::Dir); + snapshot_dir_recursive(base, &path, entries)?; + } else if file_type.is_file() { + let contents = fs::read(&path)?; + entries.insert(rel, Entry::File(contents)); + } + } + Ok(()) +} + +fn copy_dir_recursive(src: &Path, dst: &Path) -> anyhow::Result<()> { + for entry in fs::read_dir(src)? 
{ + let entry = entry?; + let path = entry.path(); + let file_type = entry.file_type()?; + let dest_path = dst.join(entry.file_name()); + if file_type.is_dir() { + fs::create_dir_all(&dest_path)?; + copy_dir_recursive(&path, &dest_path)?; + } else if file_type.is_file() { + if let Some(parent) = dest_path.parent() { + fs::create_dir_all(parent)?; + } + fs::copy(&path, &dest_path)?; + } + } + Ok(()) +} diff --git a/codex-rs/backend-client/src/client.rs b/codex-rs/backend-client/src/client.rs index 0fb627ef0a..4b5eaa4105 100644 --- a/codex-rs/backend-client/src/client.rs +++ b/codex-rs/backend-client/src/client.rs @@ -7,6 +7,7 @@ use crate::types::TurnAttemptsSiblingTurnsResponse; use anyhow::Result; use codex_core::auth::CodexAuth; use codex_core::default_client::get_codex_user_agent; +use codex_protocol::account::PlanType as AccountPlanType; use codex_protocol::protocol::CreditsSnapshot; use codex_protocol::protocol::RateLimitSnapshot; use codex_protocol::protocol::RateLimitWindow; @@ -291,6 +292,7 @@ impl Client { primary, secondary, credits: Self::map_credits(payload.credits), + plan_type: Some(Self::map_plan_type(payload.plan_type)), } } @@ -325,6 +327,23 @@ impl Client { }) } + fn map_plan_type(plan_type: crate::types::PlanType) -> AccountPlanType { + match plan_type { + crate::types::PlanType::Free => AccountPlanType::Free, + crate::types::PlanType::Plus => AccountPlanType::Plus, + crate::types::PlanType::Pro => AccountPlanType::Pro, + crate::types::PlanType::Team => AccountPlanType::Team, + crate::types::PlanType::Business => AccountPlanType::Business, + crate::types::PlanType::Enterprise => AccountPlanType::Enterprise, + crate::types::PlanType::Edu | crate::types::PlanType::Education => AccountPlanType::Edu, + crate::types::PlanType::Guest + | crate::types::PlanType::Go + | crate::types::PlanType::FreeWorkspace + | crate::types::PlanType::Quorum + | crate::types::PlanType::K12 => AccountPlanType::Unknown, + } + } + fn window_minutes_from_seconds(seconds: 
i32) -> Option { if seconds <= 0 { return None; diff --git a/codex-rs/cli/Cargo.toml b/codex-rs/cli/Cargo.toml index 6c80a12595..84e6e9acaf 100644 --- a/codex-rs/cli/Cargo.toml +++ b/codex-rs/cli/Cargo.toml @@ -36,6 +36,7 @@ codex-responses-api-proxy = { workspace = true } codex-rmcp-client = { workspace = true } codex-stdio-to-uds = { workspace = true } codex-tui = { workspace = true } +codex-tui2 = { workspace = true } ctor = { workspace = true } libc = { workspace = true } owo-colors = { workspace = true } diff --git a/codex-rs/cli/src/main.rs b/codex-rs/cli/src/main.rs index 6cff73e86d..113c6a7515 100644 --- a/codex-rs/cli/src/main.rs +++ b/codex-rs/cli/src/main.rs @@ -25,6 +25,7 @@ use codex_responses_api_proxy::Args as ResponsesApiProxyArgs; use codex_tui::AppExitInfo; use codex_tui::Cli as TuiCli; use codex_tui::update_action::UpdateAction; +use codex_tui2 as tui2; use owo_colors::OwoColorize; use std::path::PathBuf; use supports_color::Stream; @@ -37,6 +38,11 @@ use crate::mcp_cmd::McpCli; use codex_core::config::Config; use codex_core::config::ConfigOverrides; +use codex_core::config::find_codex_home; +use codex_core::config::load_config_as_toml_with_cli_overrides; +use codex_core::features::Feature; +use codex_core::features::FeatureOverrides; +use codex_core::features::Features; use codex_core::features::is_known_feature_key; /// Codex CLI @@ -444,7 +450,7 @@ async fn cli_main(codex_linux_sandbox_exe: Option) -> anyhow::Result<() &mut interactive.config_overrides, root_config_overrides.clone(), ); - let exit_info = codex_tui::run_main(interactive, codex_linux_sandbox_exe).await?; + let exit_info = run_interactive_tui(interactive, codex_linux_sandbox_exe).await?; handle_app_exit(exit_info)?; } Some(Subcommand::Exec(mut exec_cli)) => { @@ -499,7 +505,7 @@ async fn cli_main(codex_linux_sandbox_exe: Option) -> anyhow::Result<() all, config_overrides, ); - let exit_info = codex_tui::run_main(interactive, codex_linux_sandbox_exe).await?; + let exit_info = 
run_interactive_tui(interactive, codex_linux_sandbox_exe).await?; handle_app_exit(exit_info)?; } Some(Subcommand::Login(mut login_cli)) => { @@ -650,6 +656,40 @@ fn prepend_config_flags( .splice(0..0, cli_config_overrides.raw_overrides); } +/// Run the interactive Codex TUI, dispatching to either the legacy implementation or the +/// experimental TUI v2 shim based on feature flags resolved from config. +async fn run_interactive_tui( + interactive: TuiCli, + codex_linux_sandbox_exe: Option, +) -> std::io::Result { + if is_tui2_enabled(&interactive).await? { + let result = tui2::run_main(interactive.into(), codex_linux_sandbox_exe).await?; + Ok(result.into()) + } else { + codex_tui::run_main(interactive, codex_linux_sandbox_exe).await + } +} + +/// Returns `Ok(true)` when the resolved configuration enables the `tui2` feature flag. +/// +/// This performs a lightweight config load (honoring the same precedence as the lower-level TUI +/// bootstrap: `$CODEX_HOME`, config.toml, profile, and CLI `-c` overrides) solely to decide which +/// TUI frontend to launch. The full configuration is still loaded later by the interactive TUI. +async fn is_tui2_enabled(cli: &TuiCli) -> std::io::Result { + let raw_overrides = cli.config_overrides.raw_overrides.clone(); + let overrides_cli = codex_common::CliConfigOverrides { raw_overrides }; + let cli_kv_overrides = overrides_cli + .parse_overrides() + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e))?; + + let codex_home = find_codex_home()?; + let config_toml = load_config_as_toml_with_cli_overrides(&codex_home, cli_kv_overrides).await?; + let config_profile = config_toml.get_config_profile(cli.config_profile.clone())?; + let overrides = FeatureOverrides::default(); + let features = Features::from_config(&config_toml, &config_profile, overrides); + Ok(features.enabled(Feature::Tui2)) +} + /// Build the final `TuiCli` for a `codex resume` invocation. 
fn finalize_resume_interactive( mut interactive: TuiCli, diff --git a/codex-rs/cli/src/mcp_cmd.rs b/codex-rs/cli/src/mcp_cmd.rs index 93f22e705c..bfeedb1f78 100644 --- a/codex-rs/cli/src/mcp_cmd.rs +++ b/codex-rs/cli/src/mcp_cmd.rs @@ -53,11 +53,11 @@ pub enum McpSubcommand { Remove(RemoveArgs), /// [experimental] Authenticate with a configured MCP server via OAuth. - /// Requires experimental_use_rmcp_client = true in config.toml. + /// Requires features.rmcp_client = true in config.toml. Login(LoginArgs), /// [experimental] Remove stored OAuth credentials for a server. - /// Requires experimental_use_rmcp_client = true in config.toml. + /// Requires features.rmcp_client = true in config.toml. Logout(LogoutArgs), } @@ -285,7 +285,7 @@ async fn run_add(config_overrides: &CliConfigOverrides, add_args: AddArgs) -> Re Ok(true) => { if !config.features.enabled(Feature::RmcpClient) { println!( - "MCP server supports login. Add `experimental_use_rmcp_client = true` \ + "MCP server supports login. Add `features.rmcp_client = true` \ to your config.toml and run `codex mcp login {name}` to login." ); } else { diff --git a/codex-rs/cli/src/wsl_paths.rs b/codex-rs/cli/src/wsl_paths.rs index 56ce8668cf..b6ceb2e0bc 100644 --- a/codex-rs/cli/src/wsl_paths.rs +++ b/codex-rs/cli/src/wsl_paths.rs @@ -1,24 +1,7 @@ use std::ffi::OsStr; -/// WSL-specific path helpers used by the updater logic. -/// -/// See https://github.com/openai/codex/issues/6086. -pub fn is_wsl() -> bool { - #[cfg(target_os = "linux")] - { - if std::env::var_os("WSL_DISTRO_NAME").is_some() { - return true; - } - match std::fs::read_to_string("/proc/version") { - Ok(version) => version.to_lowercase().contains("microsoft"), - Err(_) => false, - } - } - #[cfg(not(target_os = "linux"))] - { - false - } -} +/// Returns true if the current process is running under WSL. +pub use codex_core::env::is_wsl; /// Convert a Windows absolute path (`C:\foo\bar` or `C:/foo/bar`) to a WSL mount path (`/mnt/c/foo/bar`). 
/// Returns `None` if the input does not look like a Windows drive path. diff --git a/codex-rs/cli/tests/execpolicy.rs b/codex-rs/cli/tests/execpolicy.rs index c6bca85bc6..241a873d59 100644 --- a/codex-rs/cli/tests/execpolicy.rs +++ b/codex-rs/cli/tests/execpolicy.rs @@ -8,7 +8,12 @@ use tempfile::TempDir; #[test] fn execpolicy_check_matches_expected_json() -> Result<(), Box> { let codex_home = TempDir::new()?; - let policy_path = codex_home.path().join("policy.codexpolicy"); + let policy_path = codex_home.path().join("rules").join("policy.rules"); + fs::create_dir_all( + policy_path + .parent() + .expect("policy path should have a parent"), + )?; fs::write( &policy_path, r#" @@ -24,7 +29,7 @@ prefix_rule( .args([ "execpolicy", "check", - "--policy", + "--rules", policy_path .to_str() .expect("policy path should be valid UTF-8"), @@ -40,17 +45,15 @@ prefix_rule( assert_eq!( result, json!({ - "match": { - "decision": "forbidden", - "matchedRules": [ - { - "prefixRuleMatch": { - "matchedPrefix": ["git", "push"], - "decision": "forbidden" - } + "decision": "forbidden", + "matchedRules": [ + { + "prefixRuleMatch": { + "matchedPrefix": ["git", "push"], + "decision": "forbidden" } - ] - } + } + ] }) ); diff --git a/codex-rs/cloud-tasks-client/src/api.rs b/codex-rs/cloud-tasks-client/src/api.rs index 4bd12939e8..cd8228bc28 100644 --- a/codex-rs/cloud-tasks-client/src/api.rs +++ b/codex-rs/cloud-tasks-client/src/api.rs @@ -127,6 +127,7 @@ impl Default for TaskText { #[async_trait::async_trait] pub trait CloudBackend: Send + Sync { async fn list_tasks(&self, env: Option<&str>) -> Result>; + async fn get_task_summary(&self, id: TaskId) -> Result; async fn get_task_diff(&self, id: TaskId) -> Result>; /// Return assistant output messages (no diff) when available. 
async fn get_task_messages(&self, id: TaskId) -> Result>; diff --git a/codex-rs/cloud-tasks-client/src/http.rs b/codex-rs/cloud-tasks-client/src/http.rs index 57d39b7bda..f55d0fe797 100644 --- a/codex-rs/cloud-tasks-client/src/http.rs +++ b/codex-rs/cloud-tasks-client/src/http.rs @@ -63,6 +63,10 @@ impl CloudBackend for HttpClient { self.tasks_api().list(env).await } + async fn get_task_summary(&self, id: TaskId) -> Result { + self.tasks_api().summary(id).await + } + async fn get_task_diff(&self, id: TaskId) -> Result> { self.tasks_api().diff(id).await } @@ -149,6 +153,75 @@ mod api { Ok(tasks) } + pub(crate) async fn summary(&self, id: TaskId) -> Result { + let id_str = id.0.clone(); + let (details, body, ct) = self + .details_with_body(&id.0) + .await + .map_err(|e| CloudTaskError::Http(format!("get_task_details failed: {e}")))?; + let parsed: Value = serde_json::from_str(&body).map_err(|e| { + CloudTaskError::Http(format!( + "Decode error for {}: {e}; content-type={ct}; body={body}", + id.0 + )) + })?; + let task_obj = parsed + .get("task") + .and_then(Value::as_object) + .ok_or_else(|| { + CloudTaskError::Http(format!("Task metadata missing from details for {id_str}")) + })?; + let status_display = parsed + .get("task_status_display") + .or_else(|| task_obj.get("task_status_display")) + .and_then(Value::as_object) + .map(|m| { + m.iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect::>() + }); + let status = map_status(status_display.as_ref()); + let mut summary = diff_summary_from_status_display(status_display.as_ref()); + if summary.files_changed == 0 + && summary.lines_added == 0 + && summary.lines_removed == 0 + && let Some(diff) = details.unified_diff() + { + summary = diff_summary_from_diff(&diff); + } + let updated_at_raw = task_obj + .get("updated_at") + .and_then(Value::as_f64) + .or_else(|| task_obj.get("created_at").and_then(Value::as_f64)) + .or_else(|| latest_turn_timestamp(status_display.as_ref())); + let environment_id = task_obj + 
.get("environment_id") + .and_then(Value::as_str) + .map(str::to_string); + let environment_label = env_label_from_status_display(status_display.as_ref()); + let attempt_total = attempt_total_from_status_display(status_display.as_ref()); + let title = task_obj + .get("title") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(); + let is_review = task_obj + .get("is_review") + .and_then(Value::as_bool) + .unwrap_or(false); + Ok(TaskSummary { + id, + title, + status, + updated_at: parse_updated_at(updated_at_raw.as_ref()), + environment_id, + environment_label, + summary, + is_review, + attempt_total, + }) + } + pub(crate) async fn diff(&self, id: TaskId) -> Result> { let (details, body, ct) = self .details_with_body(&id.0) @@ -679,6 +752,34 @@ mod api { .map(str::to_string) } + fn diff_summary_from_diff(diff: &str) -> DiffSummary { + let mut files_changed = 0usize; + let mut lines_added = 0usize; + let mut lines_removed = 0usize; + for line in diff.lines() { + if line.starts_with("diff --git ") { + files_changed += 1; + continue; + } + if line.starts_with("+++") || line.starts_with("---") || line.starts_with("@@") { + continue; + } + match line.as_bytes().first() { + Some(b'+') => lines_added += 1, + Some(b'-') => lines_removed += 1, + _ => {} + } + } + if files_changed == 0 && !diff.trim().is_empty() { + files_changed = 1; + } + DiffSummary { + files_changed, + lines_added, + lines_removed, + } + } + fn diff_summary_from_status_display(v: Option<&HashMap>) -> DiffSummary { let mut out = DiffSummary::default(); let Some(map) = v else { return out }; @@ -700,6 +801,17 @@ mod api { out } + fn latest_turn_timestamp(v: Option<&HashMap>) -> Option { + let map = v?; + let latest = map + .get("latest_turn_status_display") + .and_then(Value::as_object)?; + latest + .get("updated_at") + .or_else(|| latest.get("created_at")) + .and_then(Value::as_f64) + } + fn attempt_total_from_status_display(v: Option<&HashMap>) -> Option { let map = v?; let latest = map diff --git 
a/codex-rs/cloud-tasks-client/src/mock.rs b/codex-rs/cloud-tasks-client/src/mock.rs index 97bc5520a8..2d03cea029 100644 --- a/codex-rs/cloud-tasks-client/src/mock.rs +++ b/codex-rs/cloud-tasks-client/src/mock.rs @@ -1,6 +1,7 @@ use crate::ApplyOutcome; use crate::AttemptStatus; use crate::CloudBackend; +use crate::CloudTaskError; use crate::DiffSummary; use crate::Result; use crate::TaskId; @@ -60,6 +61,14 @@ impl CloudBackend for MockClient { Ok(out) } + async fn get_task_summary(&self, id: TaskId) -> Result { + let tasks = self.list_tasks(None).await?; + tasks + .into_iter() + .find(|t| t.id == id) + .ok_or_else(|| CloudTaskError::Msg(format!("Task {} not found (mock)", id.0))) + } + async fn get_task_diff(&self, id: TaskId) -> Result> { Ok(Some(mock_diff_for(&id))) } diff --git a/codex-rs/cloud-tasks/Cargo.toml b/codex-rs/cloud-tasks/Cargo.toml index c9edf5b4ad..188538bec6 100644 --- a/codex-rs/cloud-tasks/Cargo.toml +++ b/codex-rs/cloud-tasks/Cargo.toml @@ -34,6 +34,9 @@ tokio-stream = { workspace = true } tracing = { workspace = true, features = ["log"] } tracing-subscriber = { workspace = true, features = ["env-filter"] } unicode-width = { workspace = true } +owo-colors = { workspace = true, features = ["supports-colors"] } +supports-color = { workspace = true } [dev-dependencies] async-trait = { workspace = true } +pretty_assertions = { workspace = true } diff --git a/codex-rs/cloud-tasks/src/app.rs b/codex-rs/cloud-tasks/src/app.rs index 612c5f6be4..ce12128a3e 100644 --- a/codex-rs/cloud-tasks/src/app.rs +++ b/codex-rs/cloud-tasks/src/app.rs @@ -350,6 +350,7 @@ pub enum AppEvent { mod tests { use super::*; use chrono::Utc; + use codex_cloud_tasks_client::CloudTaskError; struct FakeBackend { // maps env key to titles @@ -385,6 +386,17 @@ mod tests { Ok(out) } + async fn get_task_summary( + &self, + id: TaskId, + ) -> codex_cloud_tasks_client::Result { + self.list_tasks(None) + .await? 
+ .into_iter() + .find(|t| t.id == id) + .ok_or_else(|| CloudTaskError::Msg(format!("Task {} not found", id.0))) + } + async fn get_task_diff( &self, _id: TaskId, diff --git a/codex-rs/cloud-tasks/src/cli.rs b/codex-rs/cloud-tasks/src/cli.rs index 4122aeff68..a7612153b4 100644 --- a/codex-rs/cloud-tasks/src/cli.rs +++ b/codex-rs/cloud-tasks/src/cli.rs @@ -16,6 +16,12 @@ pub struct Cli { pub enum Command { /// Submit a new Codex Cloud task without launching the TUI. Exec(ExecCommand), + /// Show the status of a Codex Cloud task. + Status(StatusCommand), + /// Apply the diff for a Codex Cloud task locally. + Apply(ApplyCommand), + /// Show the unified diff for a Codex Cloud task. + Diff(DiffCommand), } #[derive(Debug, Args)] @@ -28,6 +34,10 @@ pub struct ExecCommand { #[arg(long = "env", value_name = "ENV_ID")] pub environment: String, + /// Git branch to run in Codex Cloud. + #[arg(long = "branch", value_name = "BRANCH", default_value = "main")] + pub branch: String, + /// Number of assistant attempts (best-of-N). #[arg( long = "attempts", @@ -47,3 +57,32 @@ fn parse_attempts(input: &str) -> Result { Err("attempts must be between 1 and 4".to_string()) } } + +#[derive(Debug, Args)] +pub struct StatusCommand { + /// Codex Cloud task identifier to inspect. + #[arg(value_name = "TASK_ID")] + pub task_id: String, +} + +#[derive(Debug, Args)] +pub struct ApplyCommand { + /// Codex Cloud task identifier to apply. + #[arg(value_name = "TASK_ID")] + pub task_id: String, + + /// Attempt number to apply (1-based). + #[arg(long = "attempt", value_parser = parse_attempts, value_name = "N")] + pub attempt: Option, +} + +#[derive(Debug, Args)] +pub struct DiffCommand { + /// Codex Cloud task identifier to display. + #[arg(value_name = "TASK_ID")] + pub task_id: String, + + /// Attempt number to display (1-based). 
+ #[arg(long = "attempt", value_parser = parse_attempts, value_name = "N")] + pub attempt: Option, +} diff --git a/codex-rs/cloud-tasks/src/lib.rs b/codex-rs/cloud-tasks/src/lib.rs index 6fc721404b..f73e07f3af 100644 --- a/codex-rs/cloud-tasks/src/lib.rs +++ b/codex-rs/cloud-tasks/src/lib.rs @@ -8,17 +8,24 @@ pub mod util; pub use cli::Cli; use anyhow::anyhow; +use chrono::Utc; +use codex_cloud_tasks_client::TaskStatus; use codex_login::AuthManager; +use owo_colors::OwoColorize; +use owo_colors::Stream; +use std::cmp::Ordering; use std::io::IsTerminal; use std::io::Read; use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; use std::time::Instant; +use supports_color::Stream as SupportStream; use tokio::sync::mpsc::UnboundedSender; use tracing::info; use tracing_subscriber::EnvFilter; use util::append_error_log; +use util::format_relative_time; use util::set_user_agent_suffix; struct ApplyJob { @@ -101,6 +108,7 @@ async fn run_exec_command(args: crate::cli::ExecCommand) -> anyhow::Result<()> { let crate::cli::ExecCommand { query, environment, + branch, attempts, } = args; let ctx = init_backend("codex_cloud_tasks_exec").await?; @@ -110,7 +118,7 @@ async fn run_exec_command(args: crate::cli::ExecCommand) -> anyhow::Result<()> { &*ctx.backend, &env_id, &prompt, - "main", + &branch, false, attempts, ) @@ -192,6 +200,273 @@ fn resolve_query_input(query_arg: Option) -> anyhow::Result { } } +fn parse_task_id(raw: &str) -> anyhow::Result { + let trimmed = raw.trim(); + if trimmed.is_empty() { + anyhow::bail!("task id must not be empty"); + } + let without_fragment = trimmed.split('#').next().unwrap_or(trimmed); + let without_query = without_fragment + .split('?') + .next() + .unwrap_or(without_fragment); + let id = without_query + .rsplit('/') + .next() + .unwrap_or(without_query) + .trim(); + if id.is_empty() { + anyhow::bail!("task id must not be empty"); + } + Ok(codex_cloud_tasks_client::TaskId(id.to_string())) +} + +#[derive(Clone, Debug)] +struct 
AttemptDiffData { + placement: Option, + created_at: Option>, + diff: String, +} + +fn cmp_attempt(lhs: &AttemptDiffData, rhs: &AttemptDiffData) -> Ordering { + match (lhs.placement, rhs.placement) { + (Some(a), Some(b)) => a.cmp(&b), + (Some(_), None) => Ordering::Less, + (None, Some(_)) => Ordering::Greater, + (None, None) => match (lhs.created_at, rhs.created_at) { + (Some(a), Some(b)) => a.cmp(&b), + (Some(_), None) => Ordering::Less, + (None, Some(_)) => Ordering::Greater, + (None, None) => Ordering::Equal, + }, + } +} + +async fn collect_attempt_diffs( + backend: &dyn codex_cloud_tasks_client::CloudBackend, + task_id: &codex_cloud_tasks_client::TaskId, +) -> anyhow::Result> { + let text = + codex_cloud_tasks_client::CloudBackend::get_task_text(backend, task_id.clone()).await?; + let mut attempts = Vec::new(); + if let Some(diff) = + codex_cloud_tasks_client::CloudBackend::get_task_diff(backend, task_id.clone()).await? + { + attempts.push(AttemptDiffData { + placement: text.attempt_placement, + created_at: None, + diff, + }); + } + if let Some(turn_id) = text.turn_id { + let siblings = codex_cloud_tasks_client::CloudBackend::list_sibling_attempts( + backend, + task_id.clone(), + turn_id, + ) + .await?; + for sibling in siblings { + if let Some(diff) = sibling.diff { + attempts.push(AttemptDiffData { + placement: sibling.attempt_placement, + created_at: sibling.created_at, + diff, + }); + } + } + } + attempts.sort_by(cmp_attempt); + if attempts.is_empty() { + anyhow::bail!( + "No diff available for task {}; it may still be running.", + task_id.0 + ); + } + Ok(attempts) +} + +fn select_attempt( + attempts: &[AttemptDiffData], + attempt: Option, +) -> anyhow::Result<&AttemptDiffData> { + if attempts.is_empty() { + anyhow::bail!("No attempts available"); + } + let desired = attempt.unwrap_or(1); + let idx = desired + .checked_sub(1) + .ok_or_else(|| anyhow!("attempt must be at least 1"))?; + if idx >= attempts.len() { + anyhow::bail!( + "Attempt {desired} not 
available; only {} attempt(s) found", + attempts.len() + ); + } + Ok(&attempts[idx]) +} + +fn task_status_label(status: &TaskStatus) -> &'static str { + match status { + TaskStatus::Pending => "PENDING", + TaskStatus::Ready => "READY", + TaskStatus::Applied => "APPLIED", + TaskStatus::Error => "ERROR", + } +} + +fn summary_line(summary: &codex_cloud_tasks_client::DiffSummary, colorize: bool) -> String { + if summary.files_changed == 0 && summary.lines_added == 0 && summary.lines_removed == 0 { + let base = "no diff"; + return if colorize { + base.if_supports_color(Stream::Stdout, |t| t.dimmed()) + .to_string() + } else { + base.to_string() + }; + } + let adds = summary.lines_added; + let dels = summary.lines_removed; + let files = summary.files_changed; + if colorize { + let adds_raw = format!("+{adds}"); + let adds_str = adds_raw + .as_str() + .if_supports_color(Stream::Stdout, |t| t.green()) + .to_string(); + let dels_raw = format!("-{dels}"); + let dels_str = dels_raw + .as_str() + .if_supports_color(Stream::Stdout, |t| t.red()) + .to_string(); + let bullet = "•" + .if_supports_color(Stream::Stdout, |t| t.dimmed()) + .to_string(); + let file_label = "file" + .if_supports_color(Stream::Stdout, |t| t.dimmed()) + .to_string(); + let plural = if files == 1 { "" } else { "s" }; + format!("{adds_str}/{dels_str} {bullet} {files} {file_label}{plural}") + } else { + format!( + "+{adds}/-{dels} • {files} file{}", + if files == 1 { "" } else { "s" } + ) + } +} + +fn format_task_status_lines( + task: &codex_cloud_tasks_client::TaskSummary, + now: chrono::DateTime, + colorize: bool, +) -> Vec { + let mut lines = Vec::new(); + let status = task_status_label(&task.status); + let status = if colorize { + match task.status { + TaskStatus::Ready => status + .if_supports_color(Stream::Stdout, |t| t.green()) + .to_string(), + TaskStatus::Pending => status + .if_supports_color(Stream::Stdout, |t| t.magenta()) + .to_string(), + TaskStatus::Applied => status + 
.if_supports_color(Stream::Stdout, |t| t.blue()) + .to_string(), + TaskStatus::Error => status + .if_supports_color(Stream::Stdout, |t| t.red()) + .to_string(), + } + } else { + status.to_string() + }; + lines.push(format!("[{status}] {}", task.title)); + let mut meta_parts = Vec::new(); + if let Some(label) = task.environment_label.as_deref().filter(|s| !s.is_empty()) { + if colorize { + meta_parts.push( + label + .if_supports_color(Stream::Stdout, |t| t.dimmed()) + .to_string(), + ); + } else { + meta_parts.push(label.to_string()); + } + } else if let Some(id) = task.environment_id.as_deref() { + if colorize { + meta_parts.push( + id.if_supports_color(Stream::Stdout, |t| t.dimmed()) + .to_string(), + ); + } else { + meta_parts.push(id.to_string()); + } + } + let when = format_relative_time(now, task.updated_at); + meta_parts.push(if colorize { + when.as_str() + .if_supports_color(Stream::Stdout, |t| t.dimmed()) + .to_string() + } else { + when + }); + let sep = if colorize { + " • " + .if_supports_color(Stream::Stdout, |t| t.dimmed()) + .to_string() + } else { + " • ".to_string() + }; + lines.push(meta_parts.join(&sep)); + lines.push(summary_line(&task.summary, colorize)); + lines +} + +async fn run_status_command(args: crate::cli::StatusCommand) -> anyhow::Result<()> { + let ctx = init_backend("codex_cloud_tasks_status").await?; + let task_id = parse_task_id(&args.task_id)?; + let summary = + codex_cloud_tasks_client::CloudBackend::get_task_summary(&*ctx.backend, task_id).await?; + let now = Utc::now(); + let colorize = supports_color::on(SupportStream::Stdout).is_some(); + for line in format_task_status_lines(&summary, now, colorize) { + println!("{line}"); + } + if !matches!(summary.status, TaskStatus::Ready) { + std::process::exit(1); + } + Ok(()) +} + +async fn run_diff_command(args: crate::cli::DiffCommand) -> anyhow::Result<()> { + let ctx = init_backend("codex_cloud_tasks_diff").await?; + let task_id = parse_task_id(&args.task_id)?; + let attempts = 
collect_attempt_diffs(&*ctx.backend, &task_id).await?; + let selected = select_attempt(&attempts, args.attempt)?; + print!("{}", selected.diff); + Ok(()) +} + +async fn run_apply_command(args: crate::cli::ApplyCommand) -> anyhow::Result<()> { + let ctx = init_backend("codex_cloud_tasks_apply").await?; + let task_id = parse_task_id(&args.task_id)?; + let attempts = collect_attempt_diffs(&*ctx.backend, &task_id).await?; + let selected = select_attempt(&attempts, args.attempt)?; + let outcome = codex_cloud_tasks_client::CloudBackend::apply_task( + &*ctx.backend, + task_id, + Some(selected.diff.clone()), + ) + .await?; + println!("{}", outcome.message); + if !matches!( + outcome.status, + codex_cloud_tasks_client::ApplyStatus::Success + ) { + std::process::exit(1); + } + Ok(()) +} + fn level_from_status(status: codex_cloud_tasks_client::ApplyStatus) -> app::ApplyResultLevel { match status { codex_cloud_tasks_client::ApplyStatus::Success => app::ApplyResultLevel::Success, @@ -321,6 +596,9 @@ pub async fn run_main(cli: Cli, _codex_linux_sandbox_exe: Option) -> an if let Some(command) = cli.command { return match command { crate::cli::Command::Exec(args) => run_exec_command(args).await, + crate::cli::Command::Status(args) => run_status_command(args).await, + crate::cli::Command::Apply(args) => run_apply_command(args).await, + crate::cli::Command::Diff(args) => run_diff_command(args).await, }; } let Cli { .. 
} = cli; @@ -1712,14 +1990,111 @@ fn pretty_lines_from_error(raw: &str) -> Vec { #[cfg(test)] mod tests { + use super::*; + use codex_cloud_tasks_client::DiffSummary; + use codex_cloud_tasks_client::MockClient; + use codex_cloud_tasks_client::TaskId; + use codex_cloud_tasks_client::TaskStatus; + use codex_cloud_tasks_client::TaskSummary; use codex_tui::ComposerAction; use codex_tui::ComposerInput; use crossterm::event::KeyCode; use crossterm::event::KeyEvent; use crossterm::event::KeyModifiers; + use pretty_assertions::assert_eq; use ratatui::buffer::Buffer; use ratatui::layout::Rect; + #[test] + fn format_task_status_lines_with_diff_and_label() { + let now = Utc::now(); + let task = TaskSummary { + id: TaskId("task_1".to_string()), + title: "Example task".to_string(), + status: TaskStatus::Ready, + updated_at: now, + environment_id: Some("env-1".to_string()), + environment_label: Some("Env".to_string()), + summary: DiffSummary { + files_changed: 3, + lines_added: 5, + lines_removed: 2, + }, + is_review: false, + attempt_total: None, + }; + let lines = format_task_status_lines(&task, now, false); + assert_eq!( + lines, + vec![ + "[READY] Example task".to_string(), + "Env • 0s ago".to_string(), + "+5/-2 • 3 files".to_string(), + ] + ); + } + + #[test] + fn format_task_status_lines_without_diff_falls_back() { + let now = Utc::now(); + let task = TaskSummary { + id: TaskId("task_2".to_string()), + title: "No diff task".to_string(), + status: TaskStatus::Pending, + updated_at: now, + environment_id: Some("env-2".to_string()), + environment_label: None, + summary: DiffSummary::default(), + is_review: false, + attempt_total: Some(1), + }; + let lines = format_task_status_lines(&task, now, false); + assert_eq!( + lines, + vec![ + "[PENDING] No diff task".to_string(), + "env-2 • 0s ago".to_string(), + "no diff".to_string(), + ] + ); + } + + #[tokio::test] + async fn collect_attempt_diffs_includes_sibling_attempts() { + let backend = MockClient; + let task_id = 
parse_task_id("https://chatgpt.com/codex/tasks/T-1000").expect("id"); + let attempts = collect_attempt_diffs(&backend, &task_id) + .await + .expect("attempts"); + assert_eq!(attempts.len(), 2); + assert_eq!(attempts[0].placement, Some(0)); + assert_eq!(attempts[1].placement, Some(1)); + assert!(!attempts[0].diff.is_empty()); + assert!(!attempts[1].diff.is_empty()); + } + + #[test] + fn select_attempt_validates_bounds() { + let attempts = vec![AttemptDiffData { + placement: Some(0), + created_at: None, + diff: "diff --git a/file b/file\n".to_string(), + }]; + let first = select_attempt(&attempts, Some(1)).expect("attempt 1"); + assert_eq!(first.diff, "diff --git a/file b/file\n"); + assert!(select_attempt(&attempts, Some(2)).is_err()); + } + + #[test] + fn parse_task_id_from_url_and_raw() { + let raw = parse_task_id("task_i_abc123").expect("raw id"); + assert_eq!(raw.0, "task_i_abc123"); + let url = + parse_task_id("https://chatgpt.com/codex/tasks/task_i_123456?foo=bar").expect("url id"); + assert_eq!(url.0, "task_i_123456"); + assert!(parse_task_id(" ").is_err()); + } + #[test] #[ignore = "very slow"] fn composer_input_renders_typed_characters() { diff --git a/codex-rs/cloud-tasks/src/ui.rs b/codex-rs/cloud-tasks/src/ui.rs index e3a97aeb3f..4c41ca576c 100644 --- a/codex-rs/cloud-tasks/src/ui.rs +++ b/codex-rs/cloud-tasks/src/ui.rs @@ -20,8 +20,7 @@ use std::time::Instant; use crate::app::App; use crate::app::AttemptView; -use chrono::Local; -use chrono::Utc; +use crate::util::format_relative_time_now; use codex_cloud_tasks_client::AttemptStatus; use codex_cloud_tasks_client::TaskStatus; use codex_tui::render_markdown_text; @@ -804,7 +803,7 @@ fn render_task_item(_app: &App, t: &codex_cloud_tasks_client::TaskSummary) -> Li if let Some(lbl) = t.environment_label.as_ref().filter(|s| !s.is_empty()) { meta.push(lbl.clone().dim()); } - let when = format_relative_time(t.updated_at).dim(); + let when = format_relative_time_now(t.updated_at).dim(); if !meta.is_empty() { 
meta.push(" ".into()); meta.push("•".dim()); @@ -841,27 +840,6 @@ fn render_task_item(_app: &App, t: &codex_cloud_tasks_client::TaskSummary) -> Li ListItem::new(vec![title, meta_line, sub, spacer]) } -fn format_relative_time(ts: chrono::DateTime) -> String { - let now = Utc::now(); - let mut secs = (now - ts).num_seconds(); - if secs < 0 { - secs = 0; - } - if secs < 60 { - return format!("{secs}s ago"); - } - let mins = secs / 60; - if mins < 60 { - return format!("{mins}m ago"); - } - let hours = mins / 60; - if hours < 24 { - return format!("{hours}h ago"); - } - let local = ts.with_timezone(&Local); - local.format("%b %e %H:%M").to_string() -} - fn draw_inline_spinner( frame: &mut Frame, area: Rect, diff --git a/codex-rs/cloud-tasks/src/util.rs b/codex-rs/cloud-tasks/src/util.rs index 1c690b26c0..79513dbcf2 100644 --- a/codex-rs/cloud-tasks/src/util.rs +++ b/codex-rs/cloud-tasks/src/util.rs @@ -1,4 +1,6 @@ use base64::Engine as _; +use chrono::DateTime; +use chrono::Local; use chrono::Utc; use reqwest::header::HeaderMap; @@ -120,3 +122,27 @@ pub fn task_url(base_url: &str, task_id: &str) -> String { } format!("{normalized}/codex/tasks/{task_id}") } + +pub fn format_relative_time(reference: DateTime, ts: DateTime) -> String { + let mut secs = (reference - ts).num_seconds(); + if secs < 0 { + secs = 0; + } + if secs < 60 { + return format!("{secs}s ago"); + } + let mins = secs / 60; + if mins < 60 { + return format!("{mins}m ago"); + } + let hours = mins / 60; + if hours < 24 { + return format!("{hours}h ago"); + } + let local = ts.with_timezone(&Local); + local.format("%b %e %H:%M").to_string() +} + +pub fn format_relative_time_now(ts: DateTime) -> String { + format_relative_time(Utc::now(), ts) +} diff --git a/codex-rs/codex-api/Cargo.toml b/codex-rs/codex-api/Cargo.toml index f79416c96e..e9fc78878b 100644 --- a/codex-rs/codex-api/Cargo.toml +++ b/codex-rs/codex-api/Cargo.toml @@ -25,6 +25,8 @@ anyhow = { workspace = true } assert_matches = { workspace = true } 
pretty_assertions = { workspace = true } tokio-test = { workspace = true } +wiremock = { workspace = true } +reqwest = { workspace = true } [lints] workspace = true diff --git a/codex-rs/codex-api/src/common.rs b/codex-rs/codex-api/src/common.rs index addab02dc7..19e82de332 100644 --- a/codex-rs/codex-api/src/common.rs +++ b/codex-rs/codex-api/src/common.rs @@ -1,8 +1,8 @@ use crate::error::ApiError; -use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig; use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig; use codex_protocol::config_types::Verbosity as VerbosityConfig; use codex_protocol::models::ResponseItem; +use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig; use codex_protocol::protocol::RateLimitSnapshot; use codex_protocol::protocol::TokenUsage; use futures::Stream; diff --git a/codex-rs/codex-api/src/endpoint/mod.rs b/codex-rs/codex-api/src/endpoint/mod.rs index 104b4c2640..cb0eeb9f20 100644 --- a/codex-rs/codex-api/src/endpoint/mod.rs +++ b/codex-rs/codex-api/src/endpoint/mod.rs @@ -1,4 +1,5 @@ pub mod chat; pub mod compact; +pub mod models; pub mod responses; mod streaming; diff --git a/codex-rs/codex-api/src/endpoint/models.rs b/codex-rs/codex-api/src/endpoint/models.rs new file mode 100644 index 0000000000..b15f07fca2 --- /dev/null +++ b/codex-rs/codex-api/src/endpoint/models.rs @@ -0,0 +1,286 @@ +use crate::auth::AuthProvider; +use crate::auth::add_auth_headers; +use crate::error::ApiError; +use crate::provider::Provider; +use crate::telemetry::run_with_request_telemetry; +use codex_client::HttpTransport; +use codex_client::RequestTelemetry; +use codex_protocol::openai_models::ModelsResponse; +use http::HeaderMap; +use http::Method; +use http::header::ETAG; +use std::sync::Arc; + +pub struct ModelsClient { + transport: T, + provider: Provider, + auth: A, + request_telemetry: Option>, +} + +impl ModelsClient { + pub fn new(transport: T, provider: Provider, auth: A) -> Self { + Self { 
+ transport, + provider, + auth, + request_telemetry: None, + } + } + + pub fn with_telemetry(mut self, request: Option>) -> Self { + self.request_telemetry = request; + self + } + + fn path(&self) -> &'static str { + "models" + } + + pub async fn list_models( + &self, + client_version: &str, + extra_headers: HeaderMap, + ) -> Result { + let builder = || { + let mut req = self.provider.build_request(Method::GET, self.path()); + req.headers.extend(extra_headers.clone()); + + let separator = if req.url.contains('?') { '&' } else { '?' }; + req.url = format!("{}{}client_version={client_version}", req.url, separator); + + add_auth_headers(&self.auth, req) + }; + + let resp = run_with_request_telemetry( + self.provider.retry.to_policy(), + self.request_telemetry.clone(), + builder, + |req| self.transport.execute(req), + ) + .await?; + + let header_etag = resp + .headers + .get(ETAG) + .and_then(|value| value.to_str().ok()) + .map(ToString::to_string); + + let ModelsResponse { models, etag } = serde_json::from_slice::(&resp.body) + .map_err(|e| { + ApiError::Stream(format!( + "failed to decode models response: {e}; body: {}", + String::from_utf8_lossy(&resp.body) + )) + })?; + + let etag = header_etag.unwrap_or(etag); + + Ok(ModelsResponse { models, etag }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::provider::RetryConfig; + use crate::provider::WireApi; + use async_trait::async_trait; + use codex_client::Request; + use codex_client::Response; + use codex_client::StreamResponse; + use codex_client::TransportError; + use http::HeaderMap; + use http::StatusCode; + use pretty_assertions::assert_eq; + use serde_json::json; + use std::sync::Arc; + use std::sync::Mutex; + use std::time::Duration; + + #[derive(Clone)] + struct CapturingTransport { + last_request: Arc>>, + body: Arc, + } + + impl Default for CapturingTransport { + fn default() -> Self { + Self { + last_request: Arc::new(Mutex::new(None)), + body: Arc::new(ModelsResponse { + models: 
Vec::new(), + etag: String::new(), + }), + } + } + } + + #[async_trait] + impl HttpTransport for CapturingTransport { + async fn execute(&self, req: Request) -> Result { + *self.last_request.lock().unwrap() = Some(req); + let body = serde_json::to_vec(&*self.body).unwrap(); + let mut headers = HeaderMap::new(); + if !self.body.etag.is_empty() { + headers.insert(ETAG, self.body.etag.parse().unwrap()); + } + Ok(Response { + status: StatusCode::OK, + headers, + body: body.into(), + }) + } + + async fn stream(&self, _req: Request) -> Result { + Err(TransportError::Build("stream should not run".to_string())) + } + } + + #[derive(Clone, Default)] + struct DummyAuth; + + impl AuthProvider for DummyAuth { + fn bearer_token(&self) -> Option { + None + } + } + + fn provider(base_url: &str) -> Provider { + Provider { + name: "test".to_string(), + base_url: base_url.to_string(), + query_params: None, + wire: WireApi::Responses, + headers: HeaderMap::new(), + retry: RetryConfig { + max_attempts: 1, + base_delay: Duration::from_millis(1), + retry_429: false, + retry_5xx: true, + retry_transport: true, + }, + stream_idle_timeout: Duration::from_secs(1), + } + } + + #[tokio::test] + async fn appends_client_version_query() { + let response = ModelsResponse { + models: Vec::new(), + etag: String::new(), + }; + + let transport = CapturingTransport { + last_request: Arc::new(Mutex::new(None)), + body: Arc::new(response), + }; + + let client = ModelsClient::new( + transport.clone(), + provider("https://example.com/api/codex"), + DummyAuth, + ); + + let result = client + .list_models("0.99.0", HeaderMap::new()) + .await + .expect("request should succeed"); + + assert_eq!(result.models.len(), 0); + + let url = transport + .last_request + .lock() + .unwrap() + .as_ref() + .unwrap() + .url + .clone(); + assert_eq!( + url, + "https://example.com/api/codex/models?client_version=0.99.0" + ); + } + + #[tokio::test] + async fn parses_models_response() { + let response = ModelsResponse { + 
models: vec![ + serde_json::from_value(json!({ + "slug": "gpt-test", + "display_name": "gpt-test", + "description": "desc", + "default_reasoning_level": "medium", + "supported_reasoning_levels": [{"effort": "low", "description": "low"}, {"effort": "medium", "description": "medium"}, {"effort": "high", "description": "high"}], + "shell_type": "shell_command", + "visibility": "list", + "minimal_client_version": [0, 99, 0], + "supported_in_api": true, + "priority": 1, + "upgrade": null, + "base_instructions": null, + "supports_reasoning_summaries": false, + "support_verbosity": false, + "default_verbosity": null, + "apply_patch_tool_type": null, + "truncation_policy": {"mode": "bytes", "limit": 10_000}, + "supports_parallel_tool_calls": false, + "context_window": null, + "reasoning_summary_format": "none", + "experimental_supported_tools": [], + })) + .unwrap(), + ], + etag: String::new(), + }; + + let transport = CapturingTransport { + last_request: Arc::new(Mutex::new(None)), + body: Arc::new(response), + }; + + let client = ModelsClient::new( + transport, + provider("https://example.com/api/codex"), + DummyAuth, + ); + + let result = client + .list_models("0.99.0", HeaderMap::new()) + .await + .expect("request should succeed"); + + assert_eq!(result.models.len(), 1); + assert_eq!(result.models[0].slug, "gpt-test"); + assert_eq!(result.models[0].supported_in_api, true); + assert_eq!(result.models[0].priority, 1); + } + + #[tokio::test] + async fn list_models_includes_etag() { + let response = ModelsResponse { + models: Vec::new(), + etag: "\"abc\"".to_string(), + }; + + let transport = CapturingTransport { + last_request: Arc::new(Mutex::new(None)), + body: Arc::new(response), + }; + + let client = ModelsClient::new( + transport, + provider("https://example.com/api/codex"), + DummyAuth, + ); + + let result = client + .list_models("0.1.0", HeaderMap::new()) + .await + .expect("request should succeed"); + + assert_eq!(result.models.len(), 0); + assert_eq!(result.etag, 
"\"abc\""); + } +} diff --git a/codex-rs/codex-api/src/lib.rs b/codex-rs/codex-api/src/lib.rs index acde4b4589..d0c382ac8c 100644 --- a/codex-rs/codex-api/src/lib.rs +++ b/codex-rs/codex-api/src/lib.rs @@ -22,6 +22,7 @@ pub use crate::common::create_text_param_for_request; pub use crate::endpoint::chat::AggregateStreamExt; pub use crate::endpoint::chat::ChatClient; pub use crate::endpoint::compact::CompactClient; +pub use crate::endpoint::models::ModelsClient; pub use crate::endpoint::responses::ResponsesClient; pub use crate::endpoint::responses::ResponsesOptions; pub use crate::error::ApiError; diff --git a/codex-rs/codex-api/src/rate_limits.rs b/codex-rs/codex-api/src/rate_limits.rs index 69092063f6..bb8ede2f57 100644 --- a/codex-rs/codex-api/src/rate_limits.rs +++ b/codex-rs/codex-api/src/rate_limits.rs @@ -37,6 +37,7 @@ pub fn parse_rate_limit(headers: &HeaderMap) -> Option { primary, secondary, credits, + plan_type: None, }) } diff --git a/codex-rs/codex-api/src/sse/chat.rs b/codex-rs/codex-api/src/sse/chat.rs index 7f50bb634a..21adfa571a 100644 --- a/codex-rs/codex-api/src/sse/chat.rs +++ b/codex-rs/codex-api/src/sse/chat.rs @@ -10,6 +10,7 @@ use eventsource_stream::Eventsource; use futures::Stream; use futures::StreamExt; use std::collections::HashMap; +use std::collections::HashSet; use std::time::Duration; use tokio::sync::mpsc; use tokio::time::Instant; @@ -41,12 +42,17 @@ pub async fn process_chat_sse( #[derive(Default, Debug)] struct ToolCallState { + id: Option, name: Option, arguments: String, } - let mut tool_calls: HashMap = HashMap::new(); - let mut tool_call_order: Vec = Vec::new(); + let mut tool_calls: HashMap = HashMap::new(); + let mut tool_call_order: Vec = Vec::new(); + let mut tool_call_order_seen: HashSet = HashSet::new(); + let mut tool_call_index_by_id: HashMap = HashMap::new(); + let mut next_tool_call_index = 0usize; + let mut last_tool_call_index: Option = None; let mut assistant_item: Option = None; let mut reasoning_item: Option = 
None; let mut completed_sent = false; @@ -149,26 +155,55 @@ pub async fn process_chat_sse( if let Some(tool_call_values) = delta.get("tool_calls").and_then(|c| c.as_array()) { for tool_call in tool_call_values { - let id = tool_call - .get("id") - .and_then(|i| i.as_str()) - .map(str::to_string) - .unwrap_or_else(|| format!("tool-call-{}", tool_call_order.len())); + let mut index = tool_call + .get("index") + .and_then(serde_json::Value::as_u64) + .map(|i| i as usize); - let call_state = tool_calls.entry(id.clone()).or_default(); - if !tool_call_order.contains(&id) { - tool_call_order.push(id.clone()); + let mut call_id_for_lookup = None; + if let Some(call_id) = tool_call.get("id").and_then(|i| i.as_str()) { + call_id_for_lookup = Some(call_id.to_string()); + if let Some(existing) = tool_call_index_by_id.get(call_id) { + index = Some(*existing); + } + } + + if index.is_none() && call_id_for_lookup.is_none() { + index = last_tool_call_index; + } + + let index = index.unwrap_or_else(|| { + while tool_calls.contains_key(&next_tool_call_index) { + next_tool_call_index += 1; + } + let idx = next_tool_call_index; + next_tool_call_index += 1; + idx + }); + + let call_state = tool_calls.entry(index).or_default(); + if tool_call_order_seen.insert(index) { + tool_call_order.push(index); + } + + if let Some(id) = tool_call.get("id").and_then(|i| i.as_str()) { + call_state.id.get_or_insert_with(|| id.to_string()); + tool_call_index_by_id.entry(id.to_string()).or_insert(index); } if let Some(func) = tool_call.get("function") { - if let Some(fname) = func.get("name").and_then(|n| n.as_str()) { - call_state.name = Some(fname.to_string()); + if let Some(fname) = func.get("name").and_then(|n| n.as_str()) + && !fname.is_empty() + { + call_state.name.get_or_insert_with(|| fname.to_string()); } if let Some(arguments) = func.get("arguments").and_then(|a| a.as_str()) { call_state.arguments.push_str(arguments); } } + + last_tool_call_index = Some(index); } } } @@ -222,13 +257,25 @@ pub 
async fn process_chat_sse( .await; } - for call_id in tool_call_order.drain(..) { - let state = tool_calls.remove(&call_id).unwrap_or_default(); + for index in tool_call_order.drain(..) { + let Some(state) = tool_calls.remove(&index) else { + continue; + }; + tool_call_order_seen.remove(&index); + let ToolCallState { + id, + name, + arguments, + } = state; + let Some(name) = name else { + debug!("Skipping tool call at index {index} because name is missing"); + continue; + }; let item = ResponseItem::FunctionCall { id: None, - name: state.name.unwrap_or_default(), - arguments: state.arguments, - call_id: call_id.clone(), + name, + arguments, + call_id: id.unwrap_or_else(|| format!("tool-call-{index}")), }; let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await; } @@ -333,6 +380,59 @@ mod tests { out } + #[tokio::test] + async fn concatenates_tool_call_arguments_across_deltas() { + let delta_name = json!({ + "choices": [{ + "delta": { + "tool_calls": [{ + "id": "call_a", + "index": 0, + "function": { "name": "do_a" } + }] + } + }] + }); + + let delta_args_1 = json!({ + "choices": [{ + "delta": { + "tool_calls": [{ + "index": 0, + "function": { "arguments": "{ \"foo\":" } + }] + } + }] + }); + + let delta_args_2 = json!({ + "choices": [{ + "delta": { + "tool_calls": [{ + "index": 0, + "function": { "arguments": "1}" } + }] + } + }] + }); + + let finish = json!({ + "choices": [{ + "finish_reason": "tool_calls" + }] + }); + + let body = build_body(&[delta_name, delta_args_1, delta_args_2, finish]); + let events = collect_events(&body).await; + assert_matches!( + &events[..], + [ + ResponseEvent::OutputItemDone(ResponseItem::FunctionCall { call_id, name, arguments, .. }), + ResponseEvent::Completed { .. 
} + ] if call_id == "call_a" && name == "do_a" && arguments == "{ \"foo\":1}" + ); + } + #[tokio::test] async fn emits_multiple_tool_calls() { let delta_a = json!({ @@ -365,50 +465,74 @@ mod tests { let body = build_body(&[delta_a, delta_b, finish]); let events = collect_events(&body).await; - assert_eq!(events.len(), 3); - assert_matches!( - &events[0], - ResponseEvent::OutputItemDone(ResponseItem::FunctionCall { call_id, name, arguments, .. }) - if call_id == "call_a" && name == "do_a" && arguments == "{\"foo\":1}" + &events[..], + [ + ResponseEvent::OutputItemDone(ResponseItem::FunctionCall { call_id: call_a, name: name_a, arguments: args_a, .. }), + ResponseEvent::OutputItemDone(ResponseItem::FunctionCall { call_id: call_b, name: name_b, arguments: args_b, .. }), + ResponseEvent::Completed { .. } + ] if call_a == "call_a" && name_a == "do_a" && args_a == "{\"foo\":1}" && call_b == "call_b" && name_b == "do_b" && args_b == "{\"bar\":2}" ); - assert_matches!( - &events[1], - ResponseEvent::OutputItemDone(ResponseItem::FunctionCall { call_id, name, arguments, .. }) - if call_id == "call_b" && name == "do_b" && arguments == "{\"bar\":2}" - ); - assert_matches!(events[2], ResponseEvent::Completed { .. }); } #[tokio::test] - async fn concatenates_tool_call_arguments_across_deltas() { - let delta_name = json!({ + async fn emits_tool_calls_for_multiple_choices() { + let payload = json!({ + "choices": [ + { + "delta": { + "tool_calls": [{ + "id": "call_a", + "index": 0, + "function": { "name": "do_a", "arguments": "{}" } + }] + }, + "finish_reason": "tool_calls" + }, + { + "delta": { + "tool_calls": [{ + "id": "call_b", + "index": 0, + "function": { "name": "do_b", "arguments": "{}" } + }] + }, + "finish_reason": "tool_calls" + } + ] + }); + + let body = build_body(&[payload]); + let events = collect_events(&body).await; + assert_matches!( + &events[..], + [ + ResponseEvent::OutputItemDone(ResponseItem::FunctionCall { call_id: call_a, name: name_a, arguments: args_a, .. 
}), + ResponseEvent::OutputItemDone(ResponseItem::FunctionCall { call_id: call_b, name: name_b, arguments: args_b, .. }), + ResponseEvent::Completed { .. } + ] if call_a == "call_a" && name_a == "do_a" && args_a == "{}" && call_b == "call_b" && name_b == "do_b" && args_b == "{}" + ); + } + + #[tokio::test] + async fn merges_tool_calls_by_index_when_id_missing_on_subsequent_deltas() { + let delta_with_id = json!({ "choices": [{ "delta": { "tool_calls": [{ + "index": 0, "id": "call_a", - "function": { "name": "do_a" } + "function": { "name": "do_a", "arguments": "{ \"foo\":" } }] } }] }); - let delta_args_1 = json!({ + let delta_without_id = json!({ "choices": [{ "delta": { "tool_calls": [{ - "id": "call_a", - "function": { "arguments": "{ \"foo\":" } - }] - } - }] - }); - - let delta_args_2 = json!({ - "choices": [{ - "delta": { - "tool_calls": [{ - "id": "call_a", + "index": 0, "function": { "arguments": "1}" } }] } @@ -421,7 +545,7 @@ mod tests { }] }); - let body = build_body(&[delta_name, delta_args_1, delta_args_2, finish]); + let body = build_body(&[delta_with_id, delta_without_id, finish]); let events = collect_events(&body).await; assert_matches!( &events[..], @@ -432,6 +556,47 @@ mod tests { ); } + #[tokio::test] + async fn preserves_tool_call_name_when_empty_deltas_arrive() { + let delta_with_name = json!({ + "choices": [{ + "delta": { + "tool_calls": [{ + "id": "call_a", + "function": { "name": "do_a" } + }] + } + }] + }); + + let delta_with_empty_name = json!({ + "choices": [{ + "delta": { + "tool_calls": [{ + "id": "call_a", + "function": { "name": "", "arguments": "{}" } + }] + } + }] + }); + + let finish = json!({ + "choices": [{ + "finish_reason": "tool_calls" + }] + }); + + let body = build_body(&[delta_with_name, delta_with_empty_name, finish]); + let events = collect_events(&body).await; + assert_matches!( + &events[..], + [ + ResponseEvent::OutputItemDone(ResponseItem::FunctionCall { name, arguments, .. }), + ResponseEvent::Completed { .. 
} + ] if name == "do_a" && arguments == "{}" + ); + } + #[tokio::test] async fn emits_tool_calls_even_when_content_and_reasoning_present() { let delta_content_and_tools = json!({ diff --git a/codex-rs/codex-api/tests/models_integration.rs b/codex-rs/codex-api/tests/models_integration.rs new file mode 100644 index 0000000000..93baffd356 --- /dev/null +++ b/codex-rs/codex-api/tests/models_integration.rs @@ -0,0 +1,124 @@ +use codex_api::AuthProvider; +use codex_api::ModelsClient; +use codex_api::provider::Provider; +use codex_api::provider::RetryConfig; +use codex_api::provider::WireApi; +use codex_client::ReqwestTransport; +use codex_protocol::openai_models::ClientVersion; +use codex_protocol::openai_models::ConfigShellToolType; +use codex_protocol::openai_models::ModelInfo; +use codex_protocol::openai_models::ModelVisibility; +use codex_protocol::openai_models::ModelsResponse; +use codex_protocol::openai_models::ReasoningEffort; +use codex_protocol::openai_models::ReasoningEffortPreset; +use codex_protocol::openai_models::ReasoningSummaryFormat; +use codex_protocol::openai_models::TruncationPolicyConfig; +use http::HeaderMap; +use http::Method; +use wiremock::Mock; +use wiremock::MockServer; +use wiremock::ResponseTemplate; +use wiremock::matchers::method; +use wiremock::matchers::path; + +#[derive(Clone, Default)] +struct DummyAuth; + +impl AuthProvider for DummyAuth { + fn bearer_token(&self) -> Option { + None + } +} + +fn provider(base_url: &str) -> Provider { + Provider { + name: "test".to_string(), + base_url: base_url.to_string(), + query_params: None, + wire: WireApi::Responses, + headers: HeaderMap::new(), + retry: RetryConfig { + max_attempts: 1, + base_delay: std::time::Duration::from_millis(1), + retry_429: false, + retry_5xx: true, + retry_transport: true, + }, + stream_idle_timeout: std::time::Duration::from_secs(1), + } +} + +#[tokio::test] +async fn models_client_hits_models_endpoint() { + let server = MockServer::start().await; + let base_url = 
format!("{}/api/codex", server.uri()); + + let response = ModelsResponse { + models: vec![ModelInfo { + slug: "gpt-test".to_string(), + display_name: "gpt-test".to_string(), + description: Some("desc".to_string()), + default_reasoning_level: ReasoningEffort::Medium, + supported_reasoning_levels: vec![ + ReasoningEffortPreset { + effort: ReasoningEffort::Low, + description: ReasoningEffort::Low.to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::Medium, + description: ReasoningEffort::Medium.to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::High, + description: ReasoningEffort::High.to_string(), + }, + ], + shell_type: ConfigShellToolType::ShellCommand, + visibility: ModelVisibility::List, + minimal_client_version: ClientVersion(0, 1, 0), + supported_in_api: true, + priority: 1, + upgrade: None, + base_instructions: None, + supports_reasoning_summaries: false, + support_verbosity: false, + default_verbosity: None, + apply_patch_tool_type: None, + truncation_policy: TruncationPolicyConfig::bytes(10_000), + supports_parallel_tool_calls: false, + context_window: None, + reasoning_summary_format: ReasoningSummaryFormat::None, + experimental_supported_tools: Vec::new(), + }], + etag: String::new(), + }; + + Mock::given(method("GET")) + .and(path("/api/codex/models")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("content-type", "application/json") + .set_body_json(&response), + ) + .mount(&server) + .await; + + let transport = ReqwestTransport::new(reqwest::Client::new()); + let client = ModelsClient::new(transport, provider(&base_url), DummyAuth); + + let result = client + .list_models("0.1.0", HeaderMap::new()) + .await + .expect("models request should succeed"); + + assert_eq!(result.models.len(), 1); + assert_eq!(result.models[0].slug, "gpt-test"); + + let received = server + .received_requests() + .await + .expect("should capture requests"); + assert_eq!(received.len(), 1); + assert_eq!(received[0].method, 
Method::GET.as_str()); + assert_eq!(received[0].url.path(), "/api/codex/models"); +} diff --git a/codex-rs/codex-client/src/default_client.rs b/codex-rs/codex-client/src/default_client.rs new file mode 100644 index 0000000000..8a25846385 --- /dev/null +++ b/codex-rs/codex-client/src/default_client.rs @@ -0,0 +1,143 @@ +use http::Error as HttpError; +use reqwest::IntoUrl; +use reqwest::Method; +use reqwest::Response; +use reqwest::header::HeaderMap; +use reqwest::header::HeaderName; +use reqwest::header::HeaderValue; +use serde::Serialize; +use std::collections::HashMap; +use std::fmt::Display; +use std::time::Duration; + +#[derive(Clone, Debug)] +pub struct CodexHttpClient { + inner: reqwest::Client, +} + +impl CodexHttpClient { + pub fn new(inner: reqwest::Client) -> Self { + Self { inner } + } + + pub fn get(&self, url: U) -> CodexRequestBuilder + where + U: IntoUrl, + { + self.request(Method::GET, url) + } + + pub fn post(&self, url: U) -> CodexRequestBuilder + where + U: IntoUrl, + { + self.request(Method::POST, url) + } + + pub fn request(&self, method: Method, url: U) -> CodexRequestBuilder + where + U: IntoUrl, + { + let url_str = url.as_str().to_string(); + CodexRequestBuilder::new(self.inner.request(method.clone(), url), method, url_str) + } +} + +#[must_use = "requests are not sent unless `send` is awaited"] +#[derive(Debug)] +pub struct CodexRequestBuilder { + builder: reqwest::RequestBuilder, + method: Method, + url: String, +} + +impl CodexRequestBuilder { + fn new(builder: reqwest::RequestBuilder, method: Method, url: String) -> Self { + Self { + builder, + method, + url, + } + } + + fn map(self, f: impl FnOnce(reqwest::RequestBuilder) -> reqwest::RequestBuilder) -> Self { + Self { + builder: f(self.builder), + method: self.method, + url: self.url, + } + } + + pub fn headers(self, headers: HeaderMap) -> Self { + self.map(|builder| builder.headers(headers)) + } + + pub fn header(self, key: K, value: V) -> Self + where + HeaderName: TryFrom, + >::Error: 
Into, + HeaderValue: TryFrom, + >::Error: Into, + { + self.map(|builder| builder.header(key, value)) + } + + pub fn bearer_auth(self, token: T) -> Self + where + T: Display, + { + self.map(|builder| builder.bearer_auth(token)) + } + + pub fn timeout(self, timeout: Duration) -> Self { + self.map(|builder| builder.timeout(timeout)) + } + + pub fn json(self, value: &T) -> Self + where + T: ?Sized + Serialize, + { + self.map(|builder| builder.json(value)) + } + + pub async fn send(self) -> Result { + match self.builder.send().await { + Ok(response) => { + let request_ids = Self::extract_request_ids(&response); + tracing::debug!( + method = %self.method, + url = %self.url, + status = %response.status(), + request_ids = ?request_ids, + version = ?response.version(), + "Request completed" + ); + + Ok(response) + } + Err(error) => { + let status = error.status(); + tracing::debug!( + method = %self.method, + url = %self.url, + status = status.map(|s| s.as_u16()), + error = %error, + "Request failed" + ); + Err(error) + } + } + } + + fn extract_request_ids(response: &Response) -> HashMap { + ["cf-ray", "x-request-id", "x-oai-request-id"] + .iter() + .filter_map(|&name| { + let header_name = HeaderName::from_static(name); + let value = response.headers().get(header_name)?; + let value = value.to_str().ok()?.to_owned(); + Some((name.to_owned(), value)) + }) + .collect() + } +} diff --git a/codex-rs/codex-client/src/lib.rs b/codex-rs/codex-client/src/lib.rs index 3ac00a21a8..66d1083c07 100644 --- a/codex-rs/codex-client/src/lib.rs +++ b/codex-rs/codex-client/src/lib.rs @@ -1,3 +1,4 @@ +mod default_client; mod error; mod request; mod retry; @@ -5,6 +6,8 @@ mod sse; mod telemetry; mod transport; +pub use crate::default_client::CodexHttpClient; +pub use crate::default_client::CodexRequestBuilder; pub use crate::error::StreamError; pub use crate::error::TransportError; pub use crate::request::Request; diff --git a/codex-rs/codex-client/src/transport.rs 
b/codex-rs/codex-client/src/transport.rs index 5edc9a7b77..986ba3a679 100644 --- a/codex-rs/codex-client/src/transport.rs +++ b/codex-rs/codex-client/src/transport.rs @@ -1,3 +1,5 @@ +use crate::default_client::CodexHttpClient; +use crate::default_client::CodexRequestBuilder; use crate::error::TransportError; use crate::request::Request; use crate::request::Response; @@ -28,15 +30,17 @@ pub trait HttpTransport: Send + Sync { #[derive(Clone, Debug)] pub struct ReqwestTransport { - client: reqwest::Client, + client: CodexHttpClient, } impl ReqwestTransport { pub fn new(client: reqwest::Client) -> Self { - Self { client } + Self { + client: CodexHttpClient::new(client), + } } - fn build(&self, req: Request) -> Result { + fn build(&self, req: Request) -> Result { let mut builder = self .client .request( diff --git a/codex-rs/common/Cargo.toml b/codex-rs/common/Cargo.toml index 377d054483..25264eff09 100644 --- a/codex-rs/common/Cargo.toml +++ b/codex-rs/common/Cargo.toml @@ -9,12 +9,10 @@ workspace = true [dependencies] clap = { workspace = true, features = ["derive", "wrap_help"], optional = true } -codex-app-server-protocol = { workspace = true } codex-core = { workspace = true } codex-lmstudio = { workspace = true } codex-ollama = { workspace = true } codex-protocol = { workspace = true } -once_cell = { workspace = true } serde = { workspace = true, optional = true } toml = { workspace = true, optional = true } diff --git a/codex-rs/common/src/config_summary.rs b/codex-rs/common/src/config_summary.rs index 8fc1bb26f3..5a5901880f 100644 --- a/codex-rs/common/src/config_summary.rs +++ b/codex-rs/common/src/config_summary.rs @@ -4,23 +4,22 @@ use codex_core::config::Config; use crate::sandbox_summary::summarize_sandbox_policy; /// Build a list of key/value pairs summarizing the effective configuration. 
-pub fn create_config_summary_entries(config: &Config) -> Vec<(&'static str, String)> { +pub fn create_config_summary_entries(config: &Config, model: &str) -> Vec<(&'static str, String)> { let mut entries = vec![ ("workdir", config.cwd.display().to_string()), - ("model", config.model.clone()), + ("model", model.to_string()), ("provider", config.model_provider_id.clone()), ("approval", config.approval_policy.to_string()), ("sandbox", summarize_sandbox_policy(&config.sandbox_policy)), ]; - if config.model_provider.wire_api == WireApi::Responses - && config.model_family.supports_reasoning_summaries - { + if config.model_provider.wire_api == WireApi::Responses { let reasoning_effort = config .model_reasoning_effort - .or(config.model_family.default_reasoning_effort) - .map(|effort| effort.to_string()) - .unwrap_or_else(|| "none".to_string()); - entries.push(("reasoning effort", reasoning_effort)); + .map(|effort| effort.to_string()); + entries.push(( + "reasoning effort", + reasoning_effort.unwrap_or_else(|| "none".to_string()), + )); entries.push(( "reasoning summaries", config.model_reasoning_summary.to_string(), diff --git a/codex-rs/common/src/lib.rs b/codex-rs/common/src/lib.rs index 5092b3be24..d5513b8325 100644 --- a/codex-rs/common/src/lib.rs +++ b/codex-rs/common/src/lib.rs @@ -32,8 +32,6 @@ mod config_summary; pub use config_summary::create_config_summary_entries; // Shared fuzzy matcher (used by TUI selection popups and other UI filtering) pub mod fuzzy_match; -// Shared model presets used by TUI and MCP server -pub mod model_presets; // Shared approval presets (AskForApproval + Sandbox) used by TUI and MCP server // Not to be confused with AskForApproval, which we should probably rename to EscalationPolicy. 
pub mod approval_presets; diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml index 8a329d0672..4c231e4dda 100644 --- a/codex-rs/core/Cargo.toml +++ b/codex-rs/core/Cargo.toml @@ -1,8 +1,8 @@ [package] -name = "codex-core" -version.workspace = true edition.workspace = true license.workspace = true +name = "codex-core" +version.workspace = true [lib] doctest = false @@ -14,16 +14,16 @@ workspace = true [dependencies] anyhow = { workspace = true } -askama = { workspace = true } async-channel = { workspace = true } async-trait = { workspace = true } base64 = { workspace = true } -chrono = { workspace = true, features = ["serde"] } chardetng = { workspace = true } +chrono = { workspace = true, features = ["serde"] } +codex-api = { workspace = true } codex-app-server-protocol = { workspace = true } codex-apply-patch = { workspace = true } codex-async-utils = { workspace = true } -codex-api = { workspace = true } +codex-client = { workspace = true } codex-execpolicy = { workspace = true } codex-file-search = { workspace = true } codex-git = { workspace = true } @@ -31,14 +31,15 @@ codex-keyring-store = { workspace = true } codex-otel = { workspace = true, features = ["otel"] } codex-protocol = { workspace = true } codex-rmcp-client = { workspace = true } +codex-utils-absolute-path = { workspace = true } codex-utils-pty = { workspace = true } codex-utils-readiness = { workspace = true } codex-utils-string = { workspace = true } codex-windows-sandbox = { package = "codex-windows-sandbox", path = "../windows-sandbox-rs" } dirs = { workspace = true } dunce = { workspace = true } -env-flags = { workspace = true } encoding_rs = { workspace = true } +env-flags = { workspace = true } eventsource-stream = { workspace = true } futures = { workspace = true } http = { workspace = true } @@ -46,8 +47,10 @@ indexmap = { workspace = true } keyring = { workspace = true, features = ["crypto-rust"] } libc = { workspace = true } mcp-types = { workspace = true } +once_cell = { 
workspace = true } os_info = { workspace = true } rand = { workspace = true } +regex = { workspace = true } regex-lite = { workspace = true } reqwest = { workspace = true, features = ["json", "stream"] } serde = { workspace = true, features = ["derive"] } @@ -58,9 +61,6 @@ sha2 = { workspace = true } shlex = { workspace = true } similar = { workspace = true } strum_macros = { workspace = true } -url = { workspace = true } -once_cell = { workspace = true } -regex = { workspace = true } tempfile = { workspace = true } test-case = "3.3.1" test-log = { workspace = true } @@ -84,18 +84,20 @@ toml_edit = { workspace = true } tracing = { workspace = true, features = ["log"] } tree-sitter = { workspace = true } tree-sitter-bash = { workspace = true } +url = { workspace = true } uuid = { workspace = true, features = ["serde", "v4", "v5"] } which = { workspace = true } wildmatch = { workspace = true } [features] deterministic_process_ids = [] +test-support = [] [target.'cfg(target_os = "linux")'.dependencies] +keyring = { workspace = true, features = ["linux-native-async-persistent"] } landlock = { workspace = true } seccompiler = { workspace = true } -keyring = { workspace = true, features = ["linux-native-async-persistent"] } [target.'cfg(target_os = "macos")'.dependencies] core-foundation = "0.9" diff --git a/codex-rs/core/gpt-5.1-codex-max_prompt.md b/codex-rs/core/gpt-5.1-codex-max_prompt.md index 292e5d7d0f..a8227c893f 100644 --- a/codex-rs/core/gpt-5.1-codex-max_prompt.md +++ b/codex-rs/core/gpt-5.1-codex-max_prompt.md @@ -48,7 +48,7 @@ When you are running with `approval_policy == on-request`, and sandboxing enable - You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var) - You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files. - You are running sandboxed and need to run a command that requires network access (e.g. 
installing packages) -- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `with_escalated_permissions` and `justification` parameters - do not message the user before requesting approval for the command. +- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command. - You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for - (for all of these, you should weigh alternative paths that do not require approval) @@ -59,8 +59,8 @@ You will be told what filesystem sandboxing, network sandboxing, and approval mo Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals. 
When requesting approval to execute a command that will require escalated privileges: - - Provide the `with_escalated_permissions` parameter with the boolean value true - - Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter + - Provide the `sandbox_permissions` parameter with the value `"require_escalated"` + - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter ## Special user requests diff --git a/codex-rs/core/gpt_5_1_prompt.md b/codex-rs/core/gpt_5_1_prompt.md index 97a3875fe5..3201ffeb68 100644 --- a/codex-rs/core/gpt_5_1_prompt.md +++ b/codex-rs/core/gpt_5_1_prompt.md @@ -182,7 +182,7 @@ When you are running with `approval_policy == on-request`, and sandboxing enable - You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var) - You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files. - You are running sandboxed and need to run a command that requires network access (e.g. installing packages) -- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `with_escalated_permissions` and `justification` parameters. Within this harness, prefer requesting approval via the tool over asking in natural language. +- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters. Within this harness, prefer requesting approval via the tool over asking in natural language. 
- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for - (for all of these, you should weigh alternative paths that do not require approval) @@ -193,8 +193,8 @@ You will be told what filesystem sandboxing, network sandboxing, and approval mo Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals. When requesting approval to execute a command that will require escalated privileges: - - Provide the `with_escalated_permissions` parameter with the boolean value true - - Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter + - Provide the `sandbox_permissions` parameter with the value `"require_escalated"` + - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter ## Validating your work diff --git a/codex-rs/core/gpt_5_2_prompt.md b/codex-rs/core/gpt_5_2_prompt.md new file mode 100644 index 0000000000..fdb1e3d5d3 --- /dev/null +++ b/codex-rs/core/gpt_5_2_prompt.md @@ -0,0 +1,370 @@ +You are GPT-5.2 running in the Codex CLI, a terminal-based coding assistant. Codex CLI is an open source project led by OpenAI. You are expected to be precise, safe, and helpful. + +Your capabilities: + +- Receive user prompts and other context provided by the harness, such as files in the workspace. +- Communicate with the user by streaming thinking & responses, and by making & updating plans. +- Emit function calls to run terminal commands and apply patches. 
Depending on how this specific run is configured, you can request that these function calls be escalated to the user for approval before running. More on this in the "Sandbox and approvals" section. + +Within this context, Codex refers to the open-source agentic coding interface (not the old Codex language model built by OpenAI). + +# How you work + +## Personality + +Your default personality and tone is concise, direct, and friendly. You communicate efficiently, always keeping the user clearly informed about ongoing actions without unnecessary detail. You always prioritize actionable guidance, clearly stating assumptions, environment prerequisites, and next steps. Unless explicitly asked, you avoid excessively verbose explanations about your work. + +## AGENTS.md spec +- Repos often contain AGENTS.md files. These files can appear anywhere within the repository. +- These files are a way for humans to give you (the agent) instructions or tips for working within the container. +- Some examples might be: coding conventions, info about how code is organized, or instructions for how to run or test code. +- Instructions in AGENTS.md files: + - The scope of an AGENTS.md file is the entire directory tree rooted at the folder that contains it. + - For every file you touch in the final patch, you must obey instructions in any AGENTS.md file whose scope includes that file. + - Instructions about code style, structure, naming, etc. apply only to code within the AGENTS.md file's scope, unless the file states otherwise. + - More-deeply-nested AGENTS.md files take precedence in the case of conflicting instructions. + - Direct system/developer/user instructions (as part of a prompt) take precedence over AGENTS.md instructions. +- The contents of the AGENTS.md file at the root of the repo and any directories from the CWD up to the root are included with the developer message and don't need to be re-read. 
When working in a subdirectory of CWD, or a directory outside the CWD, check for any AGENTS.md files that may be applicable. + +## Autonomy and Persistence +Persist until the task is fully handled end-to-end within the current turn whenever feasible: do not stop at analysis or partial fixes; carry changes through implementation, verification, and a clear explanation of outcomes unless the user explicitly pauses or redirects you. + +Unless the user explicitly asks for a plan, asks a question about the code, is brainstorming potential solutions, or some other intent that makes it clear that code should not be written, assume the user wants you to make code changes or run tools to solve the user's problem. In these cases, it's bad to output your proposed solution in a message, you should go ahead and actually implement the change. If you encounter challenges or blockers, you should attempt to resolve them yourself. + +## Responsiveness + +### User Updates Spec +You'll work for stretches with tool calls — it's critical to keep the user updated as you work. + +Frequency & Length: +- Send short updates (1–2 sentences) whenever there is a meaningful, important insight you need to share with the user to keep them informed. +- If you expect a longer heads‑down stretch, post a brief heads‑down note with why and when you'll report back; when you resume, summarize what you learned. +- Only the initial plan, plan updates, and final recap can be longer, with multiple bullets and paragraphs + +Tone: +- Friendly, confident, senior-engineer energy. Positive, collaborative, humble; fix mistakes quickly. + +Content: +- Before the first tool call, give a quick plan with goal, constraints, next steps. +- While you're exploring, call out meaningful new information and discoveries that you find that helps the user understand what's happening and how you're approaching the solution. 
+- If you change the plan (e.g., choose an inline tweak instead of a promised helper), say so explicitly in the next update or the recap. + +**Examples:** + +- “I’ve explored the repo; now checking the API route definitions.” +- “Next, I’ll patch the config and update the related tests.” +- “I’m about to scaffold the CLI commands and helper functions.” +- “Ok cool, so I’ve wrapped my head around the repo. Now digging into the API routes.” +- “Config’s looking tidy. Next up is patching helpers to keep things in sync.” +- “Finished poking at the DB gateway. I will now chase down error handling.” +- “Alright, build pipeline order is interesting. Checking how it reports failures.” +- “Spotted a clever caching util; now hunting where it gets used.” + +## Planning + +You have access to an `update_plan` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. Plans can help to make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good plan should break the task into meaningful, logically ordered steps that are easy to verify as you go. + +Note that plans are not for padding out simple work with filler steps or stating the obvious. The content of your plan should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use plans for simple or single-step queries that you can just do or answer immediately. + +Do not repeat the full contents of the plan after an `update_plan` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step. + +Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your plan after a single pass of implementation. 
If this is the case, you can simply mark all the planned steps as completed. Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so. + +Maintain statuses in the tool: exactly one item in_progress at a time; mark items complete when done; post timely status transitions. Do not jump an item from pending to completed: always set it to in_progress first. Do not batch-complete multiple items after the fact. Finish with all items completed or explicitly canceled/deferred before ending the turn. Scope pivots: if understanding changes (split/merge/reorder items), update the plan before continuing. Do not let the plan go stale while coding. + +Use a plan when: + +- The task is non-trivial and will require multiple actions over a long time horizon. +- There are logical phases or dependencies where sequencing matters. +- The work has ambiguity that benefits from outlining high-level goals. +- You want intermediate checkpoints for feedback and validation. +- When the user asked you to do more than one thing in a single prompt +- The user has asked you to use the plan tool (aka "TODOs") +- You generate additional steps while working, and plan to do them before yielding to the user + +### Examples + +**High-quality plans** + +Example 1: + +1. Add CLI entry with file args +2. Parse Markdown via CommonMark library +3. Apply semantic HTML template +4. Handle code blocks, images, links +5. Add error handling for invalid files + +Example 2: + +1. Define CSS variables for colors +2. Add toggle with localStorage state +3. Refactor components to use variables +4. Verify all views for readability +5. Add smooth theme-change transition + +Example 3: + +1. Set up Node.js + WebSocket server +2. Add join/leave broadcast events +3. Implement messaging with timestamps +4. Add usernames + mention highlighting +5. Persist messages in lightweight DB +6. 
Add typing indicators + unread count + +**Low-quality plans** + +Example 1: + +1. Create CLI tool +2. Add Markdown parser +3. Convert to HTML + +Example 2: + +1. Add dark mode toggle +2. Save preference +3. Make styles look good + +Example 3: + +1. Create single-file HTML game +2. Run quick sanity check +3. Summarize usage instructions + +If you need to write a plan, only write high quality plans, not low quality ones. + +## Task execution + +You are a coding agent. You must keep going until the query or task is completely resolved, before ending your turn and yielding back to the user. Persist until the task is fully handled end-to-end within the current turn whenever feasible and persevere even when function calls fail. Only terminate your turn when you are sure that the problem is solved. Autonomously resolve the query to the best of your ability, using the tools available to you, before coming back to the user. Do NOT guess or make up an answer. + +You MUST adhere to the following criteria when solving queries: + +- Working on the repo(s) in the current environment is allowed, even if they are proprietary. +- Analyzing code for vulnerabilities is allowed. +- Showing user code and tool call details is allowed. +- Use the `apply_patch` tool to edit files (NEVER try `applypatch` or `apply-patch`, only `apply_patch`). This is a FREEFORM tool, so do not wrap the patch in JSON. + +If completing the user's task requires writing or modifying files, your code and final answer should follow these coding guidelines, though user instructions (i.e. AGENTS.md) may override these guidelines: + +- Fix the problem at the root cause rather than applying surface-level patches, when possible. +- Avoid unneeded complexity in your solution. +- Do not attempt to fix unrelated bugs or broken tests. It is not your responsibility to fix them. (You may mention them to the user in your final message though.) +- Update documentation as necessary. 
+- Keep changes consistent with the style of the existing codebase. Changes should be minimal and focused on the task. +- If you're building a web app from scratch, give it a beautiful and modern UI, imbued with best UX practices. +- Use `git log` and `git blame` to search the history of the codebase if additional context is required. +- NEVER add copyright or license headers unless specifically requested. +- Do not waste tokens by re-reading files after calling `apply_patch` on them. The tool call will fail if it didn't work. The same goes for making folders, deleting folders, etc. +- Do not `git commit` your changes or create new git branches unless explicitly requested. +- Do not add inline comments within code unless explicitly requested. +- Do not use one-letter variable names unless explicitly requested. +- NEVER output inline citations like "【F:README.md†L5-L14】" in your outputs. The CLI is not able to render these so they will just be broken in the UI. Instead, if you output valid filepaths, users will be able to click on them to open the files in their editor. + +## Codex CLI harness, sandboxing, and approvals + +The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from. + +Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are: +- **read-only**: The sandbox only permits reading files. +- **workspace-write**: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. +- **danger-full-access**: No filesystem sandboxing - all commands are permitted. + +Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are: +- **restricted**: Requires approval +- **enabled**: No approval needed + +Approvals are your mechanism to get user consent to run shell commands without the sandbox. 
Possible configuration options for `approval_policy` are +- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands. +- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox. +- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for escalating in the tool definition.) +- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding. + +When you are running with `approval_policy == on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval: +- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var) +- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files. +- You are running sandboxed and need to run a command that requires network access (e.g. installing packages) +- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. 
ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command. +- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for +- (for all of these, you should weigh alternative paths that do not require approval) + +When `sandbox_mode` is set to read-only, you'll need to request approval for any command that isn't a read. + +You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing enabled, and approval on-failure. + +Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If completing the task requires escalated permissions, do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals. + +When requesting approval to execute a command that will require escalated privileges: + - Provide the `sandbox_permissions` parameter with the value `"require_escalated"` + - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter + +## Validating your work + +If the codebase has tests, or the ability to build or run tests, consider using them to verify changes once your work is complete. + +When testing, your philosophy should be to start as specific as possible to the code you changed so that you can catch issues efficiently, then make your way to broader tests as you build confidence. If there's no test for the code you changed, and if the adjacent patterns in the codebase show that there's a logical place for you to add a test, you may do so. 
However, do not add tests to codebases with no tests. + +Similarly, once you're confident in correctness, you can suggest or use formatting commands to ensure that your code is well formatted. If there are issues you can iterate up to 3 times to get formatting right, but if you still can't manage, it's better to save the user time and present them a correct solution where you call out the formatting in your final message. If the codebase does not have a formatter configured, do not add one. + +For all of testing, running, building, and formatting, do not attempt to fix unrelated bugs. It is not your responsibility to fix them. (You may mention them to the user in your final message though.) + +Be mindful of whether to run validation commands proactively. In the absence of behavioral guidance: + +- When running in non-interactive approval modes like **never** or **on-failure**, you can proactively run tests, lint and do whatever you need to ensure you've completed the task. If you are unable to run tests, you must still do your utmost best to complete the task. +- When working in interactive approval modes like **untrusted**, or **on-request**, hold off on running tests or lint commands until the user is ready for you to finalize your output, because these commands take time to run and slow down iteration. Instead suggest what you want to do next, and let the user confirm first. +- When working on test-related tasks, such as adding tests, fixing tests, or reproducing a bug to verify behavior, you may proactively run tests regardless of approval mode. Use your judgement to decide whether this is a test-related task. + +## Ambition vs. precision + +For tasks that have no prior context (i.e. the user is starting something brand new), you should feel free to be ambitious and demonstrate creativity with your implementation. + +If you're operating in an existing codebase, you should make sure you do exactly what the user asks with surgical precision. 
Treat the surrounding codebase with respect, and don't overstep (i.e. changing filenames or variables unnecessarily). You should balance being sufficiently ambitious and proactive when completing tasks of this nature. + +You should use judicious initiative to decide on the right level of detail and complexity to deliver based on the user's needs. This means showing good judgment that you're capable of doing the right extras without gold-plating. This might be demonstrated by high-value, creative touches when scope of the task is vague; while being surgical and targeted when scope is tightly specified. + +## Sharing progress updates + +For especially longer tasks that you work on (i.e. requiring many tool calls, or a plan with multiple steps), you should provide progress updates back to the user at reasonable intervals. These updates should be structured as a concise sentence or two (no more than 8-10 words long) recapping progress so far in plain language: this update demonstrates your understanding of what needs to be done, progress so far (i.e. files explored, subtasks complete), and where you're going next. + +Before doing large chunks of work that may incur latency as experienced by the user (i.e. writing a new file), you should send a concise message to the user with an update indicating what you're about to do to ensure they know what you're spending time on. Don't start editing or writing large files before informing the user what you are doing and why. + +The messages you send before tool calls should describe what is immediately about to be done next in very concise language. If there was previous work done, this preamble message should also include a note about the work done so far to bring the user along. + +## Presenting your work and final message + +Your final message should read naturally, like an update from a concise teammate. For casual conversation, brainstorming tasks, or quick questions from the user, respond in a friendly, conversational tone. 
You should ask questions, suggest ideas, and adapt to the user’s style. If you've finished a large amount of work, when describing what you've done to the user, you should follow the final answer formatting guidelines to communicate substantive changes. You don't need to add structured formatting for one-word answers, greetings, or purely conversational exchanges. + +You can skip heavy formatting for single, simple actions or confirmations. In these cases, respond in plain sentences with any relevant next step or quick option. Reserve multi-section structured responses for results that need grouping or explanation. + +The user is working on the same computer as you, and has access to your work. As such there's no need to show the contents of files you have already written unless the user explicitly asks for them. Similarly, if you've created or modified files using `apply_patch`, there's no need to tell users to "save the file" or "copy the code into a file"—just reference the file path. + +If there's something that you think you could help with as a logical next step, concisely ask the user if they want you to do so. Good examples of this are running tests, committing changes, or building out the next logical component. If there’s something that you couldn't do (even with approval) but that the user might want to do (such as verifying changes by running the app), include those instructions succinctly. + +Brevity is very important as a default. You should be very concise (i.e. no more than 10 lines), but can relax this requirement for tasks where additional detail and comprehensiveness is important for the user's understanding. + +### Final answer structure and style guidelines + +You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value. 
+ +**Section Headers** + +- Use only when they improve clarity — they are not mandatory for every answer. +- Choose descriptive names that fit the content +- Keep headers short (1–3 words) and in `**Title Case**`. Always start headers with `**` and end with `**` +- Leave no blank line before the first bullet under a header. +- Section headers should only be used where they genuinely improve scanability; avoid fragmenting the answer. + +**Bullets** + +- Use `-` followed by a space for every bullet. +- Merge related points when possible; avoid a bullet for every trivial detail. +- Keep bullets to one line unless breaking for clarity is unavoidable. +- Group into short lists (4–6 bullets) ordered by importance. +- Use consistent keyword phrasing and formatting across sections. + +**Monospace** + +- Wrap all commands, file paths, env vars, code identifiers, and code samples in backticks (`` `...` ``). +- Apply to inline examples and to bullet keywords if the keyword itself is a literal file/command. +- Never mix monospace and bold markers; choose one based on whether it’s a keyword (`**`) or inline code/path (`` ` ``). + +**File References** +When referencing files in your response, make sure to include the relevant start line and always follow the below rules: + * Use inline code to make file paths clickable. + * Each reference should have a stand alone path. Even if it's the same file. + * Accepted: absolute, workspace‑relative, a/ or b/ diff prefixes, or bare filename/suffix. + * Line/column (1‑based, optional): :line[:column] or #Lline[Ccolumn] (column defaults to 1). + * Do not use URIs like file://, vscode://, or https://. + * Do not provide range of lines + * Examples: src/app.ts, src/app.ts:42, b/server/index.js#L10, C:\repo\project\main.rs:12:5 + +**Structure** + +- Place related bullets together; don’t mix unrelated concepts in the same section. +- Order sections from general → specific → supporting info. 
+- For subsections (e.g., “Binaries” under “Rust Workspace”), introduce with a bolded keyword bullet, then list items under it. +- Match structure to complexity: + - Multi-part or detailed results → use clear headers and grouped bullets. + - Simple results → minimal headers, possibly just a short list or paragraph. + +**Tone** + +- Keep the voice collaborative and natural, like a coding partner handing off work. +- Be concise and factual — no filler or conversational commentary, and avoid unnecessary repetition. +- Use present tense and active voice (e.g., “Runs tests” not “This will run tests”). +- Keep descriptions self-contained; don’t refer to “above” or “below”. +- Use parallel structure in lists for consistency. + +**Verbosity** +- Final answer compactness rules (enforced): + - Tiny/small single-file change (≤ ~10 lines): 2–5 sentences or ≤3 bullets. No headings. 0–1 short snippet (≤3 lines) only if essential. + - Medium change (single area or a few files): ≤6 bullets or 6–10 sentences. At most 1–2 short snippets total (≤8 lines each). + - Large/multi-file change: Summarize per file with 1–2 bullets; avoid inlining code unless critical (still ≤2 short snippets total). + - Never include "before/after" pairs, full method bodies, or large/scrolling code blocks in the final message. Prefer referencing file/symbol names instead. + +**Don’t** + +- Don’t use the literal words “bold” or “monospace” in the content. +- Don’t nest bullets or create deep hierarchies. +- Don’t output ANSI escape codes directly — the CLI renderer applies them. +- Don’t cram unrelated keywords into a single bullet; split for clarity. +- Don’t let keyword lists run long — wrap or reformat for scannability. + +Generally, ensure your final answers adapt their shape and depth to the request. For example, answers to code explanations should have a precise, structured explanation with code references that answer the question directly. 
For tasks with a simple implementation, lead with the outcome and supplement only with what’s needed for clarity. Larger changes can be presented as a logical walkthrough of your approach, grouping related steps, explaining rationale where it adds value, and highlighting next actions to accelerate the user. Your answers should provide the right level of detail while being easily scannable. + +For casual greetings, acknowledgements, or other one-off conversational messages that are not delivering substantive information or structured results, respond naturally without section headers or bullet formatting. + +# Tool Guidelines + +## Shell commands + +When using the shell, you must adhere to the following guidelines: + +- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.) +- Do not use Python scripts to attempt to output larger chunks of a file. Command line output will be truncated after 10 kilobytes, regardless of the command used. +- Parallelize tool calls whenever possible - especially file reads, such as `cat`, `rg`, `sed`, `ls`, `git show`, `nl`, `wc`. Use `multi_tool_use.parallel` to parallelize tool calls and only this. + +## apply_patch + +Use the `apply_patch` tool to edit files. Your patch language is a stripped‑down, file‑oriented diff format designed to be easy to parse and safe to apply. You can think of it as a high‑level envelope: + +*** Begin Patch +[ one or more file sections ] +*** End Patch + +Within that envelope, you get a sequence of file operations. +You MUST include a header to specify the action you are taking. +Each operation starts with one of three headers: + +*** Add File: <path> - create a new file. Every following line is a + line (the initial contents). +*** Delete File: <path> - remove an existing file. Nothing follows. +*** Update File: <path> - patch an existing file in place (optionally with a rename). 
+ +Example patch: + +``` +*** Begin Patch +*** Add File: hello.txt ++Hello world +*** Update File: src/app.py +*** Move to: src/main.py +@@ def greet(): +-print("Hi") ++print("Hello, world!") +*** Delete File: obsolete.txt +*** End Patch +``` + +It is important to remember: + +- You must include a header with your intended action (Add/Delete/Update) +- You must prefix new lines with `+` even when creating a new file + +## `update_plan` + +A tool named `update_plan` is available to you. You can use it to keep an up‑to‑date, step‑by‑step plan for the task. + +To create a new plan, call `update_plan` with a short list of 1‑sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`). + +When steps have been completed, use `update_plan` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `update_plan` call. + +If all steps are complete, ensure you call `update_plan` to mark all steps as `completed`. diff --git a/codex-rs/core/gpt_5_codex_prompt.md b/codex-rs/core/gpt_5_codex_prompt.md index 57d06761ba..e2f9017874 100644 --- a/codex-rs/core/gpt_5_codex_prompt.md +++ b/codex-rs/core/gpt_5_codex_prompt.md @@ -48,7 +48,7 @@ When you are running with `approval_policy == on-request`, and sandboxing enable - You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var) - You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files. - You are running sandboxed and need to run a command that requires network access (e.g. installing packages) -- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. 
ALWAYS proceed to use the `with_escalated_permissions` and `justification` parameters - do not message the user before requesting approval for the command. +- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command. - You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for - (for all of these, you should weigh alternative paths that do not require approval) @@ -59,8 +59,8 @@ You will be told what filesystem sandboxing, network sandboxing, and approval mo Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals. 
When requesting approval to execute a command that will require escalated privileges: - - Provide the `with_escalated_permissions` parameter with the boolean value true - - Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter + - Provide the `sandbox_permissions` parameter with the value `"require_escalated"` + - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter ## Special user requests diff --git a/codex-rs/core/src/apply_patch.rs b/codex-rs/core/src/apply_patch.rs index dffe94be61..67433303e5 100644 --- a/codex-rs/core/src/apply_patch.rs +++ b/codex-rs/core/src/apply_patch.rs @@ -70,7 +70,9 @@ pub(crate) async fn apply_patch( ) .await; match rx_approve.await.unwrap_or_default() { - ReviewDecision::Approved | ReviewDecision::ApprovedForSession => { + ReviewDecision::Approved + | ReviewDecision::ApprovedExecpolicyAmendment { .. } + | ReviewDecision::ApprovedForSession => { InternalApplyPatchInvocation::DelegateToExec(ApplyPatchExec { action, user_explicitly_approved_this_action: true, diff --git a/codex-rs/core/src/auth.rs b/codex-rs/core/src/auth.rs index d874435e8e..20943982d4 100644 --- a/codex-rs/core/src/auth.rs +++ b/codex-rs/core/src/auth.rs @@ -23,7 +23,6 @@ pub use crate::auth::storage::AuthDotJson; use crate::auth::storage::AuthStorageBackend; use crate::auth::storage::create_auth_storage; use crate::config::Config; -use crate::default_client::CodexHttpClient; use crate::error::RefreshTokenFailedError; use crate::error::RefreshTokenFailedReason; use crate::token_data::KnownPlan as InternalKnownPlan; @@ -31,8 +30,11 @@ use crate::token_data::PlanType as InternalPlanType; use crate::token_data::TokenData; use crate::token_data::parse_id_token; use crate::util::try_parse_error_message; +use codex_client::CodexHttpClient; use codex_protocol::account::PlanType as AccountPlanType; +use once_cell::sync::Lazy; use 
serde_json::Value; +use tempfile::TempDir; use thiserror::Error; #[derive(Debug, Clone)] @@ -62,6 +64,8 @@ const REFRESH_TOKEN_UNKNOWN_MESSAGE: &str = const REFRESH_TOKEN_URL: &str = "https://auth.openai.com/oauth/token"; pub const REFRESH_TOKEN_URL_OVERRIDE_ENV_VAR: &str = "CODEX_REFRESH_TOKEN_URL_OVERRIDE"; +static TEST_AUTH_TEMP_DIRS: Lazy>> = Lazy::new(|| Mutex::new(Vec::new())); + #[derive(Debug, Error)] pub enum RefreshTokenError { #[error("{0}")] @@ -227,23 +231,6 @@ impl CodexAuth { }) } - /// Raw plan string from the ID token (including unknown/new plan types). - pub fn raw_plan_type(&self) -> Option { - self.get_plan_type().map(|plan| match plan { - InternalPlanType::Known(k) => format!("{k:?}"), - InternalPlanType::Unknown(raw) => raw, - }) - } - - /// Raw internal plan value from the ID token. - /// Exposes the underlying `token_data::PlanType` without mapping it to the - /// public `AccountPlanType`. Use this when downstream code needs to inspect - /// internal/unknown plan strings exactly as issued in the token. 
- pub(crate) fn get_plan_type(&self) -> Option { - self.get_current_token_data() - .and_then(|t| t.id_token.chatgpt_plan_type) - } - fn get_current_auth_json(&self) -> Option { #[expect(clippy::unwrap_used)] self.auth_dot_json.lock().unwrap().clone() @@ -1041,10 +1028,6 @@ mod tests { .expect("auth available"); pretty_assertions::assert_eq!(auth.account_plan_type(), Some(AccountPlanType::Pro)); - pretty_assertions::assert_eq!( - auth.get_plan_type(), - Some(InternalPlanType::Known(InternalKnownPlan::Pro)) - ); } #[test] @@ -1065,10 +1048,6 @@ mod tests { .expect("auth available"); pretty_assertions::assert_eq!(auth.account_plan_type(), Some(AccountPlanType::Unknown)); - pretty_assertions::assert_eq!( - auth.get_plan_type(), - Some(InternalPlanType::Unknown("mystery-tier".to_string())) - ); } } @@ -1113,11 +1092,19 @@ impl AuthManager { } } + #[cfg(any(test, feature = "test-support"))] + #[expect(clippy::expect_used)] /// Create an AuthManager with a specific CodexAuth, for testing only. pub fn from_auth_for_testing(auth: CodexAuth) -> Arc { let cached = CachedAuth { auth: Some(auth) }; + let temp_dir = tempfile::tempdir().expect("temp codex home"); + let codex_home = temp_dir.path().to_path_buf(); + TEST_AUTH_TEMP_DIRS + .lock() + .expect("lock test codex homes") + .push(temp_dir); Arc::new(Self { - codex_home: PathBuf::new(), + codex_home, inner: RwLock::new(cached), enable_codex_api_key_env: false, auth_credentials_store_mode: AuthCredentialsStoreMode::File, @@ -1129,6 +1116,10 @@ impl AuthManager { self.inner.read().ok().and_then(|c| c.auth.clone()) } + pub fn codex_home(&self) -> &Path { + &self.codex_home + } + /// Force a reload of the auth information from auth.json. Returns /// whether the auth value changed. 
pub fn reload(&self) -> bool { @@ -1201,4 +1192,8 @@ impl AuthManager { self.reload(); Ok(removed) } + + pub fn get_auth_mode(&self) -> Option { + self.auth().map(|a| a.mode) + } } diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index 82839522c9..9659b10909 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -20,9 +20,9 @@ use codex_api::error::ApiError; use codex_app_server_protocol::AuthMode; use codex_otel::otel_event_manager::OtelEventManager; use codex_protocol::ConversationId; -use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig; use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig; use codex_protocol::models::ResponseItem; +use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig; use codex_protocol::protocol::SessionSource; use eventsource_stream::Event; use eventsource_stream::EventStreamError; @@ -46,10 +46,9 @@ use crate::default_client::build_reqwest_client; use crate::error::CodexErr; use crate::error::Result; use crate::flags::CODEX_RS_SSE_FIXTURE; -use crate::model_family::ModelFamily; use crate::model_provider_info::ModelProviderInfo; use crate::model_provider_info::WireApi; -use crate::openai_model_info::get_model_info; +use crate::openai_models::model_family::ModelFamily; use crate::tools::spec::create_tools_json_for_chat_completions_api; use crate::tools::spec::create_tools_json_for_responses_api; @@ -57,6 +56,7 @@ use crate::tools::spec::create_tools_json_for_responses_api; pub struct ModelClient { config: Arc, auth_manager: Option>, + model_family: ModelFamily, otel_event_manager: OtelEventManager, provider: ModelProviderInfo, conversation_id: ConversationId, @@ -70,6 +70,7 @@ impl ModelClient { pub fn new( config: Arc, auth_manager: Option>, + model_family: ModelFamily, otel_event_manager: OtelEventManager, provider: ModelProviderInfo, effort: Option, @@ -80,6 +81,7 @@ impl ModelClient { Self { config, auth_manager, + 
model_family, otel_event_manager, provider, conversation_id, @@ -90,17 +92,11 @@ impl ModelClient { } pub fn get_model_context_window(&self) -> Option { - let pct = self.config.model_family.effective_context_window_percent; - self.config - .model_context_window - .or_else(|| get_model_info(&self.config.model_family).map(|info| info.context_window)) - .map(|w| w.saturating_mul(pct) / 100) - } - - pub fn get_auto_compact_token_limit(&self) -> Option { - self.config.model_auto_compact_token_limit.or_else(|| { - get_model_info(&self.config.model_family).and_then(|info| info.auto_compact_token_limit) - }) + let model_family = self.get_model_family(); + let effective_context_window_percent = model_family.effective_context_window_percent; + model_family + .context_window + .map(|w| w.saturating_mul(effective_context_window_percent) / 100) } pub fn config(&self) -> Arc { @@ -149,9 +145,8 @@ impl ModelClient { } let auth_manager = self.auth_manager.clone(); - let instructions = prompt - .get_full_instructions(&self.config.model_family) - .into_owned(); + let model_family = self.get_model_family(); + let instructions = prompt.get_full_instructions(&model_family).into_owned(); let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?; let api_prompt = build_api_prompt(prompt, instructions, tools_json); let conversation_id = self.conversation_id.to_string(); @@ -171,7 +166,7 @@ impl ModelClient { let stream_result = client .stream_prompt( - &self.config.model, + &self.get_model(), &api_prompt, Some(conversation_id.clone()), Some(session_source.clone()), @@ -204,17 +199,18 @@ impl ModelClient { } let auth_manager = self.auth_manager.clone(); - let instructions = prompt - .get_full_instructions(&self.config.model_family) - .into_owned(); + let model_family = self.get_model_family(); + let instructions = prompt.get_full_instructions(&model_family).into_owned(); let tools_json: Vec = create_tools_json_for_responses_api(&prompt.tools)?; - let reasoning = if 
self.config.model_family.supports_reasoning_summaries { + let reasoning = if model_family.supports_reasoning_summaries { Some(Reasoning { - effort: self - .effort - .or(self.config.model_family.default_reasoning_effort), - summary: Some(self.summary), + effort: self.effort.or(model_family.default_reasoning_effort), + summary: if self.summary == ReasoningSummaryConfig::None { + None + } else { + Some(self.summary) + }, }) } else { None @@ -226,15 +222,15 @@ impl ModelClient { vec![] }; - let verbosity = if self.config.model_family.support_verbosity { + let verbosity = if model_family.support_verbosity { self.config .model_verbosity - .or(self.config.model_family.default_verbosity) + .or(model_family.default_verbosity) } else { if self.config.model_verbosity.is_some() { warn!( "model_verbosity is set but ignored as the model does not support verbosity: {}", - self.config.model_family.family + model_family.family ); } None @@ -268,7 +264,7 @@ impl ModelClient { }; let stream_result = client - .stream_prompt(&self.config.model, &api_prompt, options) + .stream_prompt(&self.get_model(), &api_prompt, options) .await; match stream_result { @@ -300,12 +296,12 @@ impl ModelClient { /// Returns the currently configured model slug. pub fn get_model(&self) -> String { - self.config.model.clone() + self.get_model_family().get_model_slug().to_string() } /// Returns the currently configured model family. pub fn get_model_family(&self) -> ModelFamily { - self.config.model_family.clone() + self.model_family.clone() } /// Returns the current reasoning effort setting. 
@@ -342,10 +338,10 @@ impl ModelClient { .with_telemetry(Some(request_telemetry)); let instructions = prompt - .get_full_instructions(&self.config.model_family) + .get_full_instructions(&self.get_model_family()) .into_owned(); let payload = ApiCompactionInput { - model: &self.config.model, + model: &self.get_model(), input: &prompt.input, instructions: &instructions, }; diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs index a249ca6fcc..4a3bc8de23 100644 --- a/codex-rs/core/src/client_common.rs +++ b/codex-rs/core/src/client_common.rs @@ -1,6 +1,6 @@ use crate::client_common::tools::ToolSpec; use crate::error::Result; -use crate::model_family::ModelFamily; +use crate::openai_models::model_family::ModelFamily; pub use codex_api::common::ResponseEvent; use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS; use codex_protocol::models::ResponseItem; @@ -252,13 +252,15 @@ impl Stream for ResponseStream { #[cfg(test)] mod tests { - use crate::model_family::find_family_for_model; use codex_api::ResponsesApiRequest; use codex_api::common::OpenAiVerbosity; use codex_api::common::TextControls; use codex_api::create_text_param_for_request; use pretty_assertions::assert_eq; + use crate::config::test_config; + use crate::openai_models::models_manager::ModelsManager; + use super::*; struct InstructionsTestCase { @@ -309,7 +311,9 @@ mod tests { }, ]; for test_case in test_cases { - let model_family = find_family_for_model(test_case.slug).expect("known model slug"); + let config = test_config(); + let model_family = + ModelsManager::construct_model_family_offline(test_case.slug, &config); let expected = if test_case.expects_apply_patch_instructions { format!( "{}\n{}", diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 682861d648..e23e03298d 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -11,11 +11,16 @@ use crate::compact; use crate::compact::run_inline_auto_compact_task; use 
crate::compact::should_use_remote_compact_task; use crate::compact_remote::run_inline_remote_auto_compact_task; +use crate::exec_policy::load_exec_policy_for_features; use crate::features::Feature; -use crate::function_tool::FunctionCallError; +use crate::features::Features; +use crate::openai_models::model_family::ModelFamily; +use crate::openai_models::models_manager::ModelsManager; use crate::parse_command::parse_command; use crate::parse_turn_item; -use crate::response_processing::process_items; +use crate::stream_events_utils::HandleOutputCtx; +use crate::stream_events_utils::handle_non_tool_response_item; +use crate::stream_events_utils::handle_output_item_done; use crate::terminal; use crate::truncate::TruncationPolicy; use crate::user_notification::UserNotifier; @@ -23,6 +28,7 @@ use crate::util::error_or_panic; use async_channel::Receiver; use async_channel::Sender; use codex_protocol::ConversationId; +use codex_protocol::approvals::ExecPolicyAmendment; use codex_protocol::items::TurnItem; use codex_protocol::protocol::FileChange; use codex_protocol::protocol::HasLegacyEvent; @@ -71,10 +77,9 @@ use crate::error::CodexErr; use crate::error::Result as CodexResult; #[cfg(test)] use crate::exec::StreamOutput; +use crate::exec_policy::ExecPolicyUpdateError; use crate::mcp::auth::compute_auth_statuses; use crate::mcp_connection_manager::McpConnectionManager; -use crate::model_family::find_family_for_model; -use crate::openai_model_info::get_model_info; use crate::project_doc::get_user_instructions; use crate::protocol::AgentMessageContentDeltaEvent; use crate::protocol::AgentReasoningSectionBreakEvent; @@ -90,9 +95,11 @@ use crate::protocol::RateLimitSnapshot; use crate::protocol::ReasoningContentDeltaEvent; use crate::protocol::ReasoningRawContentDeltaEvent; use crate::protocol::ReviewDecision; -use crate::protocol::SandboxCommandAssessment; use crate::protocol::SandboxPolicy; use crate::protocol::SessionConfiguredEvent; +use crate::protocol::SkillErrorInfo; 
+use crate::protocol::SkillInfo; +use crate::protocol::SkillLoadOutcomeInfo; use crate::protocol::StreamErrorEvent; use crate::protocol::Submission; use crate::protocol::TokenCountEvent; @@ -104,6 +111,11 @@ use crate::rollout::RolloutRecorder; use crate::rollout::RolloutRecorderParams; use crate::rollout::map_session_init_error; use crate::shell; +use crate::shell_snapshot::ShellSnapshot; +use crate::skills::SkillInjections; +use crate::skills::SkillLoadOutcome; +use crate::skills::build_skill_injections; +use crate::skills::load_skills; use crate::state::ActiveTurn; use crate::state::SessionServices; use crate::state::SessionState; @@ -126,12 +138,11 @@ use crate::util::backoff; use codex_async_utils::OrCancelExt; use codex_execpolicy::Policy as ExecPolicy; use codex_otel::otel_event_manager::OtelEventManager; -use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig; use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig; use codex_protocol::models::ContentItem; -use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; +use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig; use codex_protocol::protocol::CodexErrorInfo; use codex_protocol::protocol::InitialHistory; use codex_protocol::user_input::UserInput; @@ -162,23 +173,54 @@ impl Codex { pub async fn spawn( config: Config, auth_manager: Arc, + models_manager: Arc, conversation_history: InitialHistory, session_source: SessionSource, ) -> CodexResult { let (tx_sub, rx_sub) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY); let (tx_event, rx_event) = async_channel::unbounded(); - let user_instructions = get_user_instructions(&config).await; + let loaded_skills = if config.features.enabled(Feature::Skills) { + Some(load_skills(&config)) + } else { + None + }; - let exec_policy = crate::exec_policy::exec_policy_for(&config.features, &config.codex_home) + if let 
Some(outcome) = &loaded_skills { + for err in &outcome.errors { + error!( + "failed to load skill {}: {}", + err.path.display(), + err.message + ); + } + } + + let skills_outcome = loaded_skills.clone(); + + let user_instructions = get_user_instructions( + &config, + skills_outcome + .as_ref() + .map(|outcome| outcome.skills.as_slice()), + ) + .await; + + let exec_policy = load_exec_policy_for_features(&config.features, &config.codex_home) .await .map_err(|err| CodexErr::Fatal(format!("failed to load execpolicy: {err}")))?; + let exec_policy = Arc::new(RwLock::new(exec_policy)); let config = Arc::new(config); - + if config.features.enabled(Feature::RemoteModels) + && let Err(err) = models_manager.refresh_available_models(&config).await + { + error!("failed to refresh available models: {err:?}"); + } + let model = models_manager.get_model(&config.model, &config).await; let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), - model: config.model.clone(), + model: model.clone(), model_reasoning_effort: config.model_reasoning_effort, model_reasoning_summary: config.model_reasoning_summary, developer_instructions: config.developer_instructions.clone(), @@ -189,20 +231,22 @@ impl Codex { sandbox_policy: config.sandbox_policy.clone(), cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), - features: config.features.clone(), exec_policy, session_source, }; // Generate a unique ID for the lifetime of this Codex session. let session_source_clone = session_configuration.session_source.clone(); + let session = Session::new( session_configuration, config.clone(), auth_manager.clone(), + models_manager.clone(), tx_event.clone(), conversation_history, session_source_clone, + skills_outcome.clone(), ) .await .map_err(|e| { @@ -263,6 +307,9 @@ pub(crate) struct Session { conversation_id: ConversationId, tx_event: Sender, state: Mutex, + /// The set of enabled features should be invariant for the lifetime of the + /// session. 
+ features: Features, pub(crate) active_turn: Mutex>, pub(crate) services: SessionServices, next_internal_sub_id: AtomicU64, @@ -288,7 +335,7 @@ pub(crate) struct TurnContext { pub(crate) final_output_json_schema: Option, pub(crate) codex_linux_sandbox_exe: Option, pub(crate) tool_call_gate: Arc, - pub(crate) exec_policy: Arc, + pub(crate) exec_policy: Arc>, pub(crate) truncation_policy: TruncationPolicy, } @@ -343,10 +390,8 @@ pub(crate) struct SessionConfiguration { /// operate deterministically. cwd: PathBuf, - /// Set of feature flags for this session - features: Features, /// Execpolicy policy, applied only when enabled by feature flag. - exec_policy: Arc, + exec_policy: Arc>, // TODO(pakrym): Remove config from here original_config_do_not_use: Arc, @@ -391,34 +436,38 @@ pub(crate) struct SessionSettingsUpdate { } impl Session { + /// Don't expand the number of mutated arguments on config. We are in the process of getting rid of it. + fn build_per_turn_config(session_configuration: &SessionConfiguration) -> Config { + // todo(aibrahim): store this state somewhere else so we don't need to mut config + let config = session_configuration.original_config_do_not_use.clone(); + let mut per_turn_config = (*config).clone(); + per_turn_config.model_reasoning_effort = session_configuration.model_reasoning_effort; + per_turn_config.model_reasoning_summary = session_configuration.model_reasoning_summary; + per_turn_config.features = config.features.clone(); + per_turn_config + } + + #[allow(clippy::too_many_arguments)] fn make_turn_context( auth_manager: Option>, otel_event_manager: &OtelEventManager, provider: ModelProviderInfo, session_configuration: &SessionConfiguration, + per_turn_config: Config, + model_family: ModelFamily, conversation_id: ConversationId, sub_id: String, ) -> TurnContext { - let config = session_configuration.original_config_do_not_use.clone(); - let model_family = find_family_for_model(&session_configuration.model) - .unwrap_or_else(|| 
config.model_family.clone()); - let mut per_turn_config = (*config).clone(); - per_turn_config.model = session_configuration.model.clone(); - per_turn_config.model_family = model_family.clone(); - per_turn_config.model_reasoning_effort = session_configuration.model_reasoning_effort; - per_turn_config.model_reasoning_summary = session_configuration.model_reasoning_summary; - if let Some(model_info) = get_model_info(&model_family) { - per_turn_config.model_context_window = Some(model_info.context_window); - } - let otel_event_manager = otel_event_manager.clone().with_model( session_configuration.model.as_str(), - session_configuration.model.as_str(), + model_family.get_model_slug(), ); + let per_turn_config = Arc::new(per_turn_config); let client = ModelClient::new( - Arc::new(per_turn_config.clone()), + per_turn_config.clone(), auth_manager, + model_family.clone(), otel_event_manager, provider, session_configuration.model_reasoning_effort, @@ -429,7 +478,7 @@ impl Session { let tools_config = ToolsConfig::new(&ToolsConfigParams { model_family: &model_family, - features: &config.features, + features: &per_turn_config.features, }); TurnContext { @@ -442,23 +491,29 @@ impl Session { user_instructions: session_configuration.user_instructions.clone(), approval_policy: session_configuration.approval_policy, sandbox_policy: session_configuration.sandbox_policy.clone(), - shell_environment_policy: config.shell_environment_policy.clone(), + shell_environment_policy: per_turn_config.shell_environment_policy.clone(), tools_config, final_output_json_schema: None, - codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(), + codex_linux_sandbox_exe: per_turn_config.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), exec_policy: session_configuration.exec_policy.clone(), - truncation_policy: TruncationPolicy::new(&per_turn_config), + truncation_policy: TruncationPolicy::new( + per_turn_config.as_ref(), + model_family.truncation_policy, + ), } 
} + #[allow(clippy::too_many_arguments)] async fn new( session_configuration: SessionConfiguration, config: Arc, auth_manager: Arc, + models_manager: Arc, tx_event: Sender, initial_history: InitialHistory, session_source: SessionSource, + skills: Option, ) -> anyhow::Result> { debug!( "Configuring session: model={}; provider={:?}", @@ -496,7 +551,6 @@ impl Session { // - load history metadata let rollout_fut = RolloutRecorder::new(&config, rollout_params); - let default_shell = shell::default_user_shell(); let history_meta_fut = crate::message_history::history_metadata(&config); let auth_statuses_fut = compute_auth_statuses( config.mcp_servers.iter(), @@ -515,9 +569,9 @@ impl Session { let mut post_session_configured_events = Vec::::new(); - for (alias, feature) in session_configuration.features.legacy_feature_usages() { + for (alias, feature) in config.features.legacy_feature_usages() { let canonical = feature.key(); - let summary = format!("`{alias}` is deprecated. Use `{canonical}` instead."); + let summary = format!("`{alias}` is deprecated. Use `[features].{canonical}` instead."); let details = if alias == canonical { None } else { @@ -531,10 +585,11 @@ impl Session { }); } + // todo(aibrahim): why are we passing model here while it can change? let otel_event_manager = OtelEventManager::new( conversation_id, - config.model.as_str(), - config.model_family.slug.as_str(), + session_configuration.model.as_str(), + session_configuration.model.as_str(), auth_manager.auth().and_then(|a| a.get_account_id()), auth_manager.auth().and_then(|a| a.get_account_email()), auth_manager.auth().map(|a| a.mode), @@ -554,7 +609,14 @@ impl Session { config.active_profile.clone(), ); + let mut default_shell = shell::default_user_shell(); // Create the mutable state for the Session. 
+ if config.features.enabled(Feature::ShellSnapshot) { + default_shell.shell_snapshot = + ShellSnapshot::try_new(&config.codex_home, &default_shell) + .await + .map(Arc::new); + } let state = SessionState::new(session_configuration.clone()); let services = SessionServices { @@ -563,17 +625,20 @@ impl Session { unified_exec_manager: UnifiedExecSessionManager::default(), notifier: UserNotifier::new(config.notify.clone()), rollout: Mutex::new(Some(rollout_recorder)), - user_shell: default_shell, + user_shell: Arc::new(default_shell), show_raw_agent_reasoning: config.show_raw_agent_reasoning, auth_manager: Arc::clone(&auth_manager), otel_event_manager, + models_manager: Arc::clone(&models_manager), tool_approvals: Mutex::new(ApprovalStore::default()), + skills: skills.clone(), }; let sess = Arc::new(Session { conversation_id, tx_event: tx_event.clone(), state: Mutex::new(state), + features: config.features.clone(), active_turn: Mutex::new(None), services, next_internal_sub_id: AtomicU64::new(0), @@ -582,6 +647,7 @@ impl Session { // Dispatch the SessionConfiguredEvent first and then report any errors. // If resuming, include converted initial messages in the payload so UIs can render them immediately. 
let initial_messages = initial_history.get_event_msgs(); + let skill_load_outcome = skill_load_outcome_for_client(skills.as_ref()); let events = std::iter::once(Event { id: INITIAL_SUBMIT_ID.to_owned(), @@ -596,6 +662,7 @@ impl Session { history_log_id, history_entry_count, initial_messages, + skill_load_outcome, rollout_path, }), }) @@ -750,11 +817,19 @@ impl Session { session_configuration }; + let per_turn_config = Self::build_per_turn_config(&session_configuration); + let model_family = self + .services + .models_manager + .construct_model_family(session_configuration.model.as_str(), &per_turn_config) + .await; let mut turn_context: TurnContext = Self::make_turn_context( Some(Arc::clone(&self.services.auth_manager)), &self.services.otel_event_manager, session_configuration.provider.clone(), &session_configuration, + per_turn_config, + model_family, self.conversation_id, sub_id, ); @@ -771,14 +846,16 @@ impl Session { ) -> Option { let prev = previous?; - let prev_context = EnvironmentContext::from(prev.as_ref()); - let next_context = EnvironmentContext::from(next); + let shell = self.user_shell(); + let prev_context = EnvironmentContext::from_turn_context(prev.as_ref(), shell.as_ref()); + let next_context = EnvironmentContext::from_turn_context(next, shell.as_ref()); if prev_context.equals_except_shell(&next_context) { return None; } Some(ResponseItem::from(EnvironmentContext::diff( prev.as_ref(), next, + shell.as_ref(), ))) } @@ -810,7 +887,7 @@ impl Session { } } - async fn emit_turn_item_started(&self, turn_context: &TurnContext, item: &TurnItem) { + pub(crate) async fn emit_turn_item_started(&self, turn_context: &TurnContext, item: &TurnItem) { self.send_event( turn_context, EventMsg::ItemStarted(ItemStartedEvent { @@ -822,7 +899,11 @@ impl Session { .await; } - async fn emit_turn_item_completed(&self, turn_context: &TurnContext, item: TurnItem) { + pub(crate) async fn emit_turn_item_completed( + &self, + turn_context: &TurnContext, + item: TurnItem, + ) { 
self.send_event( turn_context, EventMsg::ItemCompleted(ItemCompletedEvent { @@ -834,31 +915,38 @@ impl Session { .await; } - pub(crate) async fn assess_sandbox_command( + /// Adds an execpolicy amendment to both the in-memory and on-disk policies so future + /// commands can use the newly approved prefix. + pub(crate) async fn persist_execpolicy_amendment( &self, - turn_context: &TurnContext, - call_id: &str, - command: &[String], - failure_message: Option<&str>, - ) -> Option { - let config = turn_context.client.config(); - let provider = turn_context.client.provider().clone(); - let auth_manager = Arc::clone(&self.services.auth_manager); - let otel = self.services.otel_event_manager.clone(); - crate::sandboxing::assessment::assess_command( - config, - provider, - auth_manager, - &otel, - self.conversation_id, - turn_context.client.get_session_source(), - call_id, - command, - &turn_context.sandbox_policy, - &turn_context.cwd, - failure_message, + amendment: &ExecPolicyAmendment, + ) -> Result<(), ExecPolicyUpdateError> { + let features = self.features.clone(); + let (codex_home, current_policy) = { + let state = self.state.lock().await; + ( + state + .session_configuration + .original_config_do_not_use + .codex_home + .clone(), + state.session_configuration.exec_policy.clone(), + ) + }; + + if !features.enabled(Feature::ExecPolicy) { + error!("attempted to append execpolicy rule while execpolicy feature is disabled"); + return Err(ExecPolicyUpdateError::FeatureDisabled); + } + + crate::exec_policy::append_execpolicy_amendment_and_update( + &codex_home, + ¤t_policy, + &amendment.command, ) - .await + .await?; + + Ok(()) } /// Emit an exec approval request event and await the user's decision. @@ -866,6 +954,7 @@ impl Session { /// The request is keyed by `sub_id`/`call_id` so matching responses are delivered /// to the correct in-flight turn. If the task is aborted, this returns the /// default `ReviewDecision` (`Denied`). 
+ #[allow(clippy::too_many_arguments)] pub async fn request_command_approval( &self, turn_context: &TurnContext, @@ -873,7 +962,7 @@ impl Session { command: Vec, cwd: PathBuf, reason: Option, - risk: Option, + proposed_execpolicy_amendment: Option, ) -> ReviewDecision { let sub_id = turn_context.sub_id.clone(); // Add the tx_approve callback to the map before sending the request. @@ -900,7 +989,7 @@ impl Session { command, cwd, reason, - risk, + proposed_execpolicy_amendment, parsed_cmd, }); self.send_event(turn_context, event).await; @@ -1037,7 +1126,7 @@ impl Session { } pub(crate) async fn record_model_warning(&self, message: impl Into, ctx: &TurnContext) { - if !self.enabled(Feature::ModelWarnings).await { + if !self.enabled(Feature::ModelWarnings) { return; } @@ -1066,13 +1155,12 @@ impl Session { self.persist_rollout_items(&rollout_items).await; } - pub async fn enabled(&self, feature: Feature) -> bool { - self.state - .lock() - .await - .session_configuration - .features - .enabled(feature) + pub fn enabled(&self, feature: Feature) -> bool { + self.features.enabled(feature) + } + + pub(crate) fn features(&self) -> Features { + self.features.clone() } async fn send_raw_response_items(&self, turn_context: &TurnContext, items: &[ResponseItem]) { @@ -1087,6 +1175,7 @@ impl Session { pub(crate) fn build_initial_context(&self, turn_context: &TurnContext) -> Vec { let mut items = Vec::::with_capacity(3); + let shell = self.user_shell(); if let Some(developer_instructions) = turn_context.developer_instructions.as_deref() { items.push(DeveloperInstructions::new(developer_instructions.to_string()).into()); } @@ -1103,7 +1192,7 @@ impl Session { Some(turn_context.cwd.clone()), Some(turn_context.approval_policy), Some(turn_context.sandbox_policy.clone()), - self.user_shell().clone(), + shell.as_ref().clone(), ))); items } @@ -1255,7 +1344,7 @@ impl Session { turn_context: Arc, cancellation_token: CancellationToken, ) { - if !self.enabled(Feature::GhostCommit).await { + 
if !self.enabled(Feature::GhostCommit) { return; } let token = match turn_context.tool_call_gate.subscribe().await { @@ -1378,8 +1467,8 @@ impl Session { &self.services.notifier } - pub(crate) fn user_shell(&self) -> &shell::Shell { - &self.services.user_shell + pub(crate) fn user_shell(&self) -> Arc { + Arc::clone(&self.services.user_shell) } fn show_raw_agent_reasoning(&self) -> bool { @@ -1509,6 +1598,7 @@ mod handlers { use codex_protocol::protocol::ReviewDecision; use codex_protocol::protocol::ReviewRequest; use codex_protocol::protocol::TurnAbortReason; + use codex_protocol::protocol::WarningEvent; use codex_protocol::user_input::UserInput; use codex_rmcp_client::ElicitationAction; @@ -1623,7 +1713,25 @@ mod handlers { } } + /// Propagate a user's exec approval decision to the session. + /// Also optionally applies an execpolicy amendment. pub async fn exec_approval(sess: &Arc, id: String, decision: ReviewDecision) { + if let ReviewDecision::ApprovedExecpolicyAmendment { + proposed_execpolicy_amendment, + } = &decision + && let Err(err) = sess + .persist_execpolicy_amendment(proposed_execpolicy_amendment) + .await + { + let message = format!("Failed to apply execpolicy amendment: {err}"); + tracing::warn!("{message}"); + let warning = EventMsg::Warning(WarningEvent { message }); + sess.send_event_raw(Event { + id: id.clone(), + msg: warning, + }) + .await; + } match decision { ReviewDecision::Abort => { sess.interrupt_task().await; @@ -1825,10 +1933,13 @@ async fn spawn_review_thread( resolved: crate::review_prompts::ResolvedReviewRequest, ) { let model = config.review_model.clone(); - let review_model_family = find_family_for_model(&model) - .unwrap_or_else(|| parent_turn_context.client.get_model_family()); + let review_model_family = sess + .services + .models_manager + .construct_model_family(&model, &config) + .await; // For reviews, disable web_search and view_image regardless of global settings. 
- let mut review_features = config.features.clone(); + let mut review_features = sess.features.clone(); review_features .disable(crate::features::Feature::WebSearchRequest) .disable(crate::features::Feature::ViewImageTool); @@ -1845,26 +1956,23 @@ async fn spawn_review_thread( // Build per‑turn client with the requested model/family. let mut per_turn_config = (*config).clone(); - per_turn_config.model = model.clone(); - per_turn_config.model_family = model_family.clone(); per_turn_config.model_reasoning_effort = Some(ReasoningEffortConfig::Low); per_turn_config.model_reasoning_summary = ReasoningSummaryConfig::Detailed; - if let Some(model_info) = get_model_info(&model_family) { - per_turn_config.model_context_window = Some(model_info.context_window); - } + per_turn_config.features = review_features.clone(); let otel_event_manager = parent_turn_context .client .get_otel_event_manager() .with_model( - per_turn_config.model.as_str(), - per_turn_config.model_family.slug.as_str(), + config.review_model.as_str(), + review_model_family.slug.as_str(), ); let per_turn_config = Arc::new(per_turn_config); let client = ModelClient::new( per_turn_config.clone(), auth_manager, + model_family.clone(), otel_event_manager, provider, per_turn_config.model_reasoning_effort, @@ -1889,7 +1997,7 @@ async fn spawn_review_thread( codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), exec_policy: parent_turn_context.exec_policy.clone(), - truncation_policy: TruncationPolicy::new(&per_turn_config), + truncation_policy: TruncationPolicy::new(&per_turn_config, model_family.truncation_policy), }; // Seed the child task with the review prompt as the initial user message. 
@@ -1908,6 +2016,30 @@ async fn spawn_review_thread( .await; } +fn skill_load_outcome_for_client( + outcome: Option<&SkillLoadOutcome>, +) -> Option { + outcome.map(|outcome| SkillLoadOutcomeInfo { + skills: outcome + .skills + .iter() + .map(|skill| SkillInfo { + name: skill.name.clone(), + description: skill.description.clone(), + path: skill.path.clone(), + }) + .collect(), + errors: outcome + .errors + .iter() + .map(|err| SkillErrorInfo { + path: err.path.clone(), + message: err.message.clone(), + }) + .collect(), + }) +} + /// Takes a user message as input and runs a loop where, at each turn, the model /// replies with either: /// @@ -1936,11 +2068,26 @@ pub(crate) async fn run_task( }); sess.send_event(&turn_context, event).await; + let SkillInjections { + items: skill_items, + warnings: skill_warnings, + } = build_skill_injections(&input, sess.services.skills.as_ref()).await; + + for message in skill_warnings { + sess.send_event(&turn_context, EventMsg::Warning(WarningEvent { message })) + .await; + } + let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input); let response_item: ResponseItem = initial_input_for_turn.clone().into(); sess.record_response_item_and_emit_turn_item(turn_context.as_ref(), response_item) .await; + if !skill_items.is_empty() { + sess.record_conversation_items(&turn_context, &skill_items) + .await; + } + sess.maybe_start_ghost_snapshot(Arc::clone(&turn_context), cancellation_token.child_token()) .await; let mut last_agent_message: Option = None; @@ -1984,19 +2131,21 @@ pub(crate) async fn run_task( .await { Ok(turn_output) => { - let processed_items = turn_output; + let TurnRunResult { + needs_follow_up, + last_agent_message: turn_last_agent_message, + } = turn_output; let limit = turn_context .client - .get_auto_compact_token_limit() + .get_model_family() + .auto_compact_token_limit() .unwrap_or(i64::MAX); let total_usage_tokens = sess.get_total_token_usage().await; let token_limit_reached = total_usage_tokens 
>= limit; - let (responses, items_to_record_in_conversation_history) = - process_items(processed_items, &sess, &turn_context).await; // as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop. if token_limit_reached { - if should_use_remote_compact_task(&sess).await { + if should_use_remote_compact_task(&sess) { run_inline_remote_auto_compact_task(sess.clone(), turn_context.clone()) .await; } else { @@ -2005,10 +2154,8 @@ pub(crate) async fn run_task( continue; } - if responses.is_empty() { - last_agent_message = get_last_assistant_message_from_turn( - &items_to_record_in_conversation_history, - ); + if !needs_follow_up { + last_agent_message = turn_last_agent_message; sess.notifier() .notify(&UserNotification::AgentTurnComplete { thread_id: sess.conversation_id.to_string(), @@ -2021,10 +2168,7 @@ pub(crate) async fn run_task( } continue; } - Err(CodexErr::TurnAborted { - dangling_artifacts: processed_items, - }) => { - let _ = process_items(processed_items, &sess, &turn_context).await; + Err(CodexErr::TurnAborted) => { // Aborted turn is reported via a different event. break; } @@ -2054,7 +2198,7 @@ async fn run_turn( turn_diff_tracker: SharedTurnDiffTracker, input: Vec, cancellation_token: CancellationToken, -) -> CodexResult> { +) -> CodexResult { let mcp_tools = sess .services .mcp_connection_manager @@ -2078,31 +2222,11 @@ async fn run_turn( .get_model_family() .supports_parallel_tool_calls; - // TODO(jif) revert once testing phase is done. 
- let parallel_tool_calls = model_supports_parallel - && sess - .state - .lock() - .await - .session_configuration - .features - .enabled(Feature::ParallelToolCalls); - let mut base_instructions = turn_context.base_instructions.clone(); - if parallel_tool_calls { - static INSTRUCTIONS: &str = include_str!("../templates/parallel/instructions.md"); - if let Some(family) = - find_family_for_model(&sess.state.lock().await.session_configuration.model) - { - let mut new_instructions = base_instructions.unwrap_or(family.base_instructions); - new_instructions.push_str(INSTRUCTIONS); - base_instructions = Some(new_instructions); - } - } let prompt = Prompt { input, tools: router.specs(), - parallel_tool_calls, - base_instructions_override: base_instructions, + parallel_tool_calls: model_supports_parallel && sess.enabled(Feature::ParallelToolCalls), + base_instructions_override: turn_context.base_instructions.clone(), output_schema: turn_context.final_output_json_schema.clone(), }; @@ -2118,13 +2242,10 @@ async fn run_turn( ) .await { + // todo(aibrahim): map special cases and ? on other errors Ok(output) => return Ok(output), - Err(CodexErr::TurnAborted { - dangling_artifacts: processed_items, - }) => { - return Err(CodexErr::TurnAborted { - dangling_artifacts: processed_items, - }); + Err(CodexErr::TurnAborted) => { + return Err(CodexErr::TurnAborted); } Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted), Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)), @@ -2177,14 +2298,29 @@ async fn run_turn( } } -/// When the model is prompted, it returns a stream of events. Some of these -/// events map to a `ResponseItem`. A `ResponseItem` may need to be -/// "handled" such that it produces a `ResponseInputItem` that needs to be -/// sent back to the model on the next turn. 
#[derive(Debug)] -pub struct ProcessedResponseItem { - pub item: ResponseItem, - pub response: Option, +struct TurnRunResult { + needs_follow_up: bool, + last_agent_message: Option, +} + +async fn drain_in_flight( + in_flight: &mut FuturesOrdered>>, + sess: Arc, + turn_context: Arc, +) -> CodexResult<()> { + while let Some(res) = in_flight.next().await { + match res { + Ok(response_input) => { + sess.record_conversation_items(&turn_context, &[response_input.into()]) + .await; + } + Err(err) => { + error_or_panic(format!("in-flight tool future failed during drain: {err}")); + } + } + } + Ok(()) } #[allow(clippy::too_many_arguments)] @@ -2195,7 +2331,7 @@ async fn try_run_turn( turn_diff_tracker: SharedTurnDiffTracker, prompt: &Prompt, cancellation_token: CancellationToken, -) -> CodexResult> { +) -> CodexResult { let rollout_item = RolloutItem::TurnContext(TurnContextItem { cwd: turn_context.cwd.clone(), approval_policy: turn_context.approval_policy, @@ -2219,114 +2355,47 @@ async fn try_run_turn( Arc::clone(&turn_context), Arc::clone(&turn_diff_tracker), ); - let mut output: FuturesOrdered>> = + let mut in_flight: FuturesOrdered>> = FuturesOrdered::new(); - + let mut needs_follow_up = false; + let mut last_agent_message: Option = None; let mut active_item: Option = None; - - loop { - // Poll the next item from the model stream. We must inspect *both* Ok and Err - // cases so that transient stream failures (e.g., dropped SSE connection before - // `response.completed`) bubble up and trigger the caller's retry logic. 
+ let outcome: CodexResult = loop { let event = match stream.next().or_cancel(&cancellation_token).await { Ok(event) => event, - Err(codex_async_utils::CancelErr::Cancelled) => { - let processed_items = output.try_collect().await?; - return Err(CodexErr::TurnAborted { - dangling_artifacts: processed_items, - }); - } + Err(codex_async_utils::CancelErr::Cancelled) => break Err(CodexErr::TurnAborted), }; let event = match event { Some(res) => res?, None => { - return Err(CodexErr::Stream( + break Err(CodexErr::Stream( "stream closed before response.completed".into(), None, )); } }; - let add_completed = &mut |response_item: ProcessedResponseItem| { - output.push_back(future::ready(Ok(response_item)).boxed()); - }; - match event { ResponseEvent::Created => {} ResponseEvent::OutputItemDone(item) => { let previously_active_item = active_item.take(); - match ToolRouter::build_tool_call(sess.as_ref(), item.clone()).await { - Ok(Some(call)) => { - let payload_preview = call.payload.log_payload().into_owned(); - tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview); + let mut ctx = HandleOutputCtx { + sess: sess.clone(), + turn_context: turn_context.clone(), + tool_runtime: tool_runtime.clone(), + cancellation_token: cancellation_token.child_token(), + }; - let response = - tool_runtime.handle_tool_call(call, cancellation_token.child_token()); - - output.push_back( - async move { - Ok(ProcessedResponseItem { - item, - response: Some(response.await?), - }) - } - .boxed(), - ); - } - Ok(None) => { - if let Some(turn_item) = handle_non_tool_response_item(&item).await { - if previously_active_item.is_none() { - sess.emit_turn_item_started(&turn_context, &turn_item).await; - } - - sess.emit_turn_item_completed(&turn_context, turn_item) - .await; - } - - add_completed(ProcessedResponseItem { - item, - response: None, - }); - } - Err(FunctionCallError::MissingLocalShellCallId) => { - let msg = "LocalShellCall without call_id or id"; - turn_context - .client - 
.get_otel_event_manager() - .log_tool_failed("local_shell", msg); - error!(msg); - - let response = ResponseInputItem::FunctionCallOutput { - call_id: String::new(), - output: FunctionCallOutputPayload { - content: msg.to_string(), - ..Default::default() - }, - }; - add_completed(ProcessedResponseItem { - item, - response: Some(response), - }); - } - Err(FunctionCallError::RespondToModel(message)) - | Err(FunctionCallError::Denied(message)) => { - let response = ResponseInputItem::FunctionCallOutput { - call_id: String::new(), - output: FunctionCallOutputPayload { - content: message, - ..Default::default() - }, - }; - add_completed(ProcessedResponseItem { - item, - response: Some(response), - }); - } - Err(FunctionCallError::Fatal(message)) => { - return Err(CodexErr::Fatal(message)); - } + let output_result = + handle_output_item_done(&mut ctx, item, previously_active_item).await?; + if let Some(tool_future) = output_result.tool_future { + in_flight.push_back(tool_future); } + if let Some(agent_message) = output_result.last_agent_message { + last_agent_message = Some(agent_message); + } + needs_follow_up |= output_result.needs_follow_up; } ResponseEvent::OutputItemAdded(item) => { if let Some(turn_item) = handle_non_tool_response_item(&item).await { @@ -2347,7 +2416,6 @@ async fn try_run_turn( } => { sess.update_token_usage_info(&turn_context, token_usage.as_ref()) .await; - let processed_items = output.try_collect().await?; let unified_diff = { let mut tracker = turn_diff_tracker.lock().await; tracker.get_unified_diff() @@ -2357,7 +2425,10 @@ async fn try_run_turn( sess.send_event(&turn_context, msg).await; } - return Ok(processed_items); + break Ok(TurnRunResult { + needs_follow_up, + last_agent_message, + }); } ResponseEvent::OutputTextDelta(delta) => { // In review child threads, suppress assistant text deltas; the @@ -2424,22 +2495,11 @@ async fn try_run_turn( } } } - } -} + }; -async fn handle_non_tool_response_item(item: &ResponseItem) -> Option { - 
debug!(?item, "Output item"); + drain_in_flight(&mut in_flight, sess, turn_context).await?; - match item { - ResponseItem::Message { .. } - | ResponseItem::Reasoning { .. } - | ResponseItem::WebSearchCall { .. } => parse_turn_item(item), - ResponseItem::FunctionCallOutput { .. } | ResponseItem::CustomToolCallOutput { .. } => { - debug!("unexpected tool output from stream"); - None - } - _ => None, - } + outcome } pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -> Option { @@ -2462,7 +2522,6 @@ pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) - }) } -use crate::features::Features; #[cfg(test)] pub(crate) use tests::make_session_and_context; @@ -2472,11 +2531,14 @@ pub(crate) use tests::make_session_and_context_with_rx; #[cfg(test)] mod tests { use super::*; + use crate::CodexAuth; use crate::config::ConfigOverrides; use crate::config::ConfigToml; use crate::exec::ExecToolCallOutput; + use crate::function_tool::FunctionCallError; use crate::shell::default_user_shell; use crate::tools::format_exec_output_str; + use codex_protocol::models::FunctionCallOutputPayload; use crate::protocol::CompactedItem; use crate::protocol::CreditsSnapshot; @@ -2562,9 +2624,10 @@ mod tests { ) .expect("load default test config"); let config = Arc::new(config); + let model = ModelsManager::get_model_offline(config.model.as_deref()); let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), - model: config.model.clone(), + model, model_reasoning_effort: config.model_reasoning_effort, model_reasoning_summary: config.model_reasoning_summary, developer_instructions: config.developer_instructions.clone(), @@ -2575,8 +2638,7 @@ mod tests { sandbox_policy: config.sandbox_policy.clone(), cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), - features: Features::default(), - exec_policy: Arc::new(ExecPolicy::empty()), + exec_policy: Arc::new(RwLock::new(ExecPolicy::empty())), 
session_source: SessionSource::Exec, }; @@ -2593,6 +2655,7 @@ mod tests { unlimited: false, balance: Some("10.00".to_string()), }), + plan_type: Some(codex_protocol::account::PlanType::Plus), }; state.set_rate_limits(initial.clone()); @@ -2608,6 +2671,7 @@ mod tests { resets_at: Some(1_900), }), credits: None, + plan_type: None, }; state.set_rate_limits(update.clone()); @@ -2617,6 +2681,79 @@ mod tests { primary: update.primary.clone(), secondary: update.secondary, credits: initial.credits, + plan_type: initial.plan_type, + }) + ); + } + + #[test] + fn set_rate_limits_updates_plan_type_when_present() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + let config = Config::load_from_base_config_with_overrides( + ConfigToml::default(), + ConfigOverrides::default(), + codex_home.path().to_path_buf(), + ) + .expect("load default test config"); + let config = Arc::new(config); + let model = ModelsManager::get_model_offline(config.model.as_deref()); + let session_configuration = SessionConfiguration { + provider: config.model_provider.clone(), + model, + model_reasoning_effort: config.model_reasoning_effort, + model_reasoning_summary: config.model_reasoning_summary, + developer_instructions: config.developer_instructions.clone(), + user_instructions: config.user_instructions.clone(), + base_instructions: config.base_instructions.clone(), + compact_prompt: config.compact_prompt.clone(), + approval_policy: config.approval_policy, + sandbox_policy: config.sandbox_policy.clone(), + cwd: config.cwd.clone(), + original_config_do_not_use: Arc::clone(&config), + exec_policy: Arc::new(RwLock::new(ExecPolicy::empty())), + session_source: SessionSource::Exec, + }; + + let mut state = SessionState::new(session_configuration); + let initial = RateLimitSnapshot { + primary: Some(RateLimitWindow { + used_percent: 15.0, + window_minutes: Some(20), + resets_at: Some(1_600), + }), + secondary: Some(RateLimitWindow { + used_percent: 5.0, + window_minutes: Some(45), + 
resets_at: Some(1_650), + }), + credits: Some(CreditsSnapshot { + has_credits: true, + unlimited: false, + balance: Some("15.00".to_string()), + }), + plan_type: Some(codex_protocol::account::PlanType::Plus), + }; + state.set_rate_limits(initial.clone()); + + let update = RateLimitSnapshot { + primary: Some(RateLimitWindow { + used_percent: 35.0, + window_minutes: Some(25), + resets_at: Some(1_700), + }), + secondary: None, + credits: None, + plan_type: Some(codex_protocol::account::PlanType::Pro), + }; + state.set_rate_limits(update.clone()); + + assert_eq!( + state.latest_rate_limits, + Some(RateLimitSnapshot { + primary: update.primary, + secondary: update.secondary, + credits: initial.credits, + plan_type: update.plan_type, }) ); } @@ -2730,11 +2867,15 @@ mod tests { }) } - fn otel_event_manager(conversation_id: ConversationId, config: &Config) -> OtelEventManager { + fn otel_event_manager( + conversation_id: ConversationId, + config: &Config, + model_family: &ModelFamily, + ) -> OtelEventManager { OtelEventManager::new( conversation_id, - config.model.as_str(), - config.model_family.slug.as_str(), + ModelsManager::get_model_offline(config.model.as_deref()).as_str(), + model_family.slug.as_str(), None, Some("test@test.com".to_string()), Some(AuthMode::ChatGPT), @@ -2754,16 +2895,13 @@ mod tests { .expect("load default test config"); let config = Arc::new(config); let conversation_id = ConversationId::default(); - let otel_event_manager = otel_event_manager(conversation_id, config.as_ref()); - let auth_manager = AuthManager::shared( - config.cwd.clone(), - false, - config.cli_auth_credentials_store_mode, - ); - + let auth_manager = + AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); + let models_manager = Arc::new(ModelsManager::new(auth_manager.clone())); + let model = ModelsManager::get_model_offline(config.model.as_deref()); let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), - model: 
config.model.clone(), + model, model_reasoning_effort: config.model_reasoning_effort, model_reasoning_summary: config.model_reasoning_summary, developer_instructions: config.developer_instructions.clone(), @@ -2774,10 +2912,16 @@ mod tests { sandbox_policy: config.sandbox_policy.clone(), cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), - features: Features::default(), - exec_policy: Arc::new(ExecPolicy::empty()), + exec_policy: Arc::new(RwLock::new(ExecPolicy::empty())), session_source: SessionSource::Exec, }; + let per_turn_config = Session::build_per_turn_config(&session_configuration); + let model_family = ModelsManager::construct_model_family_offline( + session_configuration.model.as_str(), + &per_turn_config, + ); + let otel_event_manager = + otel_event_manager(conversation_id, config.as_ref(), &model_family); let state = SessionState::new(session_configuration.clone()); @@ -2787,11 +2931,13 @@ mod tests { unified_exec_manager: UnifiedExecSessionManager::default(), notifier: UserNotifier::new(None), rollout: Mutex::new(None), - user_shell: default_user_shell(), + user_shell: Arc::new(default_user_shell()), show_raw_agent_reasoning: config.show_raw_agent_reasoning, - auth_manager: Arc::clone(&auth_manager), + auth_manager: auth_manager.clone(), otel_event_manager: otel_event_manager.clone(), + models_manager, tool_approvals: Mutex::new(ApprovalStore::default()), + skills: None, }; let turn_context = Session::make_turn_context( @@ -2799,6 +2945,8 @@ mod tests { &otel_event_manager, session_configuration.provider.clone(), &session_configuration, + per_turn_config, + model_family, conversation_id, "turn_id".to_string(), ); @@ -2807,6 +2955,7 @@ mod tests { conversation_id, tx_event, state: Mutex::new(state), + features: config.features.clone(), active_turn: Mutex::new(None), services, next_internal_sub_id: AtomicU64::new(0), @@ -2832,16 +2981,13 @@ mod tests { .expect("load default test config"); let config = Arc::new(config); let 
conversation_id = ConversationId::default(); - let otel_event_manager = otel_event_manager(conversation_id, config.as_ref()); - let auth_manager = AuthManager::shared( - config.cwd.clone(), - false, - config.cli_auth_credentials_store_mode, - ); - + let auth_manager = + AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); + let models_manager = Arc::new(ModelsManager::new(auth_manager.clone())); + let model = ModelsManager::get_model_offline(config.model.as_deref()); let session_configuration = SessionConfiguration { provider: config.model_provider.clone(), - model: config.model.clone(), + model, model_reasoning_effort: config.model_reasoning_effort, model_reasoning_summary: config.model_reasoning_summary, developer_instructions: config.developer_instructions.clone(), @@ -2852,10 +2998,16 @@ mod tests { sandbox_policy: config.sandbox_policy.clone(), cwd: config.cwd.clone(), original_config_do_not_use: Arc::clone(&config), - features: Features::default(), - exec_policy: Arc::new(ExecPolicy::empty()), + exec_policy: Arc::new(RwLock::new(ExecPolicy::empty())), session_source: SessionSource::Exec, }; + let per_turn_config = Session::build_per_turn_config(&session_configuration); + let model_family = ModelsManager::construct_model_family_offline( + session_configuration.model.as_str(), + &per_turn_config, + ); + let otel_event_manager = + otel_event_manager(conversation_id, config.as_ref(), &model_family); let state = SessionState::new(session_configuration.clone()); @@ -2865,11 +3017,13 @@ mod tests { unified_exec_manager: UnifiedExecSessionManager::default(), notifier: UserNotifier::new(None), rollout: Mutex::new(None), - user_shell: default_user_shell(), + user_shell: Arc::new(default_user_shell()), show_raw_agent_reasoning: config.show_raw_agent_reasoning, auth_manager: Arc::clone(&auth_manager), otel_event_manager: otel_event_manager.clone(), + models_manager, tool_approvals: Mutex::new(ApprovalStore::default()), + skills: None, }; let 
turn_context = Arc::new(Session::make_turn_context( @@ -2877,6 +3031,8 @@ mod tests { &otel_event_manager, session_configuration.provider.clone(), &session_configuration, + per_turn_config, + model_family, conversation_id, "turn_id".to_string(), )); @@ -2885,6 +3041,7 @@ mod tests { conversation_id, tx_event, state: Mutex::new(state), + features: config.features.clone(), active_turn: Mutex::new(None), services, next_internal_sub_id: AtomicU64::new(0), @@ -2895,15 +3052,10 @@ mod tests { #[tokio::test] async fn record_model_warning_appends_user_message() { - let (session, turn_context) = make_session_and_context(); - - session - .state - .lock() - .await - .session_configuration - .features - .enable(Feature::ModelWarnings); + let (mut session, turn_context) = make_session_and_context(); + let mut features = Features::with_defaults(); + features.enable(Feature::ModelWarnings); + session.features = features; session .record_model_warning("too many unified exec sessions", &turn_context) @@ -3221,6 +3373,7 @@ mod tests { use crate::exec::ExecParams; use crate::protocol::AskForApproval; use crate::protocol::SandboxPolicy; + use crate::sandboxing::SandboxPermissions; use crate::turn_diff_tracker::TurnDiffTracker; use std::collections::HashMap; @@ -3231,6 +3384,7 @@ mod tests { let mut turn_context = Arc::new(turn_context_raw); let timeout_ms = 1000; + let sandbox_permissions = SandboxPermissions::RequireEscalated; let params = ExecParams { command: if cfg!(windows) { vec![ @@ -3248,13 +3402,13 @@ mod tests { cwd: turn_context.cwd.clone(), expiration: timeout_ms.into(), env: HashMap::new(), - with_escalated_permissions: Some(true), + sandbox_permissions, justification: Some("test".to_string()), arg0: None, }; let params2 = ExecParams { - with_escalated_permissions: Some(false), + sandbox_permissions: SandboxPermissions::UseDefault, command: params.command.clone(), cwd: params.cwd.clone(), expiration: timeout_ms.into(), @@ -3281,7 +3435,7 @@ mod tests { "command": 
params.command.clone(), "workdir": Some(turn_context.cwd.to_string_lossy().to_string()), "timeout_ms": params.expiration.timeout_ms(), - "with_escalated_permissions": params.with_escalated_permissions, + "sandbox_permissions": params.sandbox_permissions, "justification": params.justification.clone(), }) .to_string(), @@ -3318,7 +3472,7 @@ mod tests { "command": params2.command.clone(), "workdir": Some(turn_context.cwd.to_string_lossy().to_string()), "timeout_ms": params2.expiration.timeout_ms(), - "with_escalated_permissions": params2.with_escalated_permissions, + "sandbox_permissions": params2.sandbox_permissions, "justification": params2.justification.clone(), }) .to_string(), @@ -3351,6 +3505,7 @@ mod tests { #[tokio::test] async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() { use crate::protocol::AskForApproval; + use crate::sandboxing::SandboxPermissions; use crate::turn_diff_tracker::TurnDiffTracker; let (session, mut turn_context_raw) = make_session_and_context(); @@ -3370,7 +3525,7 @@ mod tests { payload: ToolPayload::Function { arguments: serde_json::json!({ "cmd": "echo hi", - "with_escalated_permissions": true, + "sandbox_permissions": SandboxPermissions::RequireEscalated, "justification": "need unsandboxed execution", }) .to_string(), diff --git a/codex-rs/core/src/codex_delegate.rs b/codex-rs/core/src/codex_delegate.rs index 796331d1e8..75b29eddee 100644 --- a/codex-rs/core/src/codex_delegate.rs +++ b/codex-rs/core/src/codex_delegate.rs @@ -25,6 +25,7 @@ use crate::codex::Session; use crate::codex::TurnContext; use crate::config::Config; use crate::error::CodexErr; +use crate::openai_models::models_manager::ModelsManager; use codex_protocol::protocol::InitialHistory; /// Start an interactive sub-Codex conversation and return IO channels. 
@@ -35,6 +36,7 @@ use codex_protocol::protocol::InitialHistory; pub(crate) async fn run_codex_conversation_interactive( config: Config, auth_manager: Arc, + models_manager: Arc, parent_session: Arc, parent_ctx: Arc, cancel_token: CancellationToken, @@ -46,6 +48,7 @@ pub(crate) async fn run_codex_conversation_interactive( let CodexSpawnOk { codex, .. } = Codex::spawn( config, auth_manager, + models_manager, initial_history.unwrap_or(InitialHistory::New), SessionSource::SubAgent(SubAgentSource::Review), ) @@ -88,9 +91,11 @@ pub(crate) async fn run_codex_conversation_interactive( /// Convenience wrapper for one-time use with an initial prompt. /// /// Internally calls the interactive variant, then immediately submits the provided input. +#[allow(clippy::too_many_arguments)] pub(crate) async fn run_codex_conversation_one_shot( config: Config, auth_manager: Arc, + models_manager: Arc, input: Vec, parent_session: Arc, parent_ctx: Arc, @@ -103,6 +108,7 @@ pub(crate) async fn run_codex_conversation_one_shot( let io = run_codex_conversation_interactive( config, auth_manager, + models_manager, parent_session, parent_ctx, child_cancel.clone(), @@ -274,7 +280,7 @@ async fn handle_exec_approval( event.command, event.cwd, event.reason, - event.risk, + event.proposed_execpolicy_amendment, ); let decision = await_approval_with_cancel( approval_fut, diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index fb5c187b7f..7ce325a75a 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -32,13 +32,13 @@ pub const SUMMARIZATION_PROMPT: &str = include_str!("../templates/compact/prompt pub const SUMMARY_PREFIX: &str = include_str!("../templates/compact/summary_prefix.md"); const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000; -pub(crate) async fn should_use_remote_compact_task(session: &Session) -> bool { +pub(crate) fn should_use_remote_compact_task(session: &Session) -> bool { session .services .auth_manager .auth() .is_some_and(|auth| 
auth.mode == AuthMode::ChatGPT) - && session.enabled(Feature::RemoteCompaction).await + && session.enabled(Feature::RemoteCompaction) } pub(crate) async fn run_inline_auto_compact_task( diff --git a/codex-rs/core/src/config/edit.rs b/codex-rs/core/src/config/edit.rs index b8862fa5c5..37c2aba6ef 100644 --- a/codex-rs/core/src/config/edit.rs +++ b/codex-rs/core/src/config/edit.rs @@ -2,8 +2,8 @@ use crate::config::CONFIG_TOML_FILE; use crate::config::types::McpServerConfig; use crate::config::types::Notice; use anyhow::Context; -use codex_protocol::config_types::ReasoningEffort; use codex_protocol::config_types::TrustLevel; +use codex_protocol::openai_models::ReasoningEffort; use std::collections::BTreeMap; use std::path::Path; use std::path::PathBuf; @@ -555,6 +555,14 @@ impl ConfigEditsBuilder { self } + pub fn with_edits(mut self, edits: I) -> Self + where + I: IntoIterator, + { + self.edits.extend(edits); + self + } + /// Apply edits on a blocking thread. pub fn apply_blocking(self) -> anyhow::Result<()> { apply_blocking(&self.codex_home, self.profile.as_deref(), &self.edits) @@ -574,7 +582,7 @@ impl ConfigEditsBuilder { mod tests { use super::*; use crate::config::types::McpServerTransportConfig; - use codex_protocol::config_types::ReasoningEffort; + use codex_protocol::openai_models::ReasoningEffort; use pretty_assertions::assert_eq; use tempfile::tempdir; use tokio::runtime::Builder; @@ -603,6 +611,24 @@ model_reasoning_effort = "high" assert_eq!(contents, expected); } + #[test] + fn builder_with_edits_applies_custom_paths() { + let tmp = tempdir().expect("tmpdir"); + let codex_home = tmp.path(); + + ConfigEditsBuilder::new(codex_home) + .with_edits(vec![ConfigEdit::SetPath { + segments: vec!["enabled".to_string()], + value: value(true), + }]) + .apply_blocking() + .expect("persist"); + + let contents = + std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config"); + assert_eq!(contents, "enabled = true\n"); + } + #[test] fn 
blocking_set_model_preserves_inline_table_contents() { let tmp = tempdir().expect("tmpdir"); diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 6276d3b6e3..4c1a073c57 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -7,7 +7,6 @@ use crate::config::types::Notifications; use crate::config::types::OtelConfig; use crate::config::types::OtelConfigToml; use crate::config::types::OtelExporterKind; -use crate::config::types::ReasoningSummaryFormat; use crate::config::types::SandboxWorkspaceWrite; use crate::config::types::ShellEnvironmentPolicy; use crate::config::types::ShellEnvironmentPolicyToml; @@ -22,14 +21,10 @@ use crate::features::FeatureOverrides; use crate::features::Features; use crate::features::FeaturesToml; use crate::git_info::resolve_root_git_project_for_trust; -use crate::model_family::ModelFamily; -use crate::model_family::derive_default_model_family; -use crate::model_family::find_family_for_model; use crate::model_provider_info::LMSTUDIO_OSS_PROVIDER_ID; use crate::model_provider_info::ModelProviderInfo; use crate::model_provider_info::OLLAMA_OSS_PROVIDER_ID; use crate::model_provider_info::built_in_model_providers; -use crate::openai_model_info::get_model_info; use crate::project_doc::DEFAULT_PROJECT_DOC_FILENAME; use crate::project_doc::LOCAL_PROJECT_DOC_FILENAME; use crate::protocol::AskForApproval; @@ -38,12 +33,14 @@ use crate::util::resolve_path; use codex_app_server_protocol::Tools; use codex_app_server_protocol::UserSavedConfig; use codex_protocol::config_types::ForcedLoginMethod; -use codex_protocol::config_types::ReasoningEffort; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::config_types::SandboxMode; use codex_protocol::config_types::TrustLevel; use codex_protocol::config_types::Verbosity; +use codex_protocol::openai_models::ReasoningEffort; +use codex_protocol::openai_models::ReasoningSummaryFormat; use 
codex_rmcp_client::OAuthCredentialsStoreMode; +use codex_utils_absolute_path::AbsolutePathBufGuard; use dirs::home_dir; use dunce::canonicalize; use serde::Deserialize; @@ -53,6 +50,8 @@ use std::collections::HashMap; use std::io::ErrorKind; use std::path::Path; use std::path::PathBuf; +#[cfg(test)] +use tempfile::tempdir; use crate::config::profile::ConfigProfile; use toml::Value as TomlValue; @@ -62,9 +61,7 @@ pub mod edit; pub mod profile; pub mod types; -pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1-codex"; -const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5.1-codex"; -pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5.1-codex"; +const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5.1-codex-max"; /// Maximum number of bytes of the documentation that will be embedded. Larger /// files are *silently truncated* to this size so we do not take up too much of @@ -73,17 +70,26 @@ pub(crate) const PROJECT_DOC_MAX_BYTES: usize = 32 * 1024; // 32 KiB pub const CONFIG_TOML_FILE: &str = "config.toml"; +#[cfg(test)] +pub(crate) fn test_config() -> Config { + let codex_home = tempdir().expect("create temp dir"); + Config::load_from_base_config_with_overrides( + ConfigToml::default(), + ConfigOverrides::default(), + codex_home.path().to_path_buf(), + ) + .expect("load default test config") +} + /// Application configuration loaded from disk and merged with overrides. #[derive(Debug, Clone, PartialEq)] pub struct Config { /// Optional override of model selection. - pub model: String, + pub model: Option, /// Model used specifically for review sessions. Defaults to "gpt-5.1-codex-max". pub review_model: String, - pub model_family: ModelFamily, - /// Size of the context window for the model, in tokens. pub model_context_window: Option, @@ -196,6 +202,7 @@ pub struct Config { /// Additional filenames to try when looking for project-level docs. 
pub project_doc_fallback_filenames: Vec, + // todo(aibrahim): this should be used in the override model family /// Token budget applied when storing tool/function outputs in the context manager. pub tool_output_token_limit: Option, @@ -226,6 +233,12 @@ pub struct Config { /// request using the Responses API. pub model_reasoning_summary: ReasoningSummary, + /// Optional override to force-enable reasoning summaries for the configured model. + pub model_supports_reasoning_summaries: Option, + + /// Optional override to force reasoning summary format for the configured model. + pub model_reasoning_summary_format: Option, + /// Optional verbosity control for GPT-5 models (Responses API `text.verbosity`). pub model_verbosity: Option, @@ -245,9 +258,6 @@ pub struct Config { pub tools_web_search_request: bool, - /// When `true`, run a model-based assessment for commands denied by the sandbox. - pub experimental_sandbox_command_assessment: bool, - /// If set to `true`, used only the experimental unified exec tool. 
pub use_experimental_unified_exec_tool: bool, @@ -299,9 +309,9 @@ impl Config { ) .await?; - let cfg: ConfigToml = root_value.try_into().map_err(|e| { + let cfg = deserialize_config_toml_with_base(root_value, &codex_home).map_err(|e| { tracing::error!("Failed to deserialize overridden config: {e}"); - std::io::Error::new(std::io::ErrorKind::InvalidData, e) + e })?; Self::load_from_base_config_with_overrides(cfg, overrides, codex_home) @@ -319,9 +329,9 @@ pub async fn load_config_as_toml_with_cli_overrides( ) .await?; - let cfg: ConfigToml = root_value.try_into().map_err(|e| { + let cfg = deserialize_config_toml_with_base(root_value, codex_home).map_err(|e| { tracing::error!("Failed to deserialize overridden config: {e}"); - std::io::Error::new(std::io::ErrorKind::InvalidData, e) + e })?; Ok(cfg) @@ -357,6 +367,18 @@ fn apply_overlays( base } +fn deserialize_config_toml_with_base( + root_value: TomlValue, + config_base_dir: &Path, +) -> std::io::Result { + // This guard ensures that any relative paths that is deserialized into an + // [AbsolutePathBuf] is resolved against `config_base_dir`. + let _guard = AbsolutePathBufGuard::new(config_base_dir); + root_value + .try_into() + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e)) +} + pub async fn load_global_mcp_servers( codex_home: &Path, ) -> std::io::Result> { @@ -720,7 +742,6 @@ pub struct ConfigToml { pub experimental_use_unified_exec_tool: Option, pub experimental_use_rmcp_client: Option, pub experimental_use_freeform_apply_patch: Option, - pub experimental_sandbox_command_assessment: Option, /// Preferred OSS provider for local models, e.g. "lmstudio" or "ollama". pub oss_provider: Option, } @@ -906,7 +927,6 @@ pub struct ConfigOverrides { pub include_apply_patch_tool: Option, pub show_raw_agent_reasoning: Option, pub tools_web_search_request: Option, - pub experimental_sandbox_command_assessment: Option, /// Additional directories that should be treated as writable roots for this session. 
pub additional_writable_roots: Vec, } @@ -965,7 +985,6 @@ impl Config { include_apply_patch_tool: include_apply_patch_tool_override, show_raw_agent_reasoning, tools_web_search_request: override_tools_web_search_request, - experimental_sandbox_command_assessment: sandbox_command_assessment_override, additional_writable_roots, } = overrides; @@ -990,7 +1009,6 @@ impl Config { let feature_overrides = FeatureOverrides { include_apply_patch_tool: include_apply_patch_tool_override, web_search_request: override_tools_web_search_request, - experimental_sandbox_command_assessment: sandbox_command_assessment_override, }; let features = Features::from_config(&cfg, &config_profile, feature_overrides); @@ -1089,8 +1107,6 @@ impl Config { let tools_web_search_request = features.enabled(Feature::WebSearchRequest); let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec); let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient); - let experimental_sandbox_command_assessment = - features.enabled(Feature::SandboxCommandAssessment); let forced_chatgpt_workspace_id = cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| { @@ -1104,30 +1120,7 @@ impl Config { let forced_login_method = cfg.forced_login_method; - let model = model - .or(config_profile.model) - .or(cfg.model) - .unwrap_or_else(default_model); - - let mut model_family = - find_family_for_model(&model).unwrap_or_else(|| derive_default_model_family(&model)); - - if let Some(supports_reasoning_summaries) = cfg.model_supports_reasoning_summaries { - model_family.supports_reasoning_summaries = supports_reasoning_summaries; - } - if let Some(model_reasoning_summary_format) = cfg.model_reasoning_summary_format { - model_family.reasoning_summary_format = model_reasoning_summary_format; - } - - let openai_model_info = get_model_info(&model_family); - let model_context_window = cfg - .model_context_window - .or_else(|| openai_model_info.as_ref().map(|info| info.context_window)); - let 
model_auto_compact_token_limit = cfg.model_auto_compact_token_limit.or_else(|| { - openai_model_info - .as_ref() - .and_then(|info| info.auto_compact_token_limit) - }); + let model = model.or(config_profile.model).or(cfg.model); let compact_prompt = compact_prompt.or(cfg.compact_prompt).and_then(|value| { let trimmed = value.trim(); @@ -1174,9 +1167,8 @@ impl Config { let config = Self { model, review_model, - model_family, - model_context_window, - model_auto_compact_token_limit, + model_context_window: cfg.model_context_window, + model_auto_compact_token_limit: cfg.model_auto_compact_token_limit, model_provider_id, model_provider, cwd: resolved_cwd, @@ -1230,6 +1222,8 @@ impl Config { .model_reasoning_summary .or(cfg.model_reasoning_summary) .unwrap_or_default(), + model_supports_reasoning_summaries: cfg.model_supports_reasoning_summaries, + model_reasoning_summary_format: cfg.model_reasoning_summary_format.clone(), model_verbosity: config_profile.model_verbosity.or(cfg.model_verbosity), chatgpt_base_url: config_profile .chatgpt_base_url @@ -1239,7 +1233,6 @@ impl Config { forced_login_method, include_apply_patch_tool: include_apply_patch_tool_flag, tools_web_search_request, - experimental_sandbox_command_assessment, use_experimental_unified_exec_tool, use_experimental_use_rmcp_client, features, @@ -1328,10 +1321,6 @@ impl Config { } } -fn default_model() -> String { - OPENAI_DEFAULT_MODEL.to_string() -} - fn default_review_model() -> String { OPENAI_DEFAULT_REVIEW_MODEL.to_string() } @@ -1870,10 +1859,11 @@ trust_level = "trusted" }; let root_value = load_resolved_config(codex_home.path(), Vec::new(), overrides).await?; - let cfg: ConfigToml = root_value.try_into().map_err(|e| { - tracing::error!("Failed to deserialize overridden config: {e}"); - std::io::Error::new(std::io::ErrorKind::InvalidData, e) - })?; + let cfg = + deserialize_config_toml_with_base(root_value, codex_home.path()).map_err(|e| { + tracing::error!("Failed to deserialize overridden config: 
{e}"); + e + })?; assert_eq!( cfg.mcp_oauth_credentials_store, Some(OAuthCredentialsStoreMode::Keyring), @@ -1990,10 +1980,11 @@ trust_level = "trusted" ) .await?; - let cfg: ConfigToml = root_value.try_into().map_err(|e| { - tracing::error!("Failed to deserialize overridden config: {e}"); - std::io::Error::new(std::io::ErrorKind::InvalidData, e) - })?; + let cfg = + deserialize_config_toml_with_base(root_value, codex_home.path()).map_err(|e| { + tracing::error!("Failed to deserialize overridden config: {e}"); + e + })?; assert_eq!(cfg.model.as_deref(), Some("managed_config")); Ok(()) @@ -2953,11 +2944,10 @@ model_verbosity = "high" )?; assert_eq!( Config { - model: "o3".to_string(), + model: Some("o3".to_string()), review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(), - model_family: find_family_for_model("o3").expect("known model slug"), - model_context_window: Some(200_000), - model_auto_compact_token_limit: Some(180_000), + model_context_window: None, + model_auto_compact_token_limit: None, model_provider_id: "openai".to_string(), model_provider: fixture.openai_provider.clone(), approval_policy: AskForApproval::Never, @@ -2983,6 +2973,8 @@ model_verbosity = "high" show_raw_agent_reasoning: false, model_reasoning_effort: Some(ReasoningEffort::High), model_reasoning_summary: ReasoningSummary::Detailed, + model_supports_reasoning_summaries: None, + model_reasoning_summary_format: None, model_verbosity: None, chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), base_instructions: None, @@ -2992,7 +2984,6 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, - experimental_sandbox_command_assessment: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, features: Features::with_defaults(), @@ -3027,11 +3018,10 @@ model_verbosity = "high" fixture.codex_home(), )?; let expected_gpt3_profile_config = Config { - model: "gpt-3.5-turbo".to_string(), + 
model: Some("gpt-3.5-turbo".to_string()), review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(), - model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"), - model_context_window: Some(16_385), - model_auto_compact_token_limit: Some(14_746), + model_context_window: None, + model_auto_compact_token_limit: None, model_provider_id: "openai-chat-completions".to_string(), model_provider: fixture.openai_chat_completions_provider.clone(), approval_policy: AskForApproval::UnlessTrusted, @@ -3057,6 +3047,8 @@ model_verbosity = "high" show_raw_agent_reasoning: false, model_reasoning_effort: None, model_reasoning_summary: ReasoningSummary::default(), + model_supports_reasoning_summaries: None, + model_reasoning_summary_format: None, model_verbosity: None, chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), base_instructions: None, @@ -3066,7 +3058,6 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, - experimental_sandbox_command_assessment: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, features: Features::with_defaults(), @@ -3116,11 +3107,10 @@ model_verbosity = "high" fixture.codex_home(), )?; let expected_zdr_profile_config = Config { - model: "o3".to_string(), + model: Some("o3".to_string()), review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(), - model_family: find_family_for_model("o3").expect("known model slug"), - model_context_window: Some(200_000), - model_auto_compact_token_limit: Some(180_000), + model_context_window: None, + model_auto_compact_token_limit: None, model_provider_id: "openai".to_string(), model_provider: fixture.openai_provider.clone(), approval_policy: AskForApproval::OnFailure, @@ -3146,6 +3136,8 @@ model_verbosity = "high" show_raw_agent_reasoning: false, model_reasoning_effort: None, model_reasoning_summary: ReasoningSummary::default(), + model_supports_reasoning_summaries: None, + 
model_reasoning_summary_format: None, model_verbosity: None, chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), base_instructions: None, @@ -3155,7 +3147,6 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, - experimental_sandbox_command_assessment: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, features: Features::with_defaults(), @@ -3191,11 +3182,10 @@ model_verbosity = "high" fixture.codex_home(), )?; let expected_gpt5_profile_config = Config { - model: "gpt-5.1".to_string(), + model: Some("gpt-5.1".to_string()), review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(), - model_family: find_family_for_model("gpt-5.1").expect("known model slug"), - model_context_window: Some(272_000), - model_auto_compact_token_limit: Some(244_800), + model_context_window: None, + model_auto_compact_token_limit: None, model_provider_id: "openai".to_string(), model_provider: fixture.openai_provider.clone(), approval_policy: AskForApproval::OnFailure, @@ -3221,6 +3211,8 @@ model_verbosity = "high" show_raw_agent_reasoning: false, model_reasoning_effort: Some(ReasoningEffort::High), model_reasoning_summary: ReasoningSummary::Detailed, + model_supports_reasoning_summaries: None, + model_reasoning_summary_format: None, model_verbosity: Some(Verbosity::High), chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(), base_instructions: None, @@ -3230,7 +3222,6 @@ model_verbosity = "high" forced_login_method: None, include_apply_patch_tool: false, tools_web_search_request: false, - experimental_sandbox_command_assessment: false, use_experimental_unified_exec_tool: false, use_experimental_use_rmcp_client: false, features: Features::with_defaults(), diff --git a/codex-rs/core/src/config/profile.rs b/codex-rs/core/src/config/profile.rs index 3d9e60b8e5..978e1fcb63 100644 --- a/codex-rs/core/src/config/profile.rs +++ b/codex-rs/core/src/config/profile.rs @@ 
-2,10 +2,10 @@ use serde::Deserialize; use std::path::PathBuf; use crate::protocol::AskForApproval; -use codex_protocol::config_types::ReasoningEffort; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::config_types::SandboxMode; use codex_protocol::config_types::Verbosity; +use codex_protocol::openai_models::ReasoningEffort; /// Collection of common configuration options that a user can define as a unit /// in `config.toml`. @@ -27,7 +27,6 @@ pub struct ConfigProfile { pub experimental_use_unified_exec_tool: Option, pub experimental_use_rmcp_client: Option, pub experimental_use_freeform_apply_patch: Option, - pub experimental_sandbox_command_assessment: Option, pub tools_web_search: Option, pub tools_view_image: Option, /// Optional feature toggles scoped to this profile. diff --git a/codex-rs/core/src/config/types.rs b/codex-rs/core/src/config/types.rs index 5e1b78aa7b..ca16b7a560 100644 --- a/codex-rs/core/src/config/types.rs +++ b/codex-rs/core/src/config/types.rs @@ -3,13 +3,14 @@ // Note this file should generally be restricted to simple struct/enum // definitions that do not contain business logic. -use serde::Deserializer; +use codex_utils_absolute_path::AbsolutePathBuf; use std::collections::HashMap; use std::path::PathBuf; use std::time::Duration; use wildmatch::WildMatchPattern; use serde::Deserialize; +use serde::Deserializer; use serde::Serialize; use serde::de::Error as SerdeError; @@ -285,9 +286,9 @@ pub enum OtelHttpProtocol { #[derive(Deserialize, Debug, Clone, PartialEq, Default)] #[serde(rename_all = "kebab-case")] pub struct OtelTlsConfig { - pub ca_certificate: Option, - pub client_certificate: Option, - pub client_private_key: Option, + pub ca_certificate: Option, + pub client_certificate: Option, + pub client_private_key: Option, } /// Which OTEL exporter to use. 
@@ -521,14 +522,6 @@ impl From for ShellEnvironmentPolicy { } } -#[derive(Deserialize, Debug, Clone, PartialEq, Eq, Default, Hash)] -#[serde(rename_all = "kebab-case")] -pub enum ReasoningSummaryFormat { - #[default] - None, - Experimental, -} - #[cfg(test)] mod tests { use super::*; diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index b9a9c58f63..9b75c836ab 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -87,6 +87,7 @@ impl ContextManager { let items_tokens = self.items.iter().fold(0i64, |acc, item| { acc + match item { + ResponseItem::GhostSnapshot { .. } => 0, ResponseItem::Reasoning { encrypted_content: Some(content), .. diff --git a/codex-rs/core/src/conversation_manager.rs b/codex-rs/core/src/conversation_manager.rs index 0f4577bf1e..f340e1a833 100644 --- a/codex-rs/core/src/conversation_manager.rs +++ b/codex-rs/core/src/conversation_manager.rs @@ -1,5 +1,7 @@ use crate::AuthManager; use crate::CodexAuth; +#[cfg(any(test, feature = "test-support"))] +use crate::ModelProviderInfo; use crate::codex::Codex; use crate::codex::CodexSpawnOk; use crate::codex::INITIAL_SUBMIT_ID; @@ -7,6 +9,7 @@ use crate::codex_conversation::CodexConversation; use crate::config::Config; use crate::error::CodexErr; use crate::error::Result as CodexResult; +use crate::openai_models::models_manager::ModelsManager; use crate::protocol::Event; use crate::protocol::EventMsg; use crate::protocol::SessionConfiguredEvent; @@ -14,6 +17,7 @@ use crate::rollout::RolloutRecorder; use codex_protocol::ConversationId; use codex_protocol::items::TurnItem; use codex_protocol::models::ResponseItem; +use codex_protocol::openai_models::ModelPreset; use codex_protocol::protocol::InitialHistory; use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::SessionSource; @@ -35,6 +39,7 @@ pub struct NewConversation { pub struct ConversationManager { conversations: Arc>>>, 
auth_manager: Arc, + models_manager: Arc, session_source: SessionSource, } @@ -42,18 +47,23 @@ impl ConversationManager { pub fn new(auth_manager: Arc, session_source: SessionSource) -> Self { Self { conversations: Arc::new(RwLock::new(HashMap::new())), - auth_manager, + auth_manager: auth_manager.clone(), session_source, + models_manager: Arc::new(ModelsManager::new(auth_manager)), } } + #[cfg(any(test, feature = "test-support"))] /// Construct with a dummy AuthManager containing the provided CodexAuth. /// Used for integration tests: should not be used by ordinary business logic. - pub fn with_auth(auth: CodexAuth) -> Self { - Self::new( - crate::AuthManager::from_auth_for_testing(auth), - SessionSource::Exec, - ) + pub fn with_models_provider(auth: CodexAuth, provider: ModelProviderInfo) -> Self { + let auth_manager = crate::AuthManager::from_auth_for_testing(auth); + Self { + conversations: Arc::new(RwLock::new(HashMap::new())), + auth_manager: auth_manager.clone(), + session_source: SessionSource::Exec, + models_manager: Arc::new(ModelsManager::with_provider(auth_manager, provider)), + } } pub fn session_source(&self) -> SessionSource { @@ -61,14 +71,19 @@ impl ConversationManager { } pub async fn new_conversation(&self, config: Config) -> CodexResult { - self.spawn_conversation(config, self.auth_manager.clone()) - .await + self.spawn_conversation( + config, + self.auth_manager.clone(), + self.models_manager.clone(), + ) + .await } async fn spawn_conversation( &self, config: Config, auth_manager: Arc, + models_manager: Arc, ) -> CodexResult { let CodexSpawnOk { codex, @@ -76,6 +91,7 @@ impl ConversationManager { } = Codex::spawn( config, auth_manager, + models_manager, InitialHistory::New, self.session_source.clone(), ) @@ -152,6 +168,7 @@ impl ConversationManager { } = Codex::spawn( config, auth_manager, + self.models_manager.clone(), initial_history, self.session_source.clone(), ) @@ -189,10 +206,25 @@ impl ConversationManager { let CodexSpawnOk { codex, 
conversation_id, - } = Codex::spawn(config, auth_manager, history, self.session_source.clone()).await?; + } = Codex::spawn( + config, + auth_manager, + self.models_manager.clone(), + history, + self.session_source.clone(), + ) + .await?; self.finalize_spawn(codex, conversation_id).await } + + pub async fn list_models(&self, config: &Config) -> Vec { + self.models_manager.list_models(config).await + } + + pub fn get_models_manager(&self) -> Arc { + self.models_manager.clone() + } } /// Return a prefix of `items` obtained by cutting strictly before the nth user message diff --git a/codex-rs/core/src/default_client.rs b/codex-rs/core/src/default_client.rs index 29986c401d..7ae2f8c35a 100644 --- a/codex-rs/core/src/default_client.rs +++ b/codex-rs/core/src/default_client.rs @@ -1,17 +1,12 @@ use crate::spawn::CODEX_SANDBOX_ENV_VAR; -use http::Error as HttpError; -use reqwest::IntoUrl; -use reqwest::Method; -use reqwest::Response; -use reqwest::header::HeaderName; use reqwest::header::HeaderValue; -use serde::Serialize; -use std::collections::HashMap; -use std::fmt::Display; use std::sync::LazyLock; use std::sync::Mutex; use std::sync::OnceLock; +use codex_client::CodexHttpClient; +pub use codex_client::CodexRequestBuilder; + /// Set this to add a suffix to the User-Agent string. /// /// It is not ideal that we're using a global singleton for this. 
@@ -31,129 +26,6 @@ pub static USER_AGENT_SUFFIX: LazyLock>> = LazyLock::new(|| pub const DEFAULT_ORIGINATOR: &str = "codex_cli_rs"; pub const CODEX_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR: &str = "CODEX_INTERNAL_ORIGINATOR_OVERRIDE"; -#[derive(Clone, Debug)] -pub struct CodexHttpClient { - inner: reqwest::Client, -} - -impl CodexHttpClient { - fn new(inner: reqwest::Client) -> Self { - Self { inner } - } - - pub fn get(&self, url: U) -> CodexRequestBuilder - where - U: IntoUrl, - { - self.request(Method::GET, url) - } - - pub fn post(&self, url: U) -> CodexRequestBuilder - where - U: IntoUrl, - { - self.request(Method::POST, url) - } - - pub fn request(&self, method: Method, url: U) -> CodexRequestBuilder - where - U: IntoUrl, - { - let url_str = url.as_str().to_string(); - CodexRequestBuilder::new(self.inner.request(method.clone(), url), method, url_str) - } -} - -#[must_use = "requests are not sent unless `send` is awaited"] -#[derive(Debug)] -pub struct CodexRequestBuilder { - builder: reqwest::RequestBuilder, - method: Method, - url: String, -} - -impl CodexRequestBuilder { - fn new(builder: reqwest::RequestBuilder, method: Method, url: String) -> Self { - Self { - builder, - method, - url, - } - } - - fn map(self, f: impl FnOnce(reqwest::RequestBuilder) -> reqwest::RequestBuilder) -> Self { - Self { - builder: f(self.builder), - method: self.method, - url: self.url, - } - } - - pub fn header(self, key: K, value: V) -> Self - where - HeaderName: TryFrom, - >::Error: Into, - HeaderValue: TryFrom, - >::Error: Into, - { - self.map(|builder| builder.header(key, value)) - } - - pub fn bearer_auth(self, token: T) -> Self - where - T: Display, - { - self.map(|builder| builder.bearer_auth(token)) - } - - pub fn json(self, value: &T) -> Self - where - T: ?Sized + Serialize, - { - self.map(|builder| builder.json(value)) - } - - pub async fn send(self) -> Result { - match self.builder.send().await { - Ok(response) => { - let request_ids = 
Self::extract_request_ids(&response); - tracing::debug!( - method = %self.method, - url = %self.url, - status = %response.status(), - request_ids = ?request_ids, - version = ?response.version(), - "Request completed" - ); - - Ok(response) - } - Err(error) => { - let status = error.status(); - tracing::debug!( - method = %self.method, - url = %self.url, - status = status.map(|s| s.as_u16()), - error = %error, - "Request failed" - ); - Err(error) - } - } - } - - fn extract_request_ids(response: &Response) -> HashMap { - ["cf-ray", "x-request-id", "x-oai-request-id"] - .iter() - .filter_map(|&name| { - let header_name = HeaderName::from_static(name); - let value = response.headers().get(header_name)?; - let value = value.to_str().ok()?.to_owned(); - Some((name.to_owned(), value)) - }) - .collect() - } -} #[derive(Debug, Clone)] pub struct Originator { pub value: String, diff --git a/codex-rs/core/src/env.rs b/codex-rs/core/src/env.rs new file mode 100644 index 0000000000..5370c0ffd8 --- /dev/null +++ b/codex-rs/core/src/env.rs @@ -0,0 +1,19 @@ +//! Functions for environment detection that need to be shared across crates. + +/// Returns true if the current process is running under Windows Subsystem for Linux. 
+pub fn is_wsl() -> bool { + #[cfg(target_os = "linux")] + { + if std::env::var_os("WSL_DISTRO_NAME").is_some() { + return true; + } + match std::fs::read_to_string("/proc/version") { + Ok(version) => version.to_lowercase().contains("microsoft"), + Err(_) => false, + } + } + #[cfg(not(target_os = "linux"))] + { + false + } +} diff --git a/codex-rs/core/src/environment_context.rs b/codex-rs/core/src/environment_context.rs index 56e7f6cadb..54756bda2d 100644 --- a/codex-rs/core/src/environment_context.rs +++ b/codex-rs/core/src/environment_context.rs @@ -6,7 +6,6 @@ use crate::codex::TurnContext; use crate::protocol::AskForApproval; use crate::protocol::SandboxPolicy; use crate::shell::Shell; -use crate::shell::default_user_shell; use codex_protocol::config_types::SandboxMode; use codex_protocol::models::ContentItem; use codex_protocol::models::ResponseItem; @@ -95,7 +94,7 @@ impl EnvironmentContext { && self.writable_roots == *writable_roots } - pub fn diff(before: &TurnContext, after: &TurnContext) -> Self { + pub fn diff(before: &TurnContext, after: &TurnContext, shell: &Shell) -> Self { let cwd = if before.cwd != after.cwd { Some(after.cwd.clone()) } else { @@ -111,18 +110,15 @@ impl EnvironmentContext { } else { None }; - EnvironmentContext::new(cwd, approval_policy, sandbox_policy, default_user_shell()) + EnvironmentContext::new(cwd, approval_policy, sandbox_policy, shell.clone()) } -} -impl From<&TurnContext> for EnvironmentContext { - fn from(turn_context: &TurnContext) -> Self { + pub fn from_turn_context(turn_context: &TurnContext, shell: &Shell) -> Self { Self::new( Some(turn_context.cwd.clone()), Some(turn_context.approval_policy), Some(turn_context.sandbox_policy.clone()), - // Shell is not configurable from turn to turn - default_user_shell(), + shell.clone(), ) } } @@ -201,6 +197,7 @@ mod tests { Shell { shell_type: ShellType::Bash, shell_path: PathBuf::from("/bin/bash"), + shell_snapshot: None, } } @@ -338,6 +335,7 @@ mod tests { Shell { shell_type: 
ShellType::Bash, shell_path: "/bin/bash".into(), + shell_snapshot: None, }, ); let context2 = EnvironmentContext::new( @@ -347,6 +345,7 @@ mod tests { Shell { shell_type: ShellType::Zsh, shell_path: "/bin/zsh".into(), + shell_snapshot: None, }, ); diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index a25261d649..c7e0c2bde2 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -1,4 +1,3 @@ -use crate::codex::ProcessedResponseItem; use crate::exec::ExecToolCallOutput; use crate::token_data::KnownPlan; use crate::token_data::PlanType; @@ -61,9 +60,7 @@ pub enum SandboxErr { pub enum CodexErr { // todo(aibrahim): git rid of this error carrying the dangling artifacts #[error("turn aborted. Something went wrong? Hit `/feedback` to report the issue.")] - TurnAborted { - dangling_artifacts: Vec, - }, + TurnAborted, /// Returned by ResponsesClient when the SSE stream disconnects or errors out **after** the HTTP /// handshake has succeeded but **before** it finished emitting `response.completed`. 
@@ -181,9 +178,7 @@ pub enum CodexErr { impl From for CodexErr { fn from(_: CancelErr) -> Self { - CodexErr::TurnAborted { - dangling_artifacts: Vec::new(), - } + CodexErr::TurnAborted } } @@ -565,6 +560,7 @@ mod tests { resets_at: Some(secondary_reset_at), }), credits: None, + plan_type: None, } } diff --git a/codex-rs/core/src/event_mapping.rs b/codex-rs/core/src/event_mapping.rs index 6b4bed4db3..6ab6291a4b 100644 --- a/codex-rs/core/src/event_mapping.rs +++ b/codex-rs/core/src/event_mapping.rs @@ -13,6 +13,7 @@ use codex_protocol::user_input::UserInput; use tracing::warn; use uuid::Uuid; +use crate::user_instructions::SkillInstructions; use crate::user_instructions::UserInstructions; use crate::user_shell_command::is_user_shell_command_text; @@ -23,7 +24,9 @@ fn is_session_prefix(text: &str) -> bool { } fn parse_user_message(message: &[ContentItem]) -> Option { - if UserInstructions::is_user_instructions(message) { + if UserInstructions::is_user_instructions(message) + || SkillInstructions::is_skill_instructions(message) + { return None; } @@ -198,14 +201,22 @@ mod tests { text: "# AGENTS.md instructions for test_directory\n\n\ntest_text\n".to_string(), }], }, - ResponseItem::Message { - id: None, - role: "user".to_string(), - content: vec![ContentItem::InputText { - text: "echo 42".to_string(), - }], - }, - ]; + ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: "\ndemo\nskills/demo/SKILL.md\nbody\n" + .to_string(), + }], + }, + ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: "echo 42".to_string(), + }], + }, + ]; for item in items { let turn_item = parse_turn_item(&item); diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index f46444675f..596f325059 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -28,6 +28,7 @@ use crate::protocol::SandboxPolicy; use crate::sandboxing::CommandSpec; use 
crate::sandboxing::ExecEnv; use crate::sandboxing::SandboxManager; +use crate::sandboxing::SandboxPermissions; use crate::spawn::StdioPolicy; use crate::spawn::spawn_child_async; use crate::text_encoding::bytes_to_string_smart; @@ -55,7 +56,7 @@ pub struct ExecParams { pub cwd: PathBuf, pub expiration: ExecExpiration, pub env: HashMap, - pub with_escalated_permissions: Option, + pub sandbox_permissions: SandboxPermissions, pub justification: Option, pub arg0: Option, } @@ -144,7 +145,7 @@ pub async fn process_exec_tool_call( cwd, expiration, env, - with_escalated_permissions, + sandbox_permissions, justification, arg0: _, } = params; @@ -162,7 +163,7 @@ pub async fn process_exec_tool_call( cwd, env, expiration, - with_escalated_permissions, + sandbox_permissions, justification, }; @@ -192,7 +193,7 @@ pub(crate) async fn execute_exec_env( env, expiration, sandbox, - with_escalated_permissions, + sandbox_permissions, justification, arg0, } = env; @@ -202,7 +203,7 @@ pub(crate) async fn execute_exec_env( cwd, expiration, env, - with_escalated_permissions, + sandbox_permissions, justification, arg0, }; @@ -485,6 +486,19 @@ pub struct ExecToolCallOutput { pub timed_out: bool, } +impl Default for ExecToolCallOutput { + fn default() -> Self { + Self { + exit_code: 0, + stdout: StreamOutput::new(String::new()), + stderr: StreamOutput::new(String::new()), + aggregated_output: StreamOutput::new(String::new()), + duration: Duration::ZERO, + timed_out: false, + } + } +} + #[cfg_attr(not(target_os = "windows"), allow(unused_variables))] async fn exec( params: ExecParams, @@ -844,7 +858,7 @@ mod tests { cwd: std::env::current_dir()?, expiration: 500.into(), env, - with_escalated_permissions: None, + sandbox_permissions: SandboxPermissions::UseDefault, justification: None, arg0: None, }; @@ -889,7 +903,7 @@ mod tests { cwd: cwd.clone(), expiration: ExecExpiration::Cancellation(cancel_token), env, - with_escalated_permissions: None, + sandbox_permissions: 
SandboxPermissions::UseDefault, justification: None, arg0: None, }; diff --git a/codex-rs/core/src/exec_policy.rs b/codex-rs/core/src/exec_policy.rs index 602e5d679e..8917610ffc 100644 --- a/codex-rs/core/src/exec_policy.rs +++ b/codex-rs/core/src/exec_policy.rs @@ -4,25 +4,42 @@ use std::path::PathBuf; use std::sync::Arc; use crate::command_safety::is_dangerous_command::requires_initial_appoval; +use codex_execpolicy::AmendError; use codex_execpolicy::Decision; +use codex_execpolicy::Error as ExecPolicyRuleError; use codex_execpolicy::Evaluation; use codex_execpolicy::Policy; use codex_execpolicy::PolicyParser; +use codex_execpolicy::RuleMatch; +use codex_execpolicy::blocking_append_allow_prefix_rule; +use codex_protocol::approvals::ExecPolicyAmendment; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::SandboxPolicy; use thiserror::Error; use tokio::fs; +use tokio::sync::RwLock; +use tokio::task::spawn_blocking; use crate::bash::parse_shell_lc_plain_commands; use crate::features::Feature; use crate::features::Features; use crate::sandboxing::SandboxPermissions; -use crate::tools::sandboxing::ApprovalRequirement; +use crate::tools::sandboxing::ExecApprovalRequirement; const FORBIDDEN_REASON: &str = "execpolicy forbids this command"; +const PROMPT_CONFLICT_REASON: &str = + "execpolicy requires approval for this command, but AskForApproval is set to Never"; const PROMPT_REASON: &str = "execpolicy requires approval for this command"; -const POLICY_DIR_NAME: &str = "policy"; -const POLICY_EXTENSION: &str = "codexpolicy"; +const RULES_DIR_NAME: &str = "rules"; +const RULE_EXTENSION: &str = "rules"; +const DEFAULT_POLICY_FILE: &str = "default.rules"; + +fn is_policy_match(rule_match: &RuleMatch) -> bool { + match rule_match { + RuleMatch::PrefixRuleMatch { .. } => true, + RuleMatch::HeuristicsRuleMatch { .. 
} => false, + } +} #[derive(Debug, Error)] pub enum ExecPolicyError { @@ -45,15 +62,37 @@ pub enum ExecPolicyError { }, } -pub(crate) async fn exec_policy_for( +#[derive(Debug, Error)] +pub enum ExecPolicyUpdateError { + #[error("failed to update execpolicy file {path}: {source}")] + AppendRule { path: PathBuf, source: AmendError }, + + #[error("failed to join blocking execpolicy update task: {source}")] + JoinBlockingTask { source: tokio::task::JoinError }, + + #[error("failed to update in-memory execpolicy: {source}")] + AddRule { + #[from] + source: ExecPolicyRuleError, + }, + + #[error("cannot append execpolicy rule because execpolicy feature is disabled")] + FeatureDisabled, +} + +pub(crate) async fn load_exec_policy_for_features( features: &Features, codex_home: &Path, -) -> Result, ExecPolicyError> { +) -> Result { if !features.enabled(Feature::ExecPolicy) { - return Ok(Arc::new(Policy::empty())); + Ok(Policy::empty()) + } else { + load_exec_policy(codex_home).await } +} - let policy_dir = codex_home.join(POLICY_DIR_NAME); +pub async fn load_exec_policy(codex_home: &Path) -> Result { + let policy_dir = codex_home.join(RULES_DIR_NAME); let policy_paths = collect_policy_files(&policy_dir).await?; let mut parser = PolicyParser::new(); @@ -74,7 +113,7 @@ pub(crate) async fn exec_policy_for( })?; } - let policy = Arc::new(parser.build()); + let policy = parser.build(); tracing::debug!( "loaded execpolicy from {} files in {}", policy_paths.len(), @@ -84,59 +123,150 @@ pub(crate) async fn exec_policy_for( Ok(policy) } -fn evaluate_with_policy( - policy: &Policy, - command: &[String], - approval_policy: AskForApproval, -) -> Option { - let commands = parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]); - let evaluation = policy.check_multiple(commands.iter()); - - match evaluation { - Evaluation::Match { decision, .. 
} => match decision { - Decision::Forbidden => Some(ApprovalRequirement::Forbidden { - reason: FORBIDDEN_REASON.to_string(), - }), - Decision::Prompt => { - let reason = PROMPT_REASON.to_string(); - if matches!(approval_policy, AskForApproval::Never) { - Some(ApprovalRequirement::Forbidden { reason }) - } else { - Some(ApprovalRequirement::NeedsApproval { - reason: Some(reason), - }) - } - } - Decision::Allow => Some(ApprovalRequirement::Skip { - bypass_sandbox: true, - }), - }, - Evaluation::NoMatch { .. } => None, - } +pub(crate) fn default_policy_path(codex_home: &Path) -> PathBuf { + codex_home.join(RULES_DIR_NAME).join(DEFAULT_POLICY_FILE) } -pub(crate) async fn create_approval_requirement_for_command( - policy: &Policy, +pub(crate) async fn append_execpolicy_amendment_and_update( + codex_home: &Path, + current_policy: &Arc>, + prefix: &[String], +) -> Result<(), ExecPolicyUpdateError> { + let policy_path = default_policy_path(codex_home); + let prefix = prefix.to_vec(); + spawn_blocking({ + let policy_path = policy_path.clone(); + let prefix = prefix.clone(); + move || blocking_append_allow_prefix_rule(&policy_path, &prefix) + }) + .await + .map_err(|source| ExecPolicyUpdateError::JoinBlockingTask { source })? + .map_err(|source| ExecPolicyUpdateError::AppendRule { + path: policy_path, + source, + })?; + + current_policy + .write() + .await + .add_prefix_rule(&prefix, Decision::Allow)?; + + Ok(()) +} + +/// Derive a proposed execpolicy amendment when a command requires user approval +/// - If any execpolicy rule prompts, return None, because an amendment would not skip that policy requirement. +/// - Otherwise return the first heuristics Prompt. +/// - Examples: +/// - execpolicy: empty. Command: `["python"]`. Heuristics prompt -> `Some(vec!["python"])`. +/// - execpolicy: empty. Command: `["bash", "-c", "cd /some/folder && prog1 --option1 arg1 && prog2 --option2 arg2"]`. 
+/// Parsed commands include `cd /some/folder`, `prog1 --option1 arg1`, and `prog2 --option2 arg2`. If heuristics allow `cd` but prompt +/// on `prog1`, we return `Some(vec!["prog1", "--option1", "arg1"])`. +/// - execpolicy: contains a `prompt for prefix ["prog2"]` rule. For the same command as above, +/// we return `None` because an execpolicy prompt still applies even if we amend execpolicy to allow ["prog1", "--option1", "arg1"]. +fn try_derive_execpolicy_amendment_for_prompt_rules( + matched_rules: &[RuleMatch], +) -> Option { + if matched_rules + .iter() + .any(|rule_match| is_policy_match(rule_match) && rule_match.decision() == Decision::Prompt) + { + return None; + } + + matched_rules + .iter() + .find_map(|rule_match| match rule_match { + RuleMatch::HeuristicsRuleMatch { + command, + decision: Decision::Prompt, + } => Some(ExecPolicyAmendment::from(command.clone())), + _ => None, + }) +} + +/// - Note: we only use this amendment when the command fails to run in sandbox and codex prompts the user to run outside the sandbox +/// - The purpose of this amendment is to bypass sandbox for similar commands in the future +/// - If any execpolicy rule matches, return None, because we would already be running command outside the sandbox +fn try_derive_execpolicy_amendment_for_allow_rules( + matched_rules: &[RuleMatch], +) -> Option { + if matched_rules.iter().any(is_policy_match) { + return None; + } + + matched_rules + .iter() + .find_map(|rule_match| match rule_match { + RuleMatch::HeuristicsRuleMatch { + command, + decision: Decision::Allow, + } => Some(ExecPolicyAmendment::from(command.clone())), + _ => None, + }) +} + +/// Only return PROMPT_REASON when an execpolicy rule drove the prompt decision. 
+fn derive_prompt_reason(evaluation: &Evaluation) -> Option { + evaluation.matched_rules.iter().find_map(|rule_match| { + if is_policy_match(rule_match) && rule_match.decision() == Decision::Prompt { + Some(PROMPT_REASON.to_string()) + } else { + None + } + }) +} + +pub(crate) async fn create_exec_approval_requirement_for_command( + exec_policy: &Arc>, + features: &Features, command: &[String], approval_policy: AskForApproval, sandbox_policy: &SandboxPolicy, sandbox_permissions: SandboxPermissions, -) -> ApprovalRequirement { - if let Some(requirement) = evaluate_with_policy(policy, command, approval_policy) { - return requirement; - } - - if requires_initial_appoval( - approval_policy, - sandbox_policy, - command, - sandbox_permissions, - ) { - ApprovalRequirement::NeedsApproval { reason: None } - } else { - ApprovalRequirement::Skip { - bypass_sandbox: false, +) -> ExecApprovalRequirement { + let commands = parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]); + let heuristics_fallback = |cmd: &[String]| { + if requires_initial_appoval(approval_policy, sandbox_policy, cmd, sandbox_permissions) { + Decision::Prompt + } else { + Decision::Allow } + }; + let policy = exec_policy.read().await; + let evaluation = policy.check_multiple(commands.iter(), &heuristics_fallback); + + match evaluation.decision { + Decision::Forbidden => ExecApprovalRequirement::Forbidden { + reason: FORBIDDEN_REASON.to_string(), + }, + Decision::Prompt => { + if matches!(approval_policy, AskForApproval::Never) { + ExecApprovalRequirement::Forbidden { + reason: PROMPT_CONFLICT_REASON.to_string(), + } + } else { + ExecApprovalRequirement::NeedsApproval { + reason: derive_prompt_reason(&evaluation), + proposed_execpolicy_amendment: if features.enabled(Feature::ExecPolicy) { + try_derive_execpolicy_amendment_for_prompt_rules(&evaluation.matched_rules) + } else { + None + }, + } + } + } + Decision::Allow => ExecApprovalRequirement::Skip { + // Bypass sandbox if 
execpolicy allows the command + bypass_sandbox: evaluation.matched_rules.iter().any(|rule_match| { + is_policy_match(rule_match) && rule_match.decision() == Decision::Allow + }), + proposed_execpolicy_amendment: if features.enabled(Feature::ExecPolicy) { + try_derive_execpolicy_amendment_for_allow_rules(&evaluation.matched_rules) + } else { + None + }, + }, } } @@ -174,7 +304,7 @@ async fn collect_policy_files(dir: &Path) -> Result, ExecPolicyErro if path .extension() .and_then(|ext| ext.to_str()) - .is_some_and(|ext| ext == POLICY_EXTENSION) + .is_some_and(|ext| ext == RULE_EXTENSION) && file_type.is_file() { policy_paths.push(path); @@ -195,6 +325,7 @@ mod tests { use codex_protocol::protocol::SandboxPolicy; use pretty_assertions::assert_eq; use std::fs; + use std::sync::Arc; use tempfile::tempdir; #[tokio::test] @@ -203,23 +334,29 @@ mod tests { features.disable(Feature::ExecPolicy); let temp_dir = tempdir().expect("create temp dir"); - let policy = exec_policy_for(&features, temp_dir.path()) + let policy = load_exec_policy_for_features(&features, temp_dir.path()) .await .expect("policy result"); let commands = [vec!["rm".to_string()]]; - assert!(matches!( - policy.check_multiple(commands.iter()), - Evaluation::NoMatch { .. 
} - )); - assert!(!temp_dir.path().join(POLICY_DIR_NAME).exists()); + assert_eq!( + Evaluation { + decision: Decision::Allow, + matched_rules: vec![RuleMatch::HeuristicsRuleMatch { + command: vec!["rm".to_string()], + decision: Decision::Allow + }], + }, + policy.check_multiple(commands.iter(), &|_| Decision::Allow) + ); + assert!(!temp_dir.path().join(RULES_DIR_NAME).exists()); } #[tokio::test] async fn collect_policy_files_returns_empty_when_dir_missing() { let temp_dir = tempdir().expect("create temp dir"); - let policy_dir = temp_dir.path().join(POLICY_DIR_NAME); + let policy_dir = temp_dir.path().join(RULES_DIR_NAME); let files = collect_policy_files(&policy_dir) .await .expect("collect policy files"); @@ -230,53 +367,65 @@ mod tests { #[tokio::test] async fn loads_policies_from_policy_subdirectory() { let temp_dir = tempdir().expect("create temp dir"); - let policy_dir = temp_dir.path().join(POLICY_DIR_NAME); + let policy_dir = temp_dir.path().join(RULES_DIR_NAME); fs::create_dir_all(&policy_dir).expect("create policy dir"); fs::write( - policy_dir.join("deny.codexpolicy"), + policy_dir.join("deny.rules"), r#"prefix_rule(pattern=["rm"], decision="forbidden")"#, ) .expect("write policy file"); - let policy = exec_policy_for(&Features::with_defaults(), temp_dir.path()) + let policy = load_exec_policy(temp_dir.path()) .await .expect("policy result"); let command = [vec!["rm".to_string()]]; - assert!(matches!( - policy.check_multiple(command.iter()), - Evaluation::Match { .. 
} - )); + assert_eq!( + Evaluation { + decision: Decision::Forbidden, + matched_rules: vec![RuleMatch::PrefixRuleMatch { + matched_prefix: vec!["rm".to_string()], + decision: Decision::Forbidden + }], + }, + policy.check_multiple(command.iter(), &|_| Decision::Allow) + ); } #[tokio::test] async fn ignores_policies_outside_policy_dir() { let temp_dir = tempdir().expect("create temp dir"); fs::write( - temp_dir.path().join("root.codexpolicy"), + temp_dir.path().join("root.rules"), r#"prefix_rule(pattern=["ls"], decision="prompt")"#, ) .expect("write policy file"); - let policy = exec_policy_for(&Features::with_defaults(), temp_dir.path()) + let policy = load_exec_policy(temp_dir.path()) .await .expect("policy result"); let command = [vec!["ls".to_string()]]; - assert!(matches!( - policy.check_multiple(command.iter()), - Evaluation::NoMatch { .. } - )); + assert_eq!( + Evaluation { + decision: Decision::Allow, + matched_rules: vec![RuleMatch::HeuristicsRuleMatch { + command: vec!["ls".to_string()], + decision: Decision::Allow + }], + }, + policy.check_multiple(command.iter(), &|_| Decision::Allow) + ); } - #[test] - fn evaluates_bash_lc_inner_commands() { + #[tokio::test] + async fn evaluates_bash_lc_inner_commands() { let policy_src = r#" prefix_rule(pattern=["rm"], decision="forbidden") "#; let mut parser = PolicyParser::new(); parser - .parse("test.codexpolicy", policy_src) + .parse("test.rules", policy_src) .expect("parse policy"); - let policy = parser.build(); + let policy = Arc::new(RwLock::new(parser.build())); let forbidden_script = vec![ "bash".to_string(), @@ -284,30 +433,37 @@ prefix_rule(pattern=["rm"], decision="forbidden") "rm -rf /tmp".to_string(), ]; - let requirement = - evaluate_with_policy(&policy, &forbidden_script, AskForApproval::OnRequest) - .expect("expected match for forbidden command"); + let requirement = create_exec_approval_requirement_for_command( + &policy, + &Features::with_defaults(), + &forbidden_script, + AskForApproval::OnRequest, 
+ &SandboxPolicy::DangerFullAccess, + SandboxPermissions::UseDefault, + ) + .await; assert_eq!( requirement, - ApprovalRequirement::Forbidden { + ExecApprovalRequirement::Forbidden { reason: FORBIDDEN_REASON.to_string() } ); } #[tokio::test] - async fn approval_requirement_prefers_execpolicy_match() { + async fn exec_approval_requirement_prefers_execpolicy_match() { let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#; let mut parser = PolicyParser::new(); parser - .parse("test.codexpolicy", policy_src) + .parse("test.rules", policy_src) .expect("parse policy"); - let policy = parser.build(); + let policy = Arc::new(RwLock::new(parser.build())); let command = vec!["rm".to_string()]; - let requirement = create_approval_requirement_for_command( + let requirement = create_exec_approval_requirement_for_command( &policy, + &Features::with_defaults(), &command, AskForApproval::OnRequest, &SandboxPolicy::DangerFullAccess, @@ -317,24 +473,26 @@ prefix_rule(pattern=["rm"], decision="forbidden") assert_eq!( requirement, - ApprovalRequirement::NeedsApproval { - reason: Some(PROMPT_REASON.to_string()) + ExecApprovalRequirement::NeedsApproval { + reason: Some(PROMPT_REASON.to_string()), + proposed_execpolicy_amendment: None, } ); } #[tokio::test] - async fn approval_requirement_respects_approval_policy() { + async fn exec_approval_requirement_respects_approval_policy() { let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#; let mut parser = PolicyParser::new(); parser - .parse("test.codexpolicy", policy_src) + .parse("test.rules", policy_src) .expect("parse policy"); - let policy = parser.build(); + let policy = Arc::new(RwLock::new(parser.build())); let command = vec!["rm".to_string()]; - let requirement = create_approval_requirement_for_command( + let requirement = create_exec_approval_requirement_for_command( &policy, + &Features::with_defaults(), &command, AskForApproval::Never, &SandboxPolicy::DangerFullAccess, @@ -344,19 +502,20 @@ 
prefix_rule(pattern=["rm"], decision="forbidden") assert_eq!( requirement, - ApprovalRequirement::Forbidden { - reason: PROMPT_REASON.to_string() + ExecApprovalRequirement::Forbidden { + reason: PROMPT_CONFLICT_REASON.to_string() } ); } #[tokio::test] - async fn approval_requirement_falls_back_to_heuristics() { - let command = vec!["python".to_string()]; + async fn exec_approval_requirement_falls_back_to_heuristics() { + let command = vec!["cargo".to_string(), "build".to_string()]; - let empty_policy = Policy::empty(); - let requirement = create_approval_requirement_for_command( + let empty_policy = Arc::new(RwLock::new(Policy::empty())); + let requirement = create_exec_approval_requirement_for_command( &empty_policy, + &Features::with_defaults(), &command, AskForApproval::UnlessTrusted, &SandboxPolicy::ReadOnly, @@ -366,7 +525,285 @@ prefix_rule(pattern=["rm"], decision="forbidden") assert_eq!( requirement, - ApprovalRequirement::NeedsApproval { reason: None } + ExecApprovalRequirement::NeedsApproval { + reason: None, + proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)) + } + ); + } + + #[tokio::test] + async fn heuristics_apply_when_other_commands_match_policy() { + let policy_src = r#"prefix_rule(pattern=["apple"], decision="allow")"#; + let mut parser = PolicyParser::new(); + parser + .parse("test.rules", policy_src) + .expect("parse policy"); + let policy = Arc::new(RwLock::new(parser.build())); + let command = vec![ + "bash".to_string(), + "-lc".to_string(), + "apple | orange".to_string(), + ]; + + assert_eq!( + create_exec_approval_requirement_for_command( + &policy, + &Features::with_defaults(), + &command, + AskForApproval::UnlessTrusted, + &SandboxPolicy::DangerFullAccess, + SandboxPermissions::UseDefault, + ) + .await, + ExecApprovalRequirement::NeedsApproval { + reason: None, + proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![ + "orange".to_string() + ])) + } + ); + } + + #[tokio::test] + async fn 
append_execpolicy_amendment_updates_policy_and_file() { + let codex_home = tempdir().expect("create temp dir"); + let current_policy = Arc::new(RwLock::new(Policy::empty())); + let prefix = vec!["echo".to_string(), "hello".to_string()]; + + append_execpolicy_amendment_and_update(codex_home.path(), ¤t_policy, &prefix) + .await + .expect("update policy"); + + let evaluation = current_policy.read().await.check( + &["echo".to_string(), "hello".to_string(), "world".to_string()], + &|_| Decision::Allow, + ); + assert!(matches!( + evaluation, + Evaluation { + decision: Decision::Allow, + .. + } + )); + + let contents = fs::read_to_string(default_policy_path(codex_home.path())) + .expect("policy file should have been created"); + assert_eq!( + contents, + r#"prefix_rule(pattern=["echo", "hello"], decision="allow") +"# + ); + } + + #[tokio::test] + async fn append_execpolicy_amendment_rejects_empty_prefix() { + let codex_home = tempdir().expect("create temp dir"); + let current_policy = Arc::new(RwLock::new(Policy::empty())); + + let result = + append_execpolicy_amendment_and_update(codex_home.path(), ¤t_policy, &[]).await; + + assert!(matches!( + result, + Err(ExecPolicyUpdateError::AppendRule { + source: AmendError::EmptyPrefix, + .. 
+ }) + )); + } + + #[tokio::test] + async fn proposed_execpolicy_amendment_is_present_for_single_command_without_policy_match() { + let command = vec!["cargo".to_string(), "build".to_string()]; + + let empty_policy = Arc::new(RwLock::new(Policy::empty())); + let requirement = create_exec_approval_requirement_for_command( + &empty_policy, + &Features::with_defaults(), + &command, + AskForApproval::UnlessTrusted, + &SandboxPolicy::ReadOnly, + SandboxPermissions::UseDefault, + ) + .await; + + assert_eq!( + requirement, + ExecApprovalRequirement::NeedsApproval { + reason: None, + proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)) + } + ); + } + + #[tokio::test] + async fn proposed_execpolicy_amendment_is_disabled_when_execpolicy_feature_disabled() { + let command = vec!["cargo".to_string(), "build".to_string()]; + + let mut features = Features::with_defaults(); + features.disable(Feature::ExecPolicy); + + let requirement = create_exec_approval_requirement_for_command( + &Arc::new(RwLock::new(Policy::empty())), + &features, + &command, + AskForApproval::UnlessTrusted, + &SandboxPolicy::ReadOnly, + SandboxPermissions::UseDefault, + ) + .await; + + assert_eq!( + requirement, + ExecApprovalRequirement::NeedsApproval { + reason: None, + proposed_execpolicy_amendment: None, + } + ); + } + + #[tokio::test] + async fn proposed_execpolicy_amendment_is_omitted_when_policy_prompts() { + let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#; + let mut parser = PolicyParser::new(); + parser + .parse("test.rules", policy_src) + .expect("parse policy"); + let policy = Arc::new(RwLock::new(parser.build())); + let command = vec!["rm".to_string()]; + + let requirement = create_exec_approval_requirement_for_command( + &policy, + &Features::with_defaults(), + &command, + AskForApproval::OnRequest, + &SandboxPolicy::DangerFullAccess, + SandboxPermissions::UseDefault, + ) + .await; + + assert_eq!( + requirement, + ExecApprovalRequirement::NeedsApproval { + 
reason: Some(PROMPT_REASON.to_string()), + proposed_execpolicy_amendment: None, + } + ); + } + + #[tokio::test] + async fn proposed_execpolicy_amendment_is_present_for_multi_command_scripts() { + let command = vec![ + "bash".to_string(), + "-lc".to_string(), + "cargo build && echo ok".to_string(), + ]; + let requirement = create_exec_approval_requirement_for_command( + &Arc::new(RwLock::new(Policy::empty())), + &Features::with_defaults(), + &command, + AskForApproval::UnlessTrusted, + &SandboxPolicy::ReadOnly, + SandboxPermissions::UseDefault, + ) + .await; + + assert_eq!( + requirement, + ExecApprovalRequirement::NeedsApproval { + reason: None, + proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![ + "cargo".to_string(), + "build".to_string() + ])), + } + ); + } + + #[tokio::test] + async fn proposed_execpolicy_amendment_uses_first_no_match_in_multi_command_scripts() { + let policy_src = r#"prefix_rule(pattern=["cat"], decision="allow")"#; + let mut parser = PolicyParser::new(); + parser + .parse("test.rules", policy_src) + .expect("parse policy"); + let policy = Arc::new(RwLock::new(parser.build())); + + let command = vec![ + "bash".to_string(), + "-lc".to_string(), + "cat && apple".to_string(), + ]; + + assert_eq!( + create_exec_approval_requirement_for_command( + &policy, + &Features::with_defaults(), + &command, + AskForApproval::UnlessTrusted, + &SandboxPolicy::ReadOnly, + SandboxPermissions::UseDefault, + ) + .await, + ExecApprovalRequirement::NeedsApproval { + reason: None, + proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![ + "apple".to_string() + ])), + } + ); + } + + #[tokio::test] + async fn proposed_execpolicy_amendment_is_present_when_heuristics_allow() { + let command = vec!["echo".to_string(), "safe".to_string()]; + + let requirement = create_exec_approval_requirement_for_command( + &Arc::new(RwLock::new(Policy::empty())), + &Features::with_defaults(), + &command, + AskForApproval::OnRequest, + 
&SandboxPolicy::ReadOnly, + SandboxPermissions::UseDefault, + ) + .await; + + assert_eq!( + requirement, + ExecApprovalRequirement::Skip { + bypass_sandbox: false, + proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)), + } + ); + } + + #[tokio::test] + async fn proposed_execpolicy_amendment_is_suppressed_when_policy_matches_allow() { + let policy_src = r#"prefix_rule(pattern=["echo"], decision="allow")"#; + let mut parser = PolicyParser::new(); + parser + .parse("test.rules", policy_src) + .expect("parse policy"); + let policy = Arc::new(RwLock::new(parser.build())); + let command = vec!["echo".to_string(), "safe".to_string()]; + + let requirement = create_exec_approval_requirement_for_command( + &policy, + &Features::with_defaults(), + &command, + AskForApproval::OnRequest, + &SandboxPolicy::ReadOnly, + SandboxPermissions::UseDefault, + ) + .await; + + assert_eq!( + requirement, + ExecApprovalRequirement::Skip { + bypass_sandbox: true, + proposed_execpolicy_amendment: None, + } ); } } diff --git a/codex-rs/core/src/features.rs b/codex-rs/core/src/features.rs index 687e8b1627..a011884fc4 100644 --- a/codex-rs/core/src/features.rs +++ b/codex-rs/core/src/features.rs @@ -27,34 +27,41 @@ pub enum Stage { /// Unique features toggled via configuration. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Feature { + // Stable. /// Create a ghost commit at each turn. GhostCommit, + /// Include the view_image tool. + ViewImageTool, + /// Send warnings to the model to correct it on the tool usage. + ModelWarnings, + /// Enable the default shell tool. + ShellTool, + + // Experimental /// Use the single unified PTY-backed exec tool. UnifiedExec, /// Enable experimental RMCP features such as OAuth login. RmcpClient, /// Include the freeform apply_patch tool. ApplyPatchFreeform, - /// Include the view_image tool. - ViewImageTool, /// Allow the model to request web searches. 
WebSearchRequest, /// Gate the execpolicy enforcement for shell/unified exec. ExecPolicy, - /// Enable the model-based risk assessments for sandboxed commands. - SandboxCommandAssessment, /// Enable Windows sandbox (restricted token) on Windows. WindowsSandbox, /// Remote compaction enabled (only for ChatGPT auth) RemoteCompaction, - /// Enable the default shell tool. - ShellTool, + /// Refresh remote models and emit AppReady once the list is available. + RemoteModels, /// Allow model to call multiple tools in parallel (only for models supporting it). ParallelToolCalls, /// Experimental skills injection (CLI flag-driven). Skills, - /// Send warnings to the model to correct it on the tool usage. - ModelWarnings, + /// Experimental shell snapshotting. + ShellSnapshot, + /// Experimental TUI v2 (viewport) implementation. + Tui2, } impl Feature { @@ -95,7 +102,6 @@ pub struct Features { pub struct FeatureOverrides { pub include_apply_patch_tool: Option, pub web_search_request: Option, - pub experimental_sandbox_command_assessment: Option, } impl FeatureOverrides { @@ -187,7 +193,6 @@ impl Features { let mut features = Features::with_defaults(); let base_legacy = LegacyFeatureToggles { - experimental_sandbox_command_assessment: cfg.experimental_sandbox_command_assessment, experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch, experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool, experimental_use_rmcp_client: cfg.experimental_use_rmcp_client, @@ -203,8 +208,6 @@ impl Features { let profile_legacy = LegacyFeatureToggles { include_apply_patch_tool: config_profile.include_apply_patch_tool, - experimental_sandbox_command_assessment: config_profile - .experimental_sandbox_command_assessment, experimental_use_freeform_apply_patch: config_profile .experimental_use_freeform_apply_patch, @@ -263,6 +266,12 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::Stable, default_enabled: true, }, + FeatureSpec { + id: 
Feature::ParallelToolCalls, + key: "parallel", + stage: Stage::Stable, + default_enabled: true, + }, FeatureSpec { id: Feature::ViewImageTool, key: "view_image_tool", @@ -275,6 +284,12 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::Stable, default_enabled: true, }, + FeatureSpec { + id: Feature::ModelWarnings, + key: "warnings", + stage: Stage::Stable, + default_enabled: true, + }, // Unstable features. FeatureSpec { id: Feature::UnifiedExec, @@ -306,12 +321,6 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::Experimental, default_enabled: true, }, - FeatureSpec { - id: Feature::SandboxCommandAssessment, - key: "experimental_sandbox_command_assessment", - stage: Stage::Experimental, - default_enabled: false, - }, FeatureSpec { id: Feature::WindowsSandbox, key: "enable_experimental_windows_sandbox", @@ -325,14 +334,8 @@ pub const FEATURES: &[FeatureSpec] = &[ default_enabled: true, }, FeatureSpec { - id: Feature::ParallelToolCalls, - key: "parallel", - stage: Stage::Experimental, - default_enabled: false, - }, - FeatureSpec { - id: Feature::ModelWarnings, - key: "warnings", + id: Feature::RemoteModels, + key: "remote_models", stage: Stage::Experimental, default_enabled: false, }, @@ -342,4 +345,16 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::Experimental, default_enabled: false, }, + FeatureSpec { + id: Feature::ShellSnapshot, + key: "shell_snapshot", + stage: Stage::Experimental, + default_enabled: false, + }, + FeatureSpec { + id: Feature::Tui2, + key: "tui2", + stage: Stage::Experimental, + default_enabled: false, + }, ]; diff --git a/codex-rs/core/src/features/legacy.rs b/codex-rs/core/src/features/legacy.rs index 4d59f2a9a3..0c74d380e8 100644 --- a/codex-rs/core/src/features/legacy.rs +++ b/codex-rs/core/src/features/legacy.rs @@ -9,10 +9,6 @@ struct Alias { } const ALIASES: &[Alias] = &[ - Alias { - legacy_key: "experimental_sandbox_command_assessment", - feature: Feature::SandboxCommandAssessment, - }, Alias { legacy_key: 
"experimental_use_unified_exec_tool", feature: Feature::UnifiedExec, @@ -48,7 +44,6 @@ pub(crate) fn feature_for_key(key: &str) -> Option { #[derive(Debug, Default)] pub struct LegacyFeatureToggles { pub include_apply_patch_tool: Option, - pub experimental_sandbox_command_assessment: Option, pub experimental_use_freeform_apply_patch: Option, pub experimental_use_unified_exec_tool: Option, pub experimental_use_rmcp_client: Option, @@ -64,12 +59,6 @@ impl LegacyFeatureToggles { self.include_apply_patch_tool, "include_apply_patch_tool", ); - set_if_some( - features, - Feature::SandboxCommandAssessment, - self.experimental_sandbox_command_assessment, - "experimental_sandbox_command_assessment", - ); set_if_some( features, Feature::ApplyPatchFreeform, diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index d9ab6ee51f..f69e7b8fb1 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -21,6 +21,7 @@ pub mod config; pub mod config_loader; mod context_manager; pub mod custom_prompts; +pub mod env; mod environment_context; pub mod error; pub mod exec; @@ -32,6 +33,7 @@ pub mod git_info; pub mod landlock; pub mod mcp; mod mcp_connection_manager; +pub mod openai_models; pub use mcp_connection_manager::MCP_SANDBOX_STATE_CAPABILITY; pub use mcp_connection_manager::MCP_SANDBOX_STATE_NOTIFICATION; pub use mcp_connection_manager::SandboxState; @@ -40,8 +42,8 @@ mod message_history; mod model_provider_info; pub mod parse_command; pub mod powershell; -mod response_processing; pub mod sandboxing; +mod stream_events_utils; mod text_encoding; pub mod token_data; mod truncate; @@ -66,13 +68,12 @@ pub use conversation_manager::NewConversation; pub use auth::AuthManager; pub use auth::CodexAuth; pub mod default_client; -pub mod model_family; -mod openai_model_info; pub mod project_doc; mod rollout; pub(crate) mod safety; pub mod seatbelt; pub mod shell; +pub mod shell_snapshot; pub mod skills; pub mod spawn; pub mod terminal; @@ -97,7 +98,10 @@ mod 
user_shell_command; pub mod util; pub use apply_patch::CODEX_APPLY_PATCH_ARG1; +pub use command_safety::is_dangerous_command; pub use command_safety::is_safe_command; +pub use exec_policy::ExecPolicyError; +pub use exec_policy::load_exec_policy; pub use safety::get_platform_sandbox; pub use safety::set_windows_sandbox_enabled; // Re-export the protocol types from the standalone `codex-protocol` crate so existing diff --git a/codex-rs/core/src/message_history.rs b/codex-rs/core/src/message_history.rs index e46dd93067..ecc6851336 100644 --- a/codex-rs/core/src/message_history.rs +++ b/codex-rs/core/src/message_history.rs @@ -590,7 +590,7 @@ mod tests { assert_eq!(entries.len(), 1); assert_eq!(entries[0].text, long_entry); - let pruned_len = std::fs::metadata(&history_path).expect("metadata").len() as u64; + let pruned_len = std::fs::metadata(&history_path).expect("metadata").len(); let max_bytes = config .history .max_bytes diff --git a/codex-rs/core/src/model_family.rs b/codex-rs/core/src/model_family.rs deleted file mode 100644 index 5dea1c0168..0000000000 --- a/codex-rs/core/src/model_family.rs +++ /dev/null @@ -1,263 +0,0 @@ -use codex_protocol::config_types::ReasoningEffort; -use codex_protocol::config_types::Verbosity; - -use crate::config::types::ReasoningSummaryFormat; -use crate::tools::handlers::apply_patch::ApplyPatchToolType; -use crate::tools::spec::ConfigShellToolType; -use crate::truncate::TruncationPolicy; - -/// The `instructions` field in the payload sent to a model should always start -/// with this content. -const BASE_INSTRUCTIONS: &str = include_str!("../prompt.md"); - -const GPT_5_CODEX_INSTRUCTIONS: &str = include_str!("../gpt_5_codex_prompt.md"); -const GPT_5_1_INSTRUCTIONS: &str = include_str!("../gpt_5_1_prompt.md"); -const GPT_5_1_CODEX_MAX_INSTRUCTIONS: &str = include_str!("../gpt-5.1-codex-max_prompt.md"); - -/// A model family is a group of models that share certain characteristics. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ModelFamily { - /// The full model slug used to derive this model family, e.g. - /// "gpt-4.1-2025-04-14". - pub slug: String, - - /// The model family name, e.g. "gpt-4.1". Note this should able to be used - /// with [`crate::openai_model_info::get_model_info`]. - pub family: String, - - /// True if the model needs additional instructions on how to use the - /// "virtual" `apply_patch` CLI. - pub needs_special_apply_patch_instructions: bool, - - // Whether the `reasoning` field can be set when making a request to this - // model family. Note it has `effort` and `summary` subfields (though - // `summary` is optional). - pub supports_reasoning_summaries: bool, - - // The reasoning effort to use for this model family when none is explicitly chosen. - pub default_reasoning_effort: Option, - - // Define if we need a special handling of reasoning summary - pub reasoning_summary_format: ReasoningSummaryFormat, - - /// Whether this model supports parallel tool calls when using the - /// Responses API. - pub supports_parallel_tool_calls: bool, - - /// Present if the model performs better when `apply_patch` is provided as - /// a tool call instead of just a bash command - pub apply_patch_tool_type: Option, - - // Instructions to use for querying the model - pub base_instructions: String, - - /// Names of beta tools that should be exposed to this model family. - pub experimental_supported_tools: Vec, - - /// Percentage of the context window considered usable for inputs, after - /// reserving headroom for system prompts, tool overhead, and model output. - /// This is applied when computing the effective context window seen by - /// consumers. - pub effective_context_window_percent: i64, - - /// If the model family supports setting the verbosity level when using Responses API. - pub support_verbosity: bool, - - // The default verbosity level for this model family when using Responses API. 
- pub default_verbosity: Option, - - /// Preferred shell tool type for this model family when features do not override it. - pub shell_type: ConfigShellToolType, - - pub truncation_policy: TruncationPolicy, -} - -macro_rules! model_family { - ( - $slug:expr, $family:expr $(, $key:ident : $value:expr )* $(,)? - ) => {{ - // defaults - #[allow(unused_mut)] - let mut mf = ModelFamily { - slug: $slug.to_string(), - family: $family.to_string(), - needs_special_apply_patch_instructions: false, - supports_reasoning_summaries: false, - reasoning_summary_format: ReasoningSummaryFormat::None, - supports_parallel_tool_calls: false, - apply_patch_tool_type: None, - base_instructions: BASE_INSTRUCTIONS.to_string(), - experimental_supported_tools: Vec::new(), - effective_context_window_percent: 95, - support_verbosity: false, - shell_type: ConfigShellToolType::Default, - default_verbosity: None, - default_reasoning_effort: None, - truncation_policy: TruncationPolicy::Bytes(10_000), - }; - - // apply overrides - $( - mf.$key = $value; - )* - Some(mf) - }}; -} - -/// Returns a `ModelFamily` for the given model slug, or `None` if the slug -/// does not match any known model family. 
-pub fn find_family_for_model(slug: &str) -> Option { - if slug.starts_with("o3") { - model_family!( - slug, "o3", - supports_reasoning_summaries: true, - needs_special_apply_patch_instructions: true, - ) - } else if slug.starts_with("o4-mini") { - model_family!( - slug, "o4-mini", - supports_reasoning_summaries: true, - needs_special_apply_patch_instructions: true, - ) - } else if slug.starts_with("codex-mini-latest") { - model_family!( - slug, "codex-mini-latest", - supports_reasoning_summaries: true, - needs_special_apply_patch_instructions: true, - shell_type: ConfigShellToolType::Local, - ) - } else if slug.starts_with("gpt-4.1") { - model_family!( - slug, "gpt-4.1", - needs_special_apply_patch_instructions: true, - ) - } else if slug.starts_with("gpt-oss") || slug.starts_with("openai/gpt-oss") { - model_family!(slug, "gpt-oss", apply_patch_tool_type: Some(ApplyPatchToolType::Function)) - } else if slug.starts_with("gpt-4o") { - model_family!(slug, "gpt-4o", needs_special_apply_patch_instructions: true) - } else if slug.starts_with("gpt-3.5") { - model_family!(slug, "gpt-3.5", needs_special_apply_patch_instructions: true) - } else if slug.starts_with("test-gpt-5") { - model_family!( - slug, slug, - supports_reasoning_summaries: true, - reasoning_summary_format: ReasoningSummaryFormat::Experimental, - base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(), - experimental_supported_tools: vec![ - "grep_files".to_string(), - "list_dir".to_string(), - "read_file".to_string(), - "test_sync_tool".to_string(), - ], - supports_parallel_tool_calls: true, - shell_type: ConfigShellToolType::ShellCommand, - support_verbosity: true, - truncation_policy: TruncationPolicy::Tokens(10_000), - ) - - // Internal models. 
- } else if slug.starts_with("codex-exp-") { - model_family!( - slug, slug, - supports_reasoning_summaries: true, - reasoning_summary_format: ReasoningSummaryFormat::Experimental, - base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(), - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - experimental_supported_tools: vec![ - "grep_files".to_string(), - "list_dir".to_string(), - "read_file".to_string(), - ], - shell_type: ConfigShellToolType::ShellCommand, - supports_parallel_tool_calls: true, - support_verbosity: true, - truncation_policy: TruncationPolicy::Tokens(10_000), - ) - } else if slug.starts_with("exp-") { - model_family!( - slug, slug, - supports_reasoning_summaries: true, - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - support_verbosity: true, - default_verbosity: Some(Verbosity::Low), - base_instructions: BASE_INSTRUCTIONS.to_string(), - default_reasoning_effort: Some(ReasoningEffort::Medium), - truncation_policy: TruncationPolicy::Bytes(10_000), - shell_type: ConfigShellToolType::UnifiedExec, - supports_parallel_tool_calls: true, - ) - - // Production models. 
- } else if slug.starts_with("gpt-5.1-codex-max") { - model_family!( - slug, slug, - supports_reasoning_summaries: true, - reasoning_summary_format: ReasoningSummaryFormat::Experimental, - base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(), - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - shell_type: ConfigShellToolType::ShellCommand, - supports_parallel_tool_calls: true, - support_verbosity: false, - truncation_policy: TruncationPolicy::Tokens(10_000), - ) - } else if slug.starts_with("gpt-5-codex") - || slug.starts_with("gpt-5.1-codex") - || slug.starts_with("codex-") - { - model_family!( - slug, slug, - supports_reasoning_summaries: true, - reasoning_summary_format: ReasoningSummaryFormat::Experimental, - base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(), - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - shell_type: ConfigShellToolType::ShellCommand, - supports_parallel_tool_calls: true, - support_verbosity: false, - truncation_policy: TruncationPolicy::Tokens(10_000), - ) - } else if slug.starts_with("gpt-5.1") { - model_family!( - slug, "gpt-5.1", - supports_reasoning_summaries: true, - apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), - support_verbosity: true, - default_verbosity: Some(Verbosity::Low), - base_instructions: GPT_5_1_INSTRUCTIONS.to_string(), - default_reasoning_effort: Some(ReasoningEffort::Medium), - truncation_policy: TruncationPolicy::Bytes(10_000), - shell_type: ConfigShellToolType::ShellCommand, - supports_parallel_tool_calls: true, - ) - } else if slug.starts_with("gpt-5") { - model_family!( - slug, "gpt-5", - supports_reasoning_summaries: true, - needs_special_apply_patch_instructions: true, - shell_type: ConfigShellToolType::Default, - support_verbosity: true, - truncation_policy: TruncationPolicy::Bytes(10_000), - ) - } else { - None - } -} - -pub fn derive_default_model_family(model: &str) -> ModelFamily { - ModelFamily { - slug: model.to_string(), - family: model.to_string(), - 
needs_special_apply_patch_instructions: false, - supports_reasoning_summaries: false, - reasoning_summary_format: ReasoningSummaryFormat::None, - supports_parallel_tool_calls: false, - apply_patch_tool_type: None, - base_instructions: BASE_INSTRUCTIONS.to_string(), - experimental_supported_tools: Vec::new(), - effective_context_window_percent: 95, - support_verbosity: false, - shell_type: ConfigShellToolType::Default, - default_verbosity: None, - default_reasoning_effort: None, - truncation_policy: TruncationPolicy::Bytes(10_000), - } -} diff --git a/codex-rs/core/src/model_provider_info.rs b/codex-rs/core/src/model_provider_info.rs index 4912a64694..aa86501014 100644 --- a/codex-rs/core/src/model_provider_info.rs +++ b/codex-rs/core/src/model_provider_info.rs @@ -99,7 +99,6 @@ pub struct ModelProviderInfo { } impl ModelProviderInfo { - #[allow(dead_code)] fn build_header_map(&self) -> crate::error::Result { let mut headers = HeaderMap::new(); if let Some(extra) = &self.http_headers { @@ -208,6 +207,45 @@ impl ModelProviderInfo { .map(Duration::from_millis) .unwrap_or(Duration::from_millis(DEFAULT_STREAM_IDLE_TIMEOUT_MS)) } + pub fn create_openai_provider() -> ModelProviderInfo { + ModelProviderInfo { + name: "OpenAI".into(), + // Allow users to override the default OpenAI endpoint by + // exporting `OPENAI_BASE_URL`. This is useful when pointing + // Codex at a proxy, mock server, or Azure-style deployment + // without requiring a full TOML override for the built-in + // OpenAI provider. 
+ base_url: std::env::var("OPENAI_BASE_URL") + .ok() + .filter(|v| !v.trim().is_empty()), + env_key: None, + env_key_instructions: None, + experimental_bearer_token: None, + wire_api: WireApi::Responses, + query_params: None, + http_headers: Some( + [("version".to_string(), env!("CARGO_PKG_VERSION").to_string())] + .into_iter() + .collect(), + ), + env_http_headers: Some( + [ + ( + "OpenAI-Organization".to_string(), + "OPENAI_ORGANIZATION".to_string(), + ), + ("OpenAI-Project".to_string(), "OPENAI_PROJECT".to_string()), + ] + .into_iter() + .collect(), + ), + // Use global defaults for retry/timeout unless overridden in config.toml. + request_max_retries: None, + stream_max_retries: None, + stream_idle_timeout_ms: None, + requires_openai_auth: true, + } + } } pub const DEFAULT_LMSTUDIO_PORT: u16 = 1234; @@ -225,46 +263,7 @@ pub fn built_in_model_providers() -> HashMap { // open source ("oss") providers by default. Users are encouraged to add to // `model_providers` in config.toml to add their own providers. [ - ( - "openai", - P { - name: "OpenAI".into(), - // Allow users to override the default OpenAI endpoint by - // exporting `OPENAI_BASE_URL`. This is useful when pointing - // Codex at a proxy, mock server, or Azure-style deployment - // without requiring a full TOML override for the built-in - // OpenAI provider. - base_url: std::env::var("OPENAI_BASE_URL") - .ok() - .filter(|v| !v.trim().is_empty()), - env_key: None, - env_key_instructions: None, - experimental_bearer_token: None, - wire_api: WireApi::Responses, - query_params: None, - http_headers: Some( - [("version".to_string(), env!("CARGO_PKG_VERSION").to_string())] - .into_iter() - .collect(), - ), - env_http_headers: Some( - [ - ( - "OpenAI-Organization".to_string(), - "OPENAI_ORGANIZATION".to_string(), - ), - ("OpenAI-Project".to_string(), "OPENAI_PROJECT".to_string()), - ] - .into_iter() - .collect(), - ), - // Use global defaults for retry/timeout unless overridden in config.toml. 
- request_max_retries: None, - stream_max_retries: None, - stream_idle_timeout_ms: None, - requires_openai_auth: true, - }, - ), + ("openai", P::create_openai_provider()), ( OLLAMA_OSS_PROVIDER_ID, create_oss_provider(DEFAULT_OLLAMA_PORT, WireApi::Chat), diff --git a/codex-rs/core/src/openai_model_info.rs b/codex-rs/core/src/openai_model_info.rs deleted file mode 100644 index 96f3ed77cb..0000000000 --- a/codex-rs/core/src/openai_model_info.rs +++ /dev/null @@ -1,83 +0,0 @@ -use crate::model_family::ModelFamily; - -// Shared constants for commonly used window/token sizes. -pub(crate) const CONTEXT_WINDOW_272K: i64 = 272_000; - -/// Metadata about a model, particularly OpenAI models. -/// We may want to consider including details like the pricing for -/// input tokens, output tokens, etc., though users will need to be able to -/// override this in config.toml, as this information can get out of date. -/// Though this would help present more accurate pricing information in the UI. -#[derive(Debug)] -pub(crate) struct ModelInfo { - /// Size of the context window in tokens. This is the maximum size of the input context. - pub(crate) context_window: i64, - - /// Token threshold where we should automatically compact conversation history. This considers - /// input tokens + output tokens of this turn. - pub(crate) auto_compact_token_limit: Option, -} - -impl ModelInfo { - const fn new(context_window: i64) -> Self { - Self { - context_window, - auto_compact_token_limit: Some(Self::default_auto_compact_limit(context_window)), - } - } - - const fn default_auto_compact_limit(context_window: i64) -> i64 { - (context_window * 9) / 10 - } -} - -pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option { - let slug = model_family.slug.as_str(); - match slug { - // OSS models have a 128k shared token pool. - // Arbitrarily splitting it: 3/4 input context, 1/4 output. 
- // https://openai.com/index/gpt-oss-model-card/ - "gpt-oss-20b" => Some(ModelInfo::new(96_000)), - "gpt-oss-120b" => Some(ModelInfo::new(96_000)), - // https://platform.openai.com/docs/models/o3 - "o3" => Some(ModelInfo::new(200_000)), - - // https://platform.openai.com/docs/models/o4-mini - "o4-mini" => Some(ModelInfo::new(200_000)), - - // https://platform.openai.com/docs/models/codex-mini-latest - "codex-mini-latest" => Some(ModelInfo::new(200_000)), - - // As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14. - // https://platform.openai.com/docs/models/gpt-4.1 - "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo::new(1_047_576)), - - // As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06. - // https://platform.openai.com/docs/models/gpt-4o - "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo::new(128_000)), - - // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13 - "gpt-4o-2024-05-13" => Some(ModelInfo::new(128_000)), - - // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20 - "gpt-4o-2024-11-20" => Some(ModelInfo::new(128_000)), - - // https://platform.openai.com/docs/models/gpt-3.5-turbo - "gpt-3.5-turbo" => Some(ModelInfo::new(16_385)), - - _ if slug.starts_with("gpt-5-codex") - || slug.starts_with("gpt-5.1-codex") - || slug.starts_with("gpt-5.1-codex-max") => - { - Some(ModelInfo::new(CONTEXT_WINDOW_272K)) - } - - _ if slug.starts_with("gpt-5") => Some(ModelInfo::new(CONTEXT_WINDOW_272K)), - - _ if slug.starts_with("codex-") => Some(ModelInfo::new(CONTEXT_WINDOW_272K)), - - _ if slug.starts_with("exp-") => Some(ModelInfo::new(CONTEXT_WINDOW_272K)), - - _ => None, - } -} diff --git a/codex-rs/core/src/openai_models/cache.rs b/codex-rs/core/src/openai_models/cache.rs new file mode 100644 index 0000000000..cac16cc853 --- /dev/null +++ b/codex-rs/core/src/openai_models/cache.rs @@ -0,0 +1,56 @@ +use chrono::DateTime; +use chrono::Utc; +use codex_protocol::openai_models::ModelInfo; +use 
serde::Deserialize; +use serde::Serialize; +use std::io; +use std::io::ErrorKind; +use std::path::Path; +use std::time::Duration; +use tokio::fs; + +/// Serialized snapshot of models and metadata cached on disk. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) struct ModelsCache { + pub(crate) fetched_at: DateTime, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub(crate) etag: Option, + pub(crate) models: Vec, +} + +impl ModelsCache { + /// Returns `true` when the cache entry has not exceeded the configured TTL. + pub(crate) fn is_fresh(&self, ttl: Duration) -> bool { + if ttl.is_zero() { + return false; + } + let Ok(ttl_duration) = chrono::Duration::from_std(ttl) else { + return false; + }; + let age = Utc::now().signed_duration_since(self.fetched_at); + age <= ttl_duration + } +} + +/// Read and deserialize the cache file if it exists. +pub(crate) async fn load_cache(path: &Path) -> io::Result> { + match fs::read(path).await { + Ok(contents) => { + let cache = serde_json::from_slice(&contents) + .map_err(|err| io::Error::new(ErrorKind::InvalidData, err.to_string()))?; + Ok(Some(cache)) + } + Err(err) if err.kind() == ErrorKind::NotFound => Ok(None), + Err(err) => Err(err), + } +} + +/// Persist the cache contents to disk, creating parent directories as needed. 
+pub(crate) async fn save_cache(path: &Path, cache: &ModelsCache) -> io::Result<()> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).await?; + } + let json = serde_json::to_vec_pretty(cache) + .map_err(|err| io::Error::new(ErrorKind::InvalidData, err.to_string()))?; + fs::write(path, json).await +} diff --git a/codex-rs/core/src/openai_models/mod.rs b/codex-rs/core/src/openai_models/mod.rs new file mode 100644 index 0000000000..a77438ebc9 --- /dev/null +++ b/codex-rs/core/src/openai_models/mod.rs @@ -0,0 +1,4 @@ +mod cache; +pub mod model_family; +pub mod model_presets; +pub mod models_manager; diff --git a/codex-rs/core/src/openai_models/model_family.rs b/codex-rs/core/src/openai_models/model_family.rs new file mode 100644 index 0000000000..4e3375c580 --- /dev/null +++ b/codex-rs/core/src/openai_models/model_family.rs @@ -0,0 +1,543 @@ +use codex_protocol::config_types::Verbosity; +use codex_protocol::openai_models::ApplyPatchToolType; +use codex_protocol::openai_models::ConfigShellToolType; +use codex_protocol::openai_models::ModelInfo; +use codex_protocol::openai_models::ReasoningEffort; +use codex_protocol::openai_models::ReasoningSummaryFormat; + +use crate::config::Config; +use crate::truncate::TruncationPolicy; + +/// The `instructions` field in the payload sent to a model should always start +/// with this content. +const BASE_INSTRUCTIONS: &str = include_str!("../../prompt.md"); + +const GPT_5_CODEX_INSTRUCTIONS: &str = include_str!("../../gpt_5_codex_prompt.md"); +const GPT_5_1_INSTRUCTIONS: &str = include_str!("../../gpt_5_1_prompt.md"); +const GPT_5_2_INSTRUCTIONS: &str = include_str!("../../gpt_5_2_prompt.md"); +const GPT_5_1_CODEX_MAX_INSTRUCTIONS: &str = include_str!("../../gpt-5.1-codex-max_prompt.md"); +pub(crate) const CONTEXT_WINDOW_272K: i64 = 272_000; + +/// A model family is a group of models that share certain characteristics. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ModelFamily { + /// The full model slug used to derive this model family, e.g. + /// "gpt-4.1-2025-04-14". + pub slug: String, + + /// The model family name, e.g. "gpt-4.1". This string is used when deriving + /// default metadata for the family, such as context windows. + pub family: String, + + /// True if the model needs additional instructions on how to use the + /// "virtual" `apply_patch` CLI. + pub needs_special_apply_patch_instructions: bool, + + /// Maximum supported context window, if known. + pub context_window: Option, + + /// Token threshold for automatic compaction if config does not override it. + auto_compact_token_limit: Option, + + // Whether the `reasoning` field can be set when making a request to this + // model family. Note it has `effort` and `summary` subfields (though + // `summary` is optional). + pub supports_reasoning_summaries: bool, + + // The reasoning effort to use for this model family when none is explicitly chosen. + pub default_reasoning_effort: Option, + + // Define if we need a special handling of reasoning summary + pub reasoning_summary_format: ReasoningSummaryFormat, + + /// Whether this model supports parallel tool calls when using the + /// Responses API. + pub supports_parallel_tool_calls: bool, + + /// Present if the model performs better when `apply_patch` is provided as + /// a tool call instead of just a bash command + pub apply_patch_tool_type: Option, + + // Instructions to use for querying the model + pub base_instructions: String, + + /// Names of beta tools that should be exposed to this model family. + pub experimental_supported_tools: Vec, + + /// Percentage of the context window considered usable for inputs, after + /// reserving headroom for system prompts, tool overhead, and model output. + /// This is applied when computing the effective context window seen by + /// consumers. 
+ pub effective_context_window_percent: i64, + + /// If the model family supports setting the verbosity level when using Responses API. + pub support_verbosity: bool, + + // The default verbosity level for this model family when using Responses API. + pub default_verbosity: Option, + + /// Preferred shell tool type for this model family when features do not override it. + pub shell_type: ConfigShellToolType, + + pub truncation_policy: TruncationPolicy, +} + +impl ModelFamily { + pub fn with_config_overrides(mut self, config: &Config) -> Self { + if let Some(supports_reasoning_summaries) = config.model_supports_reasoning_summaries { + self.supports_reasoning_summaries = supports_reasoning_summaries; + } + if let Some(reasoning_summary_format) = config.model_reasoning_summary_format.as_ref() { + self.reasoning_summary_format = reasoning_summary_format.clone(); + } + if let Some(context_window) = config.model_context_window { + self.context_window = Some(context_window); + } + if let Some(auto_compact_token_limit) = config.model_auto_compact_token_limit { + self.auto_compact_token_limit = Some(auto_compact_token_limit); + } + self + } + pub fn with_remote_overrides(mut self, remote_models: Vec) -> Self { + for model in remote_models { + if model.slug == self.slug { + self.apply_remote_overrides(model); + } + } + self + } + + fn apply_remote_overrides(&mut self, model: ModelInfo) { + let ModelInfo { + slug: _, + display_name: _, + description: _, + default_reasoning_level, + supported_reasoning_levels: _, + shell_type, + visibility: _, + minimal_client_version: _, + supported_in_api: _, + priority: _, + upgrade: _, + base_instructions, + supports_reasoning_summaries, + support_verbosity, + default_verbosity, + apply_patch_tool_type, + truncation_policy, + supports_parallel_tool_calls, + context_window, + reasoning_summary_format, + experimental_supported_tools, + } = model; + + self.default_reasoning_effort = Some(default_reasoning_level); + self.shell_type = 
shell_type; + if let Some(base) = base_instructions { + self.base_instructions = base; + } + self.supports_reasoning_summaries = supports_reasoning_summaries; + self.support_verbosity = support_verbosity; + self.default_verbosity = default_verbosity; + self.apply_patch_tool_type = apply_patch_tool_type; + self.truncation_policy = truncation_policy.into(); + self.supports_parallel_tool_calls = supports_parallel_tool_calls; + self.context_window = context_window; + self.reasoning_summary_format = reasoning_summary_format; + self.experimental_supported_tools = experimental_supported_tools; + } + + pub fn auto_compact_token_limit(&self) -> Option { + self.auto_compact_token_limit + .or(self.context_window.map(Self::default_auto_compact_limit)) + } + + const fn default_auto_compact_limit(context_window: i64) -> i64 { + (context_window * 9) / 10 + } + + pub fn get_model_slug(&self) -> &str { + &self.slug + } +} + +macro_rules! model_family { + ( + $slug:expr, $family:expr $(, $key:ident : $value:expr )* $(,)? + ) => {{ + // defaults + #[allow(unused_mut)] + let mut mf = ModelFamily { + slug: $slug.to_string(), + family: $family.to_string(), + needs_special_apply_patch_instructions: false, + context_window: Some(CONTEXT_WINDOW_272K), + auto_compact_token_limit: None, + supports_reasoning_summaries: false, + reasoning_summary_format: ReasoningSummaryFormat::None, + supports_parallel_tool_calls: false, + apply_patch_tool_type: None, + base_instructions: BASE_INSTRUCTIONS.to_string(), + experimental_supported_tools: Vec::new(), + effective_context_window_percent: 95, + support_verbosity: false, + shell_type: ConfigShellToolType::Default, + default_verbosity: None, + default_reasoning_effort: None, + truncation_policy: TruncationPolicy::Bytes(10_000), + }; + + // apply overrides + $( + mf.$key = $value; + )* + mf + }}; +} + +/// Internal offline helper for `ModelsManager` that returns a `ModelFamily` for the given +/// model slug. 
+pub(in crate::openai_models) fn find_family_for_model(slug: &str) -> ModelFamily { + if slug.starts_with("o3") { + model_family!( + slug, "o3", + supports_reasoning_summaries: true, + needs_special_apply_patch_instructions: true, + context_window: Some(200_000), + ) + } else if slug.starts_with("o4-mini") { + model_family!( + slug, "o4-mini", + supports_reasoning_summaries: true, + needs_special_apply_patch_instructions: true, + context_window: Some(200_000), + ) + } else if slug.starts_with("codex-mini-latest") { + model_family!( + slug, "codex-mini-latest", + supports_reasoning_summaries: true, + needs_special_apply_patch_instructions: true, + shell_type: ConfigShellToolType::Local, + context_window: Some(200_000), + ) + } else if slug.starts_with("gpt-4.1") { + model_family!( + slug, "gpt-4.1", + needs_special_apply_patch_instructions: true, + context_window: Some(1_047_576), + ) + } else if slug.starts_with("gpt-oss") || slug.starts_with("openai/gpt-oss") { + model_family!( + slug, "gpt-oss", + apply_patch_tool_type: Some(ApplyPatchToolType::Function), + context_window: Some(96_000), + ) + } else if slug.starts_with("gpt-4o") { + model_family!( + slug, "gpt-4o", + needs_special_apply_patch_instructions: true, + context_window: Some(128_000), + ) + } else if slug.starts_with("gpt-3.5") { + model_family!( + slug, "gpt-3.5", + needs_special_apply_patch_instructions: true, + context_window: Some(16_385), + ) + } else if slug.starts_with("test-gpt-5") { + model_family!( + slug, slug, + supports_reasoning_summaries: true, + reasoning_summary_format: ReasoningSummaryFormat::Experimental, + base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(), + experimental_supported_tools: vec![ + "grep_files".to_string(), + "list_dir".to_string(), + "read_file".to_string(), + "test_sync_tool".to_string(), + ], + supports_parallel_tool_calls: true, + shell_type: ConfigShellToolType::ShellCommand, + support_verbosity: true, + truncation_policy: TruncationPolicy::Tokens(10_000), + 
) + + // Experimental models. + } else if slug.starts_with("exp-codex") { + // Same as gpt-5.1-codex-max. + model_family!( + slug, slug, + supports_reasoning_summaries: true, + reasoning_summary_format: ReasoningSummaryFormat::Experimental, + base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(), + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + shell_type: ConfigShellToolType::ShellCommand, + supports_parallel_tool_calls: true, + support_verbosity: false, + truncation_policy: TruncationPolicy::Tokens(10_000), + context_window: Some(CONTEXT_WINDOW_272K), + ) + } else if slug.starts_with("exp-") { + model_family!( + slug, slug, + supports_reasoning_summaries: true, + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + support_verbosity: true, + default_verbosity: Some(Verbosity::Low), + base_instructions: BASE_INSTRUCTIONS.to_string(), + default_reasoning_effort: Some(ReasoningEffort::Medium), + truncation_policy: TruncationPolicy::Bytes(10_000), + shell_type: ConfigShellToolType::UnifiedExec, + supports_parallel_tool_calls: true, + context_window: Some(CONTEXT_WINDOW_272K), + ) + + // Production models. 
+ } else if slug.starts_with("gpt-5.1-codex-max") { + model_family!( + slug, slug, + supports_reasoning_summaries: true, + reasoning_summary_format: ReasoningSummaryFormat::Experimental, + base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(), + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + shell_type: ConfigShellToolType::ShellCommand, + supports_parallel_tool_calls: false, + support_verbosity: false, + truncation_policy: TruncationPolicy::Tokens(10_000), + context_window: Some(CONTEXT_WINDOW_272K), + ) + } else if slug.starts_with("gpt-5-codex") + || slug.starts_with("gpt-5.1-codex") + || slug.starts_with("codex-") + { + model_family!( + slug, slug, + supports_reasoning_summaries: true, + reasoning_summary_format: ReasoningSummaryFormat::Experimental, + base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(), + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + shell_type: ConfigShellToolType::ShellCommand, + supports_parallel_tool_calls: false, + support_verbosity: false, + truncation_policy: TruncationPolicy::Tokens(10_000), + context_window: Some(CONTEXT_WINDOW_272K), + ) + } else if slug.starts_with("gpt-5.2") { + model_family!( + slug, slug, + supports_reasoning_summaries: true, + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + support_verbosity: true, + default_verbosity: Some(Verbosity::Low), + base_instructions: GPT_5_2_INSTRUCTIONS.to_string(), + default_reasoning_effort: Some(ReasoningEffort::Medium), + truncation_policy: TruncationPolicy::Bytes(10_000), + shell_type: ConfigShellToolType::ShellCommand, + supports_parallel_tool_calls: true, + context_window: Some(CONTEXT_WINDOW_272K), + ) + } else if slug.starts_with("gpt-5.1") { + model_family!( + slug, "gpt-5.1", + supports_reasoning_summaries: true, + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + support_verbosity: true, + default_verbosity: Some(Verbosity::Low), + base_instructions: GPT_5_1_INSTRUCTIONS.to_string(), + 
default_reasoning_effort: Some(ReasoningEffort::Medium), + truncation_policy: TruncationPolicy::Bytes(10_000), + shell_type: ConfigShellToolType::ShellCommand, + supports_parallel_tool_calls: true, + context_window: Some(CONTEXT_WINDOW_272K), + ) + } else if slug.starts_with("gpt-5") { + model_family!( + slug, "gpt-5", + supports_reasoning_summaries: true, + needs_special_apply_patch_instructions: true, + shell_type: ConfigShellToolType::Default, + support_verbosity: true, + truncation_policy: TruncationPolicy::Bytes(10_000), + context_window: Some(CONTEXT_WINDOW_272K), + ) + } else { + derive_default_model_family(slug) + } +} + +fn derive_default_model_family(model: &str) -> ModelFamily { + tracing::warn!("Unknown model {model} is used. This will degrade the performance of Codex."); + ModelFamily { + slug: model.to_string(), + family: model.to_string(), + needs_special_apply_patch_instructions: false, + context_window: None, + auto_compact_token_limit: None, + supports_reasoning_summaries: false, + reasoning_summary_format: ReasoningSummaryFormat::None, + supports_parallel_tool_calls: false, + apply_patch_tool_type: None, + base_instructions: BASE_INSTRUCTIONS.to_string(), + experimental_supported_tools: Vec::new(), + effective_context_window_percent: 95, + support_verbosity: false, + shell_type: ConfigShellToolType::Default, + default_verbosity: None, + default_reasoning_effort: None, + truncation_policy: TruncationPolicy::Bytes(10_000), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use codex_protocol::openai_models::ClientVersion; + use codex_protocol::openai_models::ModelVisibility; + use codex_protocol::openai_models::ReasoningEffortPreset; + use codex_protocol::openai_models::TruncationPolicyConfig; + + fn remote(slug: &str, effort: ReasoningEffort, shell: ConfigShellToolType) -> ModelInfo { + ModelInfo { + slug: slug.to_string(), + display_name: slug.to_string(), + description: Some(format!("{slug} desc")), + default_reasoning_level: effort, + 
supported_reasoning_levels: vec![ReasoningEffortPreset { + effort, + description: effort.to_string(), + }], + shell_type: shell, + visibility: ModelVisibility::List, + minimal_client_version: ClientVersion(0, 1, 0), + supported_in_api: true, + priority: 1, + upgrade: None, + base_instructions: None, + supports_reasoning_summaries: false, + support_verbosity: false, + default_verbosity: None, + apply_patch_tool_type: None, + truncation_policy: TruncationPolicyConfig::bytes(10_000), + supports_parallel_tool_calls: false, + context_window: None, + reasoning_summary_format: ReasoningSummaryFormat::None, + experimental_supported_tools: Vec::new(), + } + } + + #[test] + fn remote_overrides_apply_when_slug_matches() { + let family = model_family!("gpt-4o-mini", "gpt-4o-mini"); + assert_ne!(family.default_reasoning_effort, Some(ReasoningEffort::High)); + + let updated = family.with_remote_overrides(vec![ + remote( + "gpt-4o-mini", + ReasoningEffort::High, + ConfigShellToolType::ShellCommand, + ), + remote( + "other-model", + ReasoningEffort::Low, + ConfigShellToolType::UnifiedExec, + ), + ]); + + assert_eq!( + updated.default_reasoning_effort, + Some(ReasoningEffort::High) + ); + assert_eq!(updated.shell_type, ConfigShellToolType::ShellCommand); + } + + #[test] + fn remote_overrides_skip_non_matching_models() { + let family = model_family!( + "codex-mini-latest", + "codex-mini-latest", + shell_type: ConfigShellToolType::Local + ); + + let updated = family.clone().with_remote_overrides(vec![remote( + "other", + ReasoningEffort::High, + ConfigShellToolType::ShellCommand, + )]); + + assert_eq!( + updated.default_reasoning_effort, + family.default_reasoning_effort + ); + assert_eq!(updated.shell_type, family.shell_type); + } + + #[test] + fn remote_overrides_apply_extended_metadata() { + let family = model_family!( + "gpt-5.1", + "gpt-5.1", + supports_reasoning_summaries: false, + support_verbosity: false, + default_verbosity: None, + apply_patch_tool_type: 
Some(ApplyPatchToolType::Function), + supports_parallel_tool_calls: false, + experimental_supported_tools: vec!["local".to_string()], + truncation_policy: TruncationPolicy::Bytes(10_000), + context_window: Some(100), + reasoning_summary_format: ReasoningSummaryFormat::None, + ); + + let updated = family.with_remote_overrides(vec![ModelInfo { + slug: "gpt-5.1".to_string(), + display_name: "gpt-5.1".to_string(), + description: Some("desc".to_string()), + default_reasoning_level: ReasoningEffort::High, + supported_reasoning_levels: vec![ReasoningEffortPreset { + effort: ReasoningEffort::High, + description: "High".to_string(), + }], + shell_type: ConfigShellToolType::ShellCommand, + visibility: ModelVisibility::List, + minimal_client_version: ClientVersion(0, 1, 0), + supported_in_api: true, + priority: 10, + upgrade: None, + base_instructions: Some("Remote instructions".to_string()), + supports_reasoning_summaries: true, + support_verbosity: true, + default_verbosity: Some(Verbosity::High), + apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), + truncation_policy: TruncationPolicyConfig::tokens(2_000), + supports_parallel_tool_calls: true, + context_window: Some(400_000), + reasoning_summary_format: ReasoningSummaryFormat::Experimental, + experimental_supported_tools: vec!["alpha".to_string(), "beta".to_string()], + }]); + + assert_eq!( + updated.default_reasoning_effort, + Some(ReasoningEffort::High) + ); + assert!(updated.supports_reasoning_summaries); + assert!(updated.support_verbosity); + assert_eq!(updated.default_verbosity, Some(Verbosity::High)); + assert_eq!(updated.shell_type, ConfigShellToolType::ShellCommand); + assert_eq!( + updated.apply_patch_tool_type, + Some(ApplyPatchToolType::Freeform) + ); + assert_eq!(updated.truncation_policy, TruncationPolicy::Tokens(2_000)); + assert!(updated.supports_parallel_tool_calls); + assert_eq!(updated.context_window, Some(400_000)); + assert_eq!( + updated.reasoning_summary_format, + 
ReasoningSummaryFormat::Experimental + ); + assert_eq!( + updated.experimental_supported_tools, + vec!["alpha".to_string(), "beta".to_string()] + ); + assert_eq!(updated.base_instructions, "Remote instructions"); + } +} diff --git a/codex-rs/common/src/model_presets.rs b/codex-rs/core/src/openai_models/model_presets.rs similarity index 60% rename from codex-rs/common/src/model_presets.rs rename to codex-rs/core/src/openai_models/model_presets.rs index a031f23b1d..42a5557f40 100644 --- a/codex-rs/common/src/model_presets.rs +++ b/codex-rs/core/src/openai_models/model_presets.rs @@ -1,76 +1,38 @@ -use std::collections::HashMap; - use codex_app_server_protocol::AuthMode; -use codex_core::protocol_config_types::ReasoningEffort; +use codex_protocol::openai_models::ModelPreset; +use codex_protocol::openai_models::ModelUpgrade; +use codex_protocol::openai_models::ReasoningEffort; +use codex_protocol::openai_models::ReasoningEffortPreset; use once_cell::sync::Lazy; pub const HIDE_GPT5_1_MIGRATION_PROMPT_CONFIG: &str = "hide_gpt5_1_migration_prompt"; pub const HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG: &str = "hide_gpt-5.1-codex-max_migration_prompt"; -/// A reasoning effort option that can be surfaced for a model. -#[derive(Debug, Clone, Copy)] -pub struct ReasoningEffortPreset { - /// Effort level that the model supports. - pub effort: ReasoningEffort, - /// Short human description shown next to the effort in UIs. - pub description: &'static str, -} - -#[derive(Debug, Clone)] -pub struct ModelUpgrade { - pub id: &'static str, - pub reasoning_effort_mapping: Option>, - pub migration_config_key: &'static str, -} - -/// Metadata describing a Codex-supported model. -#[derive(Debug, Clone)] -pub struct ModelPreset { - /// Stable identifier for the preset. - pub id: &'static str, - /// Model slug (e.g., "gpt-5"). - pub model: &'static str, - /// Display name shown in UIs. - pub display_name: &'static str, - /// Short human description shown in UIs. 
- pub description: &'static str, - /// Reasoning effort applied when none is explicitly chosen. - pub default_reasoning_effort: ReasoningEffort, - /// Supported reasoning effort options. - pub supported_reasoning_efforts: &'static [ReasoningEffortPreset], - /// Whether this is the default model for new users. - pub is_default: bool, - /// recommended upgrade model - pub upgrade: Option, - /// Whether this preset should appear in the picker UI. - pub show_in_picker: bool, -} - static PRESETS: Lazy> = Lazy::new(|| { vec![ ModelPreset { - id: "gpt-5.1-codex-max", - model: "gpt-5.1-codex-max", - display_name: "gpt-5.1-codex-max", - description: "Latest Codex-optimized flagship for deep and fast reasoning.", + id: "gpt-5.1-codex-max".to_string(), + model: "gpt-5.1-codex-max".to_string(), + display_name: "gpt-5.1-codex-max".to_string(), + description: "Latest Codex-optimized flagship for deep and fast reasoning.".to_string(), default_reasoning_effort: ReasoningEffort::Medium, - supported_reasoning_efforts: &[ + supported_reasoning_efforts: vec![ ReasoningEffortPreset { effort: ReasoningEffort::Low, - description: "Fast responses with lighter reasoning", + description: "Fast responses with lighter reasoning".to_string(), }, ReasoningEffortPreset { effort: ReasoningEffort::Medium, - description: "Balances speed and reasoning depth for everyday tasks", + description: "Balances speed and reasoning depth for everyday tasks".to_string(), }, ReasoningEffortPreset { effort: ReasoningEffort::High, - description: "Maximizes reasoning depth for complex problems", + description: "Maximizes reasoning depth for complex problems".to_string(), }, ReasoningEffortPreset { effort: ReasoningEffort::XHigh, - description: "Extra high reasoning depth for complex problems", + description: "Extra high reasoning depth for complex problems".to_string(), }, ], is_default: true, @@ -78,184 +40,212 @@ static PRESETS: Lazy> = Lazy::new(|| { show_in_picker: true, }, ModelPreset { - id: "gpt-5.1-codex", 
- model: "gpt-5.1-codex", - display_name: "gpt-5.1-codex", - description: "Optimized for codex.", + id: "gpt-5.1-codex".to_string(), + model: "gpt-5.1-codex".to_string(), + display_name: "gpt-5.1-codex".to_string(), + description: "Optimized for codex.".to_string(), default_reasoning_effort: ReasoningEffort::Medium, - supported_reasoning_efforts: &[ + supported_reasoning_efforts: vec![ ReasoningEffortPreset { effort: ReasoningEffort::Low, - description: "Fastest responses with limited reasoning", + description: "Fastest responses with limited reasoning".to_string(), }, ReasoningEffortPreset { effort: ReasoningEffort::Medium, - description: "Dynamically adjusts reasoning based on the task", + description: "Dynamically adjusts reasoning based on the task".to_string(), }, ReasoningEffortPreset { effort: ReasoningEffort::High, - description: "Maximizes reasoning depth for complex or ambiguous problems", + description: "Maximizes reasoning depth for complex or ambiguous problems" + .to_string(), }, ], is_default: false, upgrade: Some(ModelUpgrade { - id: "gpt-5.1-codex-max", + id: "gpt-5.1-codex-max".to_string(), reasoning_effort_mapping: None, - migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG, + migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG.to_string(), }), show_in_picker: true, }, ModelPreset { - id: "gpt-5.1-codex-mini", - model: "gpt-5.1-codex-mini", - display_name: "gpt-5.1-codex-mini", - description: "Optimized for codex. Cheaper, faster, but less capable.", + id: "gpt-5.1-codex-mini".to_string(), + model: "gpt-5.1-codex-mini".to_string(), + display_name: "gpt-5.1-codex-mini".to_string(), + description: "Optimized for codex. 
Cheaper, faster, but less capable.".to_string(), default_reasoning_effort: ReasoningEffort::Medium, - supported_reasoning_efforts: &[ + supported_reasoning_efforts: vec![ ReasoningEffortPreset { effort: ReasoningEffort::Medium, - description: "Dynamically adjusts reasoning based on the task", + description: "Dynamically adjusts reasoning based on the task".to_string(), }, ReasoningEffortPreset { effort: ReasoningEffort::High, - description: "Maximizes reasoning depth for complex or ambiguous problems", + description: "Maximizes reasoning depth for complex or ambiguous problems" + .to_string(), }, ], is_default: false, upgrade: Some(ModelUpgrade { - id: "gpt-5.1-codex-max", + id: "gpt-5.1-codex-max".to_string(), reasoning_effort_mapping: None, - migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG, + migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG.to_string(), }), show_in_picker: true, }, ModelPreset { - id: "gpt-5.1", - model: "gpt-5.1", - display_name: "gpt-5.1", - description: "Broad world knowledge with strong general reasoning.", + id: "gpt-5.2".to_string(), + model: "gpt-5.2".to_string(), + display_name: "gpt-5.2".to_string(), + description: "Latest frontier model with improvements across knowledge, reasoning and coding".to_string(), default_reasoning_effort: ReasoningEffort::Medium, - supported_reasoning_efforts: &[ + supported_reasoning_efforts: vec![ ReasoningEffortPreset { effort: ReasoningEffort::Low, - description: "Balances speed with some reasoning; useful for straightforward queries and short explanations", + description: "Balances speed with some reasoning; useful for straightforward queries and short explanations".to_string(), }, ReasoningEffortPreset { effort: ReasoningEffort::Medium, - description: "Provides a solid balance of reasoning depth and latency for general-purpose tasks", + description: "Provides a solid balance of reasoning depth and latency for general-purpose tasks".to_string(), }, 
ReasoningEffortPreset { effort: ReasoningEffort::High, - description: "Maximizes reasoning depth for complex or ambiguous problems", + description: "Maximizes reasoning depth for complex or ambiguous problems".to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::XHigh, + description: "Extra high reasoning for complex problems".to_string(), + }, + ], + is_default: false, + upgrade: None, + show_in_picker: true, + }, + ModelPreset { + id: "gpt-5.1".to_string(), + model: "gpt-5.1".to_string(), + display_name: "gpt-5.1".to_string(), + description: "Broad world knowledge with strong general reasoning.".to_string(), + default_reasoning_effort: ReasoningEffort::Medium, + supported_reasoning_efforts: vec![ + ReasoningEffortPreset { + effort: ReasoningEffort::Low, + description: "Balances speed with some reasoning; useful for straightforward queries and short explanations".to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::Medium, + description: "Provides a solid balance of reasoning depth and latency for general-purpose tasks".to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::High, + description: "Maximizes reasoning depth for complex or ambiguous problems".to_string(), }, ], is_default: false, upgrade: Some(ModelUpgrade { - id: "gpt-5.1-codex-max", + id: "gpt-5.1-codex-max".to_string(), reasoning_effort_mapping: None, - migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG, + migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG.to_string(), }), show_in_picker: true, }, // Deprecated models. 
ModelPreset { - id: "gpt-5-codex", - model: "gpt-5-codex", - display_name: "gpt-5-codex", - description: "Optimized for codex.", + id: "gpt-5-codex".to_string(), + model: "gpt-5-codex".to_string(), + display_name: "gpt-5-codex".to_string(), + description: "Optimized for codex.".to_string(), default_reasoning_effort: ReasoningEffort::Medium, - supported_reasoning_efforts: &[ + supported_reasoning_efforts: vec![ ReasoningEffortPreset { effort: ReasoningEffort::Low, - description: "Fastest responses with limited reasoning", + description: "Fastest responses with limited reasoning".to_string(), }, ReasoningEffortPreset { effort: ReasoningEffort::Medium, - description: "Dynamically adjusts reasoning based on the task", + description: "Dynamically adjusts reasoning based on the task".to_string(), }, ReasoningEffortPreset { effort: ReasoningEffort::High, - description: "Maximizes reasoning depth for complex or ambiguous problems", + description: "Maximizes reasoning depth for complex or ambiguous problems".to_string(), }, ], is_default: false, upgrade: Some(ModelUpgrade { - id: "gpt-5.1-codex-max", + id: "gpt-5.1-codex-max".to_string(), reasoning_effort_mapping: None, - migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG, + migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG.to_string(), }), show_in_picker: false, }, ModelPreset { - id: "gpt-5-codex-mini", - model: "gpt-5-codex-mini", - display_name: "gpt-5-codex-mini", - description: "Optimized for codex. Cheaper, faster, but less capable.", + id: "gpt-5-codex-mini".to_string(), + model: "gpt-5-codex-mini".to_string(), + display_name: "gpt-5-codex-mini".to_string(), + description: "Optimized for codex. 
Cheaper, faster, but less capable.".to_string(), default_reasoning_effort: ReasoningEffort::Medium, - supported_reasoning_efforts: &[ + supported_reasoning_efforts: vec![ ReasoningEffortPreset { effort: ReasoningEffort::Medium, - description: "Dynamically adjusts reasoning based on the task", + description: "Dynamically adjusts reasoning based on the task".to_string(), }, ReasoningEffortPreset { effort: ReasoningEffort::High, - description: "Maximizes reasoning depth for complex or ambiguous problems", + description: "Maximizes reasoning depth for complex or ambiguous problems".to_string(), }, ], is_default: false, upgrade: Some(ModelUpgrade { - id: "gpt-5.1-codex-mini", + id: "gpt-5.1-codex-mini".to_string(), reasoning_effort_mapping: None, - migration_config_key: HIDE_GPT5_1_MIGRATION_PROMPT_CONFIG, + migration_config_key: HIDE_GPT5_1_MIGRATION_PROMPT_CONFIG.to_string(), }), show_in_picker: false, }, ModelPreset { - id: "gpt-5", - model: "gpt-5", - display_name: "gpt-5", - description: "Broad world knowledge with strong general reasoning.", + id: "gpt-5".to_string(), + model: "gpt-5".to_string(), + display_name: "gpt-5".to_string(), + description: "Broad world knowledge with strong general reasoning.".to_string(), default_reasoning_effort: ReasoningEffort::Medium, - supported_reasoning_efforts: &[ + supported_reasoning_efforts: vec![ ReasoningEffortPreset { effort: ReasoningEffort::Minimal, - description: "Fastest responses with little reasoning", + description: "Fastest responses with little reasoning".to_string(), }, ReasoningEffortPreset { effort: ReasoningEffort::Low, - description: "Balances speed with some reasoning; useful for straightforward queries and short explanations", + description: "Balances speed with some reasoning; useful for straightforward queries and short explanations".to_string(), }, ReasoningEffortPreset { effort: ReasoningEffort::Medium, - description: "Provides a solid balance of reasoning depth and latency for general-purpose tasks", + 
description: "Provides a solid balance of reasoning depth and latency for general-purpose tasks".to_string(), }, ReasoningEffortPreset { effort: ReasoningEffort::High, - description: "Maximizes reasoning depth for complex or ambiguous problems", + description: "Maximizes reasoning depth for complex or ambiguous problems".to_string(), }, ], is_default: false, upgrade: Some(ModelUpgrade { - id: "gpt-5.1-codex-max", + id: "gpt-5.1-codex-max".to_string(), reasoning_effort_mapping: None, - migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG, + migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG.to_string(), }), show_in_picker: false, }, ] }); -pub fn builtin_model_presets(auth_mode: Option) -> Vec { +pub(crate) fn builtin_model_presets(_auth_mode: Option) -> Vec { PRESETS .iter() - .filter(|preset| match auth_mode { - Some(AuthMode::ApiKey) => preset.show_in_picker && preset.id != "gpt-5.1-codex-max", - _ => preset.show_in_picker, - }) + .filter(|preset| preset.show_in_picker) .cloned() .collect() } +// todo(aibrahim): remove this once we migrate tests pub fn all_model_presets() -> &'static Vec { &PRESETS } @@ -263,21 +253,10 @@ pub fn all_model_presets() -> &'static Vec { #[cfg(test)] mod tests { use super::*; - use codex_app_server_protocol::AuthMode; #[test] fn only_one_default_model_is_configured() { let default_models = PRESETS.iter().filter(|preset| preset.is_default).count(); assert!(default_models == 1); } - - #[test] - fn gpt_5_1_codex_max_hidden_for_api_key_auth() { - let presets = builtin_model_presets(Some(AuthMode::ApiKey)); - assert!( - presets - .iter() - .all(|preset| preset.id != "gpt-5.1-codex-max") - ); - } } diff --git a/codex-rs/core/src/openai_models/models_manager.rs b/codex-rs/core/src/openai_models/models_manager.rs new file mode 100644 index 0000000000..b383079f44 --- /dev/null +++ b/codex-rs/core/src/openai_models/models_manager.rs @@ -0,0 +1,496 @@ +use chrono::Utc; +use codex_api::ModelsClient; +use 
codex_api::ReqwestTransport; +use codex_app_server_protocol::AuthMode; +use codex_protocol::openai_models::ModelInfo; +use codex_protocol::openai_models::ModelPreset; +use codex_protocol::openai_models::ModelsResponse; +use http::HeaderMap; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::RwLock; +use tokio::sync::TryLockError; +use tracing::error; + +use super::cache; +use super::cache::ModelsCache; +use crate::api_bridge::auth_provider_from_auth; +use crate::api_bridge::map_api_error; +use crate::auth::AuthManager; +use crate::config::Config; +use crate::default_client::build_reqwest_client; +use crate::error::Result as CoreResult; +use crate::features::Feature; +use crate::model_provider_info::ModelProviderInfo; +use crate::openai_models::model_family::ModelFamily; +use crate::openai_models::model_presets::builtin_model_presets; + +const MODEL_CACHE_FILE: &str = "models_cache.json"; +const DEFAULT_MODEL_CACHE_TTL: Duration = Duration::from_secs(300); +const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1-codex-max"; +const CODEX_AUTO_BALANCED_MODEL: &str = "codex-auto-balanced"; + +/// Coordinates remote model discovery plus cached metadata on disk. +#[derive(Debug)] +pub struct ModelsManager { + // todo(aibrahim) merge available_models and model family creation into one struct + available_models: RwLock>, + remote_models: RwLock>, + auth_manager: Arc, + etag: RwLock>, + codex_home: PathBuf, + cache_ttl: Duration, + provider: ModelProviderInfo, +} + +impl ModelsManager { + /// Construct a manager scoped to the provided `AuthManager`. 
+ pub fn new(auth_manager: Arc) -> Self { + let codex_home = auth_manager.codex_home().to_path_buf(); + Self { + available_models: RwLock::new(builtin_model_presets(auth_manager.get_auth_mode())), + remote_models: RwLock::new(Vec::new()), + auth_manager, + etag: RwLock::new(None), + codex_home, + cache_ttl: DEFAULT_MODEL_CACHE_TTL, + provider: ModelProviderInfo::create_openai_provider(), + } + } + + #[cfg(any(test, feature = "test-support"))] + /// Construct a manager scoped to the provided `AuthManager` with a specific provider. Used for integration tests. + pub fn with_provider(auth_manager: Arc, provider: ModelProviderInfo) -> Self { + let codex_home = auth_manager.codex_home().to_path_buf(); + Self { + available_models: RwLock::new(builtin_model_presets(auth_manager.get_auth_mode())), + remote_models: RwLock::new(Vec::new()), + auth_manager, + etag: RwLock::new(None), + codex_home, + cache_ttl: DEFAULT_MODEL_CACHE_TTL, + provider, + } + } + + /// Fetch the latest remote models, using the on-disk cache when still fresh. 
+ pub async fn refresh_available_models(&self, config: &Config) -> CoreResult<()> { + if !config.features.enabled(Feature::RemoteModels) { + return Ok(()); + } + if self.try_load_cache().await { + return Ok(()); + } + + let auth = self.auth_manager.auth(); + let api_provider = self.provider.to_api_provider(Some(AuthMode::ChatGPT))?; + let api_auth = auth_provider_from_auth(auth.clone(), &self.provider).await?; + let transport = ReqwestTransport::new(build_reqwest_client()); + let client = ModelsClient::new(transport, api_provider, api_auth); + + let client_version = format_client_version_to_whole(); + let ModelsResponse { models, etag } = client + .list_models(&client_version, HeaderMap::new()) + .await + .map_err(map_api_error)?; + + let etag = (!etag.is_empty()).then_some(etag); + + self.apply_remote_models(models.clone()).await; + *self.etag.write().await = etag.clone(); + self.persist_cache(&models, etag).await; + Ok(()) + } + + pub async fn list_models(&self, config: &Config) -> Vec { + if let Err(err) = self.refresh_available_models(config).await { + error!("failed to refresh available models: {err}"); + } + self.available_models.read().await.clone() + } + + pub fn try_list_models(&self) -> Result, TryLockError> { + self.available_models + .try_read() + .map(|models| models.clone()) + } + + fn find_family_for_model(slug: &str) -> ModelFamily { + super::model_family::find_family_for_model(slug) + } + + /// Look up the requested model family while applying remote metadata overrides. 
+ pub async fn construct_model_family(&self, model: &str, config: &Config) -> ModelFamily { + Self::find_family_for_model(model) + .with_config_overrides(config) + .with_remote_overrides(self.remote_models.read().await.clone()) + } + + pub async fn get_model(&self, model: &Option, config: &Config) -> String { + if let Some(model) = model.as_ref() { + return model.to_string(); + } + if let Err(err) = self.refresh_available_models(config).await { + error!("failed to refresh available models: {err}"); + } + // if codex-auto-balanced exists & signed in with chatgpt mode, return it, otherwise return the default model + let auth_mode = self.auth_manager.get_auth_mode(); + if auth_mode == Some(AuthMode::ChatGPT) + && self + .available_models + .read() + .await + .iter() + .any(|m| m.model == CODEX_AUTO_BALANCED_MODEL) + { + return CODEX_AUTO_BALANCED_MODEL.to_string(); + } + OPENAI_DEFAULT_MODEL.to_string() + } + + #[cfg(any(test, feature = "test-support"))] + pub fn get_model_offline(model: Option<&str>) -> String { + model.unwrap_or(OPENAI_DEFAULT_MODEL).to_string() + } + + #[cfg(any(test, feature = "test-support"))] + /// Offline helper that builds a `ModelFamily` without consulting remote state. + pub fn construct_model_family_offline(model: &str, config: &Config) -> ModelFamily { + Self::find_family_for_model(model).with_config_overrides(config) + } + + /// Replace the cached remote models and rebuild the derived presets list. + async fn apply_remote_models(&self, models: Vec) { + *self.remote_models.write().await = models; + self.build_available_models().await; + } + + /// Attempt to satisfy the refresh from the cache when it matches the provider and TTL. 
+ async fn try_load_cache(&self) -> bool { + // todo(aibrahim): think if we should store fetched_at in ModelsManager so we don't always need to read the disk + let cache_path = self.cache_path(); + let cache = match cache::load_cache(&cache_path).await { + Ok(cache) => cache, + Err(err) => { + error!("failed to load models cache: {err}"); + return false; + } + }; + let cache = match cache { + Some(cache) => cache, + None => return false, + }; + if !cache.is_fresh(self.cache_ttl) { + return false; + } + let models = cache.models.clone(); + *self.etag.write().await = cache.etag.clone(); + self.apply_remote_models(models.clone()).await; + true + } + + /// Serialize the latest fetch to disk for reuse across future processes. + async fn persist_cache(&self, models: &[ModelInfo], etag: Option) { + let cache = ModelsCache { + fetched_at: Utc::now(), + etag, + models: models.to_vec(), + }; + let cache_path = self.cache_path(); + if let Err(err) = cache::save_cache(&cache_path, &cache).await { + error!("failed to write models cache: {err}"); + } + } + + /// Convert remote model metadata into picker-ready presets, marking defaults. + async fn build_available_models(&self) { + let mut available_models = self.remote_models.read().await.clone(); + available_models.sort_by(|a, b| a.priority.cmp(&b.priority)); + let mut model_presets: Vec = available_models + .into_iter() + .map(Into::into) + .filter(|preset: &ModelPreset| preset.show_in_picker) + .collect(); + if let Some(default) = model_presets.first_mut() { + default.is_default = true; + } + { + let mut available_models_guard = self.available_models.write().await; + *available_models_guard = model_presets; + } + } + + fn cache_path(&self) -> PathBuf { + self.codex_home.join(MODEL_CACHE_FILE) + } +} + +/// Convert a client version string to a whole version string (e.g. 
"1.2.3-alpha.4" -> "1.2.3") +fn format_client_version_to_whole() -> String { + format_client_version_from_parts( + env!("CARGO_PKG_VERSION_MAJOR"), + env!("CARGO_PKG_VERSION_MINOR"), + env!("CARGO_PKG_VERSION_PATCH"), + ) +} + +fn format_client_version_from_parts(major: &str, minor: &str, patch: &str) -> String { + const DEV_VERSION: &str = "0.0.0"; + const FALLBACK_VERSION: &str = "99.99.99"; + + let normalized = format!("{major}.{minor}.{patch}"); + + if normalized == DEV_VERSION { + FALLBACK_VERSION.to_string() + } else { + normalized + } +} + +#[cfg(test)] +mod tests { + use super::cache::ModelsCache; + use super::*; + use crate::CodexAuth; + use crate::auth::AuthCredentialsStoreMode; + use crate::config::Config; + use crate::config::ConfigOverrides; + use crate::config::ConfigToml; + use crate::features::Feature; + use crate::model_provider_info::WireApi; + use codex_protocol::openai_models::ModelsResponse; + use core_test_support::responses::mount_models_once; + use serde_json::json; + use tempfile::tempdir; + use wiremock::MockServer; + + fn remote_model(slug: &str, display: &str, priority: i32) -> ModelInfo { + serde_json::from_value(json!({ + "slug": slug, + "display_name": display, + "description": format!("{display} desc"), + "default_reasoning_level": "medium", + "supported_reasoning_levels": [{"effort": "low", "description": "low"}, {"effort": "medium", "description": "medium"}], + "shell_type": "shell_command", + "visibility": "list", + "minimal_client_version": [0, 1, 0], + "supported_in_api": true, + "priority": priority, + "upgrade": null, + "base_instructions": null, + "supports_reasoning_summaries": false, + "support_verbosity": false, + "default_verbosity": null, + "apply_patch_tool_type": null, + "truncation_policy": {"mode": "bytes", "limit": 10_000}, + "supports_parallel_tool_calls": false, + "context_window": null, + "reasoning_summary_format": "none", + "experimental_supported_tools": [], + })) + .expect("valid model") + } + + fn 
provider_for(base_url: String) -> ModelProviderInfo { + ModelProviderInfo { + name: "mock".into(), + base_url: Some(base_url), + env_key: None, + env_key_instructions: None, + experimental_bearer_token: None, + wire_api: WireApi::Responses, + query_params: None, + http_headers: None, + env_http_headers: None, + request_max_retries: Some(0), + stream_max_retries: Some(0), + stream_idle_timeout_ms: Some(5_000), + requires_openai_auth: false, + } + } + + #[tokio::test] + async fn refresh_available_models_sorts_and_marks_default() { + let server = MockServer::start().await; + let remote_models = vec![ + remote_model("priority-low", "Low", 1), + remote_model("priority-high", "High", 0), + ]; + let models_mock = mount_models_once( + &server, + ModelsResponse { + models: remote_models.clone(), + etag: String::new(), + }, + ) + .await; + + let codex_home = tempdir().expect("temp dir"); + let mut config = Config::load_from_base_config_with_overrides( + ConfigToml::default(), + ConfigOverrides::default(), + codex_home.path().to_path_buf(), + ) + .expect("load default test config"); + config.features.enable(Feature::RemoteModels); + let auth_manager = + AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); + let provider = provider_for(server.uri()); + let manager = ModelsManager::with_provider(auth_manager, provider); + + manager + .refresh_available_models(&config) + .await + .expect("refresh succeeds"); + let cached_remote = manager.remote_models.read().await.clone(); + assert_eq!(cached_remote, remote_models); + + let available = manager.list_models(&config).await; + assert_eq!(available.len(), 2); + assert_eq!(available[0].model, "priority-high"); + assert!( + available[0].is_default, + "highest priority should be default" + ); + assert_eq!(available[1].model, "priority-low"); + assert!(!available[1].is_default); + assert_eq!( + models_mock.requests().len(), + 1, + "expected a single /models request" + ); + } + + #[tokio::test] + async fn 
refresh_available_models_uses_cache_when_fresh() { + let server = MockServer::start().await; + let remote_models = vec![remote_model("cached", "Cached", 5)]; + let models_mock = mount_models_once( + &server, + ModelsResponse { + models: remote_models.clone(), + etag: String::new(), + }, + ) + .await; + + let codex_home = tempdir().expect("temp dir"); + let mut config = Config::load_from_base_config_with_overrides( + ConfigToml::default(), + ConfigOverrides::default(), + codex_home.path().to_path_buf(), + ) + .expect("load default test config"); + config.features.enable(Feature::RemoteModels); + let auth_manager = Arc::new(AuthManager::new( + codex_home.path().to_path_buf(), + false, + AuthCredentialsStoreMode::File, + )); + let provider = provider_for(server.uri()); + let manager = ModelsManager::with_provider(auth_manager, provider); + + manager + .refresh_available_models(&config) + .await + .expect("first refresh succeeds"); + assert_eq!( + *manager.remote_models.read().await, + remote_models, + "remote cache should store fetched models" + ); + + // Second call should read from cache and avoid the network. 
+ manager + .refresh_available_models(&config) + .await + .expect("cached refresh succeeds"); + assert_eq!( + *manager.remote_models.read().await, + remote_models, + "cache path should not mutate stored models" + ); + assert_eq!( + models_mock.requests().len(), + 1, + "cache hit should avoid a second /models request" + ); + } + + #[tokio::test] + async fn refresh_available_models_refetches_when_cache_stale() { + let server = MockServer::start().await; + let initial_models = vec![remote_model("stale", "Stale", 1)]; + let initial_mock = mount_models_once( + &server, + ModelsResponse { + models: initial_models.clone(), + etag: String::new(), + }, + ) + .await; + + let codex_home = tempdir().expect("temp dir"); + let mut config = Config::load_from_base_config_with_overrides( + ConfigToml::default(), + ConfigOverrides::default(), + codex_home.path().to_path_buf(), + ) + .expect("load default test config"); + config.features.enable(Feature::RemoteModels); + let auth_manager = Arc::new(AuthManager::new( + codex_home.path().to_path_buf(), + false, + AuthCredentialsStoreMode::File, + )); + let provider = provider_for(server.uri()); + let manager = ModelsManager::with_provider(auth_manager, provider); + + manager + .refresh_available_models(&config) + .await + .expect("initial refresh succeeds"); + + // Rewrite cache with an old timestamp so it is treated as stale. 
+ let cache_path = codex_home.path().join(MODEL_CACHE_FILE); + let contents = + std::fs::read_to_string(&cache_path).expect("cache file should exist after refresh"); + let mut cache: ModelsCache = + serde_json::from_str(&contents).expect("cache should deserialize"); + cache.fetched_at = Utc::now() - chrono::Duration::hours(1); + std::fs::write(&cache_path, serde_json::to_string_pretty(&cache).unwrap()) + .expect("cache rewrite succeeds"); + + let updated_models = vec![remote_model("fresh", "Fresh", 9)]; + server.reset().await; + let refreshed_mock = mount_models_once( + &server, + ModelsResponse { + models: updated_models.clone(), + etag: String::new(), + }, + ) + .await; + + manager + .refresh_available_models(&config) + .await + .expect("second refresh succeeds"); + assert_eq!( + *manager.remote_models.read().await, + updated_models, + "stale cache should trigger refetch" + ); + assert_eq!( + initial_mock.requests().len(), + 1, + "initial refresh should only hit /models once" + ); + assert_eq!( + refreshed_mock.requests().len(), + 1, + "stale cache refresh should fetch /models once" + ); + } +} diff --git a/codex-rs/core/src/parse_command.rs b/codex-rs/core/src/parse_command.rs index f335347042..399513f5ae 100644 --- a/codex-rs/core/src/parse_command.rs +++ b/codex-rs/core/src/parse_command.rs @@ -117,9 +117,6 @@ mod tests { query: None, path: None, }, - ParsedCommand::Unknown { - cmd: "head -n 40".to_string(), - }, ], ); } @@ -143,16 +140,11 @@ mod tests { let inner = "rg -n \"BUG|FIXME|TODO|XXX|HACK\" -S | head -n 200"; assert_parsed( &vec_str(&["bash", "-lc", inner]), - vec![ - ParsedCommand::Search { - cmd: "rg -n 'BUG|FIXME|TODO|XXX|HACK' -S".to_string(), - query: Some("BUG|FIXME|TODO|XXX|HACK".to_string()), - path: None, - }, - ParsedCommand::Unknown { - cmd: "head -n 200".to_string(), - }, - ], + vec![ParsedCommand::Search { + cmd: "rg -n 'BUG|FIXME|TODO|XXX|HACK' -S".to_string(), + query: Some("BUG|FIXME|TODO|XXX|HACK".to_string()), + path: None, + }], ); 
} @@ -174,16 +166,11 @@ mod tests { let inner = "rg --files | head -n 50"; assert_parsed( &vec_str(&["bash", "-lc", inner]), - vec![ - ParsedCommand::Search { - cmd: "rg --files".to_string(), - query: None, - path: None, - }, - ParsedCommand::Unknown { - cmd: "head -n 50".to_string(), - }, - ], + vec![ParsedCommand::Search { + cmd: "rg --files".to_string(), + query: None, + path: None, + }], ); } @@ -273,6 +260,19 @@ mod tests { ); } + #[test] + fn supports_head_file_only() { + let inner = "head Cargo.toml"; + assert_parsed( + &vec_str(&["bash", "-lc", inner]), + vec![ParsedCommand::Read { + cmd: inner.to_string(), + name: "Cargo.toml".to_string(), + path: PathBuf::from("Cargo.toml"), + }], + ); + } + #[test] fn supports_cat_sed_n() { let inner = "cat tui/Cargo.toml | sed -n '1,200p'"; @@ -313,6 +313,19 @@ mod tests { ); } + #[test] + fn supports_tail_file_only() { + let inner = "tail README.md"; + assert_parsed( + &vec_str(&["bash", "-lc", inner]), + vec![ParsedCommand::Read { + cmd: inner.to_string(), + name: "README.md".to_string(), + path: PathBuf::from("README.md"), + }], + ); + } + #[test] fn supports_npm_run_build_is_unknown() { assert_parsed( @@ -391,6 +404,19 @@ mod tests { ); } + #[test] + fn supports_single_string_script_with_cd_and_pipe() { + let inner = r#"cd /Users/pakrym/code/codex && rg -n "codex_api" codex-rs -S | head -n 50"#; + assert_parsed( + &vec_str(&["bash", "-lc", inner]), + vec![ParsedCommand::Search { + cmd: "rg -n codex_api codex-rs -S".to_string(), + query: Some("codex_api".to_string()), + path: Some("codex-rs".to_string()), + }], + ); + } + // ---- is_small_formatting_command unit tests ---- #[test] fn small_formatting_always_true_commands() { @@ -408,38 +434,43 @@ mod tests { fn head_behavior() { // No args -> small formatting assert!(is_small_formatting_command(&vec_str(&["head"]))); - // Numeric count only -> not considered small formatting by implementation - assert!(!is_small_formatting_command(&shlex_split_safe( - "head -n 40" - 
))); + // Numeric count only -> formatting + assert!(is_small_formatting_command(&shlex_split_safe("head -n 40"))); // With explicit file -> not small formatting assert!(!is_small_formatting_command(&shlex_split_safe( "head -n 40 file.txt" ))); - // File only (no count) -> treated as small formatting by implementation - assert!(is_small_formatting_command(&vec_str(&["head", "file.txt"]))); + // File only (no count) -> not formatting + assert!(!is_small_formatting_command(&vec_str(&[ + "head", "file.txt" + ]))); } #[test] fn tail_behavior() { // No args -> small formatting assert!(is_small_formatting_command(&vec_str(&["tail"]))); - // Numeric with plus offset -> not small formatting - assert!(!is_small_formatting_command(&shlex_split_safe( + // Numeric with plus offset -> formatting + assert!(is_small_formatting_command(&shlex_split_safe( "tail -n +10" ))); assert!(!is_small_formatting_command(&shlex_split_safe( "tail -n +10 file.txt" ))); - // Numeric count - assert!(!is_small_formatting_command(&shlex_split_safe( - "tail -n 30" - ))); + // Numeric count -> formatting + assert!(is_small_formatting_command(&shlex_split_safe("tail -n 30"))); assert!(!is_small_formatting_command(&shlex_split_safe( "tail -n 30 file.txt" ))); - // File only -> small formatting by implementation - assert!(is_small_formatting_command(&vec_str(&["tail", "file.txt"]))); + // Byte count -> formatting + assert!(is_small_formatting_command(&shlex_split_safe("tail -c 30"))); + assert!(is_small_formatting_command(&shlex_split_safe( + "tail -c +10" + ))); + // File only (no count) -> not formatting + assert!(!is_small_formatting_command(&vec_str(&[ + "tail", "file.txt" + ]))); } #[test] @@ -714,20 +745,15 @@ mod tests { #[test] fn bash_dash_c_pipeline_parsing() { - // Ensure -c is handled similarly to -lc by normalization + // Ensure -c is handled similarly to -lc by shell parsing let inner = "rg --files | head -n 1"; assert_parsed( - &shlex_split_safe(inner), - vec![ - ParsedCommand::Search { - 
cmd: "rg --files".to_string(), - query: None, - path: None, - }, - ParsedCommand::Unknown { - cmd: "head -n 1".to_string(), - }, - ], + &vec_str(&["bash", "-c", inner]), + vec![ParsedCommand::Search { + cmd: "rg --files".to_string(), + query: None, + path: None, + }], ); } @@ -1384,13 +1410,50 @@ fn is_small_formatting_command(tokens: &[String]) -> bool { // Treat as formatting when no explicit file operand is present. // Common forms: `head -n 40`, `head -c 100`. // Keep cases like `head -n 40 file`. - tokens.len() < 3 + match tokens { + // `head` + [_] => true, + // `head ` or `head -n50`/`head -c100` + [_, arg] => arg.starts_with('-'), + // `head -n 40` / `head -c 100` (no file operand) + [_, flag, count] + if (flag == "-n" || flag == "-c") + && count.chars().all(|c| c.is_ascii_digit()) => + { + true + } + _ => false, + } } "tail" => { // Treat as formatting when no explicit file operand is present. - // Common forms: `tail -n +10`, `tail -n 30`. + // Common forms: `tail -n +10`, `tail -n 30`, `tail -c 100`. // Keep cases like `tail -n 30 file`. 
- tokens.len() < 3 + match tokens { + // `tail` + [_] => true, + // `tail ` or `tail -n30`/`tail -n+10` + [_, arg] => arg.starts_with('-'), + // `tail -n 30` / `tail -n +10` (no file operand) + [_, flag, count] + if flag == "-n" + && (count.chars().all(|c| c.is_ascii_digit()) + || (count.starts_with('+') + && count[1..].chars().all(|c| c.is_ascii_digit()))) => + { + true + } + // `tail -c 100` / `tail -c +10` (no file operand) + [_, flag, count] + if flag == "-c" + && (count.chars().all(|c| c.is_ascii_digit()) + || (count.starts_with('+') + && count[1..].chars().all(|c| c.is_ascii_digit()))) => + { + true + } + _ => false, + } } "sed" => { // Keep `sed -n file` (treated as a file read elsewhere); @@ -1543,6 +1606,16 @@ fn summarize_main_tokens(main_cmd: &[String]) -> ParsedCommand { }; } } + if let [path] = tail + && !path.starts_with('-') + { + let name = short_display_path(path); + return ParsedCommand::Read { + cmd: shlex_join(main_cmd), + name, + path: PathBuf::from(path), + }; + } ParsedCommand::Unknown { cmd: shlex_join(main_cmd), } @@ -1587,6 +1660,16 @@ fn summarize_main_tokens(main_cmd: &[String]) -> ParsedCommand { }; } } + if let [path] = tail + && !path.starts_with('-') + { + let name = short_display_path(path); + return ParsedCommand::Read { + cmd: shlex_join(main_cmd), + name, + path: PathBuf::from(path), + }; + } ParsedCommand::Unknown { cmd: shlex_join(main_cmd), } diff --git a/codex-rs/core/src/project_doc.rs b/codex-rs/core/src/project_doc.rs index ee3148e7e7..cd05520110 100644 --- a/codex-rs/core/src/project_doc.rs +++ b/codex-rs/core/src/project_doc.rs @@ -15,7 +15,7 @@ use crate::config::Config; use crate::features::Feature; -use crate::skills::load_skills; +use crate::skills::SkillMetadata; use crate::skills::render_skills_section; use dunce::canonicalize as normalize_path; use std::path::PathBuf; @@ -33,17 +33,12 @@ const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n"; /// Combines `Config::instructions` and `AGENTS.md` (if 
present) into a single /// string of instructions. -pub(crate) async fn get_user_instructions(config: &Config) -> Option { +pub(crate) async fn get_user_instructions( + config: &Config, + skills: Option<&[SkillMetadata]>, +) -> Option { let skills_section = if config.features.enabled(Feature::Skills) { - let skills_outcome = load_skills(config); - for err in &skills_outcome.errors { - error!( - "failed to load skill {}: {}", - err.path.display(), - err.message - ); - } - render_skills_section(&skills_outcome.skills) + skills.and_then(render_skills_section) } else { None }; @@ -244,6 +239,7 @@ mod tests { use super::*; use crate::config::ConfigOverrides; use crate::config::ConfigToml; + use crate::skills::load_skills; use std::fs; use std::path::PathBuf; use tempfile::TempDir; @@ -289,7 +285,7 @@ mod tests { async fn no_doc_file_returns_none() { let tmp = tempfile::tempdir().expect("tempdir"); - let res = get_user_instructions(&make_config(&tmp, 4096, None)).await; + let res = get_user_instructions(&make_config(&tmp, 4096, None), None).await; assert!( res.is_none(), "Expected None when AGENTS.md is absent and no system instructions provided" @@ -303,7 +299,7 @@ mod tests { let tmp = tempfile::tempdir().expect("tempdir"); fs::write(tmp.path().join("AGENTS.md"), "hello world").unwrap(); - let res = get_user_instructions(&make_config(&tmp, 4096, None)) + let res = get_user_instructions(&make_config(&tmp, 4096, None), None) .await .expect("doc expected"); @@ -322,7 +318,7 @@ mod tests { let huge = "A".repeat(LIMIT * 2); // 2 KiB fs::write(tmp.path().join("AGENTS.md"), &huge).unwrap(); - let res = get_user_instructions(&make_config(&tmp, LIMIT, None)) + let res = get_user_instructions(&make_config(&tmp, LIMIT, None), None) .await .expect("doc expected"); @@ -354,7 +350,9 @@ mod tests { let mut cfg = make_config(&repo, 4096, None); cfg.cwd = nested; - let res = get_user_instructions(&cfg).await.expect("doc expected"); + let res = get_user_instructions(&cfg, None) + .await 
+ .expect("doc expected"); assert_eq!(res, "root level doc"); } @@ -364,7 +362,7 @@ mod tests { let tmp = tempfile::tempdir().expect("tempdir"); fs::write(tmp.path().join("AGENTS.md"), "something").unwrap(); - let res = get_user_instructions(&make_config(&tmp, 0, None)).await; + let res = get_user_instructions(&make_config(&tmp, 0, None), None).await; assert!( res.is_none(), "With limit 0 the function should return None" @@ -380,7 +378,7 @@ mod tests { const INSTRUCTIONS: &str = "base instructions"; - let res = get_user_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS))) + let res = get_user_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS)), None) .await .expect("should produce a combined instruction string"); @@ -397,7 +395,7 @@ mod tests { const INSTRUCTIONS: &str = "some instructions"; - let res = get_user_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS))).await; + let res = get_user_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS)), None).await; assert_eq!(res, Some(INSTRUCTIONS.to_string())); } @@ -426,7 +424,9 @@ mod tests { let mut cfg = make_config(&repo, 4096, None); cfg.cwd = nested; - let res = get_user_instructions(&cfg).await.expect("doc expected"); + let res = get_user_instructions(&cfg, None) + .await + .expect("doc expected"); assert_eq!(res, "root doc\n\ncrate doc"); } @@ -439,7 +439,7 @@ mod tests { let cfg = make_config(&tmp, 4096, None); - let res = get_user_instructions(&cfg) + let res = get_user_instructions(&cfg, None) .await .expect("local doc expected"); @@ -461,7 +461,7 @@ mod tests { let cfg = make_config_with_fallback(&tmp, 4096, None, &["EXAMPLE.md"]); - let res = get_user_instructions(&cfg) + let res = get_user_instructions(&cfg, None) .await .expect("fallback doc expected"); @@ -477,7 +477,7 @@ mod tests { let cfg = make_config_with_fallback(&tmp, 4096, None, &["EXAMPLE.md", ".example.md"]); - let res = get_user_instructions(&cfg) + let res = get_user_instructions(&cfg, None) .await .expect("AGENTS.md 
should win"); @@ -506,9 +506,13 @@ mod tests { "extract from pdfs", ); - let res = get_user_instructions(&cfg) - .await - .expect("instructions expected"); + let skills = load_skills(&cfg); + let res = get_user_instructions( + &cfg, + skills.errors.is_empty().then_some(skills.skills.as_slice()), + ) + .await + .expect("instructions expected"); let expected_path = dunce::canonicalize( cfg.codex_home .join("skills/pdf-processing/SKILL.md") @@ -516,8 +520,9 @@ mod tests { ) .unwrap_or_else(|_| cfg.codex_home.join("skills/pdf-processing/SKILL.md")); let expected_path_str = expected_path.to_string_lossy().replace('\\', "/"); + let usage_rules = "- Discovery: Available skills are listed in project docs and may also appear in a runtime \"## Skills\" section (name + description + file path). These are the sources of truth; skill bodies live on disk at the listed paths.\n- Trigger rules: If the user names a skill (with `$SkillName` or plain text) OR the task clearly matches a skill's description, you must use that skill for that turn. Multiple mentions mean use them all. Do not carry skills across turns unless re-mentioned.\n- Missing/blocked: If a named skill isn't in the list or the path can't be read, say so briefly and continue with the best fallback.\n- How to use a skill (progressive disclosure):\n 1) After deciding to use a skill, open its `SKILL.md`. Read only enough to follow the workflow.\n 2) If `SKILL.md` points to extra folders such as `references/`, load only the specific files needed for the request; don't bulk-load everything.\n 3) If `scripts/` exist, prefer running or patching them instead of retyping large code blocks.\n 4) If `assets/` or templates exist, reuse them instead of recreating from scratch.\n- Description as trigger: The YAML `description` in `SKILL.md` is the primary trigger signal; rely on it to decide applicability. 
If unsure, ask a brief clarification before proceeding.\n- Coordination and sequencing:\n - If multiple skills apply, choose the minimal set that covers the request and state the order you'll use them.\n - Announce which skill(s) you're using and why (one short line). If you skip an obvious skill, say why.\n- Context hygiene:\n - Keep context small: summarize long sections instead of pasting them; only load extra files when needed.\n - Avoid deeply nested references; prefer one-hop files explicitly linked from `SKILL.md`.\n - When variants exist (frameworks, providers, domains), pick only the relevant reference file(s) and note that choice.\n- Safety and fallback: If a skill can't be applied cleanly (missing files, unclear instructions), state the issue, pick the next-best approach, and continue."; let expected = format!( - "base doc\n\n## Skills\nThese skills are discovered at startup from ~/.codex/skills; each entry shows name, description, and file path so you can open the source for full instructions. Content is not inlined to keep context lean.\n- pdf-processing: extract from pdfs (file: {expected_path_str})" + "base doc\n\n## Skills\nThese skills are discovered at startup from ~/.codex/skills; each entry shows name, description, and file path so you can open the source for full instructions. 
Content is not inlined to keep context lean.\n- pdf-processing: extract from pdfs (file: {expected_path_str})\n{usage_rules}" ); assert_eq!(res, expected); } @@ -528,15 +533,20 @@ mod tests { let cfg = make_config(&tmp, 4096, None); create_skill(cfg.codex_home.clone(), "linting", "run clippy"); - let res = get_user_instructions(&cfg) - .await - .expect("instructions expected"); + let skills = load_skills(&cfg); + let res = get_user_instructions( + &cfg, + skills.errors.is_empty().then_some(skills.skills.as_slice()), + ) + .await + .expect("instructions expected"); let expected_path = dunce::canonicalize(cfg.codex_home.join("skills/linting/SKILL.md").as_path()) .unwrap_or_else(|_| cfg.codex_home.join("skills/linting/SKILL.md")); let expected_path_str = expected_path.to_string_lossy().replace('\\', "/"); + let usage_rules = "- Discovery: Available skills are listed in project docs and may also appear in a runtime \"## Skills\" section (name + description + file path). These are the sources of truth; skill bodies live on disk at the listed paths.\n- Trigger rules: If the user names a skill (with `$SkillName` or plain text) OR the task clearly matches a skill's description, you must use that skill for that turn. Multiple mentions mean use them all. Do not carry skills across turns unless re-mentioned.\n- Missing/blocked: If a named skill isn't in the list or the path can't be read, say so briefly and continue with the best fallback.\n- How to use a skill (progressive disclosure):\n 1) After deciding to use a skill, open its `SKILL.md`. 
Read only enough to follow the workflow.\n 2) If `SKILL.md` points to extra folders such as `references/`, load only the specific files needed for the request; don't bulk-load everything.\n 3) If `scripts/` exist, prefer running or patching them instead of retyping large code blocks.\n 4) If `assets/` or templates exist, reuse them instead of recreating from scratch.\n- Description as trigger: The YAML `description` in `SKILL.md` is the primary trigger signal; rely on it to decide applicability. If unsure, ask a brief clarification before proceeding.\n- Coordination and sequencing:\n - If multiple skills apply, choose the minimal set that covers the request and state the order you'll use them.\n - Announce which skill(s) you're using and why (one short line). If you skip an obvious skill, say why.\n- Context hygiene:\n - Keep context small: summarize long sections instead of pasting them; only load extra files when needed.\n - Avoid deeply nested references; prefer one-hop files explicitly linked from `SKILL.md`.\n - When variants exist (frameworks, providers, domains), pick only the relevant reference file(s) and note that choice.\n- Safety and fallback: If a skill can't be applied cleanly (missing files, unclear instructions), state the issue, pick the next-best approach, and continue."; let expected = format!( - "## Skills\nThese skills are discovered at startup from ~/.codex/skills; each entry shows name, description, and file path so you can open the source for full instructions. Content is not inlined to keep context lean.\n- linting: run clippy (file: {expected_path_str})" + "## Skills\nThese skills are discovered at startup from ~/.codex/skills; each entry shows name, description, and file path so you can open the source for full instructions. 
Content is not inlined to keep context lean.\n- linting: run clippy (file: {expected_path_str})\n{usage_rules}" ); assert_eq!(res, expected); } diff --git a/codex-rs/core/src/response_processing.rs b/codex-rs/core/src/response_processing.rs deleted file mode 100644 index 458f82526a..0000000000 --- a/codex-rs/core/src/response_processing.rs +++ /dev/null @@ -1,70 +0,0 @@ -use crate::codex::Session; -use crate::codex::TurnContext; -use codex_protocol::models::FunctionCallOutputPayload; -use codex_protocol::models::ResponseInputItem; -use codex_protocol::models::ResponseItem; -use tracing::warn; - -/// Process streamed `ResponseItem`s from the model into the pair of: -/// - items we should record in conversation history; and -/// - `ResponseInputItem`s to send back to the model on the next turn. -pub(crate) async fn process_items( - processed_items: Vec, - sess: &Session, - turn_context: &TurnContext, -) -> (Vec, Vec) { - let mut outputs_to_record = Vec::::new(); - let mut new_inputs_to_record = Vec::::new(); - let mut responses = Vec::::new(); - for processed_response_item in processed_items { - let crate::codex::ProcessedResponseItem { item, response } = processed_response_item; - - if let Some(response) = &response { - responses.push(response.clone()); - } - - match response { - Some(ResponseInputItem::FunctionCallOutput { call_id, output }) => { - new_inputs_to_record.push(ResponseItem::FunctionCallOutput { - call_id: call_id.clone(), - output: output.clone(), - }); - } - - Some(ResponseInputItem::CustomToolCallOutput { call_id, output }) => { - new_inputs_to_record.push(ResponseItem::CustomToolCallOutput { - call_id: call_id.clone(), - output: output.clone(), - }); - } - Some(ResponseInputItem::McpToolCallOutput { call_id, result }) => { - let output = match result { - Ok(call_tool_result) => FunctionCallOutputPayload::from(&call_tool_result), - Err(err) => FunctionCallOutputPayload { - content: err.clone(), - success: Some(false), - ..Default::default() - }, - 
}; - new_inputs_to_record.push(ResponseItem::FunctionCallOutput { - call_id: call_id.clone(), - output, - }); - } - None => {} - _ => { - warn!("Unexpected response item: {item:?} with response: {response:?}"); - } - }; - - outputs_to_record.push(item); - } - - let all_items_to_record = [outputs_to_record, new_inputs_to_record].concat(); - // Only attempt to take the lock if there is something to record. - if !all_items_to_record.is_empty() { - sess.record_conversation_items(turn_context, &all_items_to_record) - .await; - } - (responses, all_items_to_record) -} diff --git a/codex-rs/core/src/rollout/policy.rs b/codex-rs/core/src/rollout/policy.rs index 58072f9336..fc6e4b9afd 100644 --- a/codex-rs/core/src/rollout/policy.rs +++ b/codex-rs/core/src/rollout/policy.rs @@ -62,6 +62,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool { | EventMsg::WebSearchBegin(_) | EventMsg::WebSearchEnd(_) | EventMsg::ExecCommandBegin(_) + | EventMsg::TerminalInteraction(_) | EventMsg::ExecCommandOutputDelta(_) | EventMsg::ExecCommandEnd(_) | EventMsg::ExecApprovalRequest(_) diff --git a/codex-rs/core/src/sandboxing/assessment.rs b/codex-rs/core/src/sandboxing/assessment.rs deleted file mode 100644 index 719e3be1f0..0000000000 --- a/codex-rs/core/src/sandboxing/assessment.rs +++ /dev/null @@ -1,262 +0,0 @@ -use std::path::Path; -use std::path::PathBuf; -use std::sync::Arc; -use std::time::Duration; -use std::time::Instant; - -use crate::AuthManager; -use crate::ModelProviderInfo; -use crate::client::ModelClient; -use crate::client_common::Prompt; -use crate::client_common::ResponseEvent; -use crate::config::Config; -use crate::protocol::SandboxPolicy; -use askama::Template; -use codex_otel::otel_event_manager::OtelEventManager; -use codex_protocol::ConversationId; -use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig; -use codex_protocol::models::ContentItem; -use codex_protocol::models::ResponseItem; -use 
codex_protocol::protocol::SandboxCommandAssessment; -use codex_protocol::protocol::SessionSource; -use futures::StreamExt; -use serde_json::json; -use tokio::time::timeout; -use tracing::warn; - -const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(15); -const SANDBOX_ASSESSMENT_REASONING_EFFORT: ReasoningEffortConfig = ReasoningEffortConfig::Medium; - -#[derive(Template)] -#[template(path = "sandboxing/assessment_prompt.md", escape = "none")] -struct SandboxAssessmentPromptTemplate<'a> { - platform: &'a str, - sandbox_policy: &'a str, - filesystem_roots: Option<&'a str>, - working_directory: &'a str, - command_argv: &'a str, - command_joined: &'a str, - sandbox_failure_message: Option<&'a str>, -} - -#[allow(clippy::too_many_arguments)] -pub(crate) async fn assess_command( - config: Arc, - provider: ModelProviderInfo, - auth_manager: Arc, - parent_otel: &OtelEventManager, - conversation_id: ConversationId, - session_source: SessionSource, - call_id: &str, - command: &[String], - sandbox_policy: &SandboxPolicy, - cwd: &Path, - failure_message: Option<&str>, -) -> Option { - if !config.experimental_sandbox_command_assessment || command.is_empty() { - return None; - } - - let command_json = serde_json::to_string(command).unwrap_or_else(|_| "[]".to_string()); - let command_joined = - shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" ")); - let failure = failure_message - .map(str::trim) - .filter(|msg| !msg.is_empty()) - .map(str::to_string); - - let cwd_str = cwd.to_string_lossy().to_string(); - let sandbox_summary = summarize_sandbox_policy(sandbox_policy); - let mut roots = sandbox_roots_for_prompt(sandbox_policy, cwd); - roots.sort(); - roots.dedup(); - - let platform = std::env::consts::OS; - let roots_formatted = roots.iter().map(|root| root.to_string_lossy().to_string()); - let filesystem_roots = match roots_formatted.collect::>() { - collected if collected.is_empty() => None, - collected => Some(collected.join(", 
")), - }; - - let prompt_template = SandboxAssessmentPromptTemplate { - platform, - sandbox_policy: sandbox_summary.as_str(), - filesystem_roots: filesystem_roots.as_deref(), - working_directory: cwd_str.as_str(), - command_argv: command_json.as_str(), - command_joined: command_joined.as_str(), - sandbox_failure_message: failure.as_deref(), - }; - let rendered_prompt = match prompt_template.render() { - Ok(rendered) => rendered, - Err(err) => { - warn!("failed to render sandbox assessment prompt: {err}"); - return None; - } - }; - let (system_prompt_section, user_prompt_section) = match rendered_prompt.split_once("\n---\n") { - Some(split) => split, - None => { - warn!("rendered sandbox assessment prompt missing separator"); - return None; - } - }; - let system_prompt = system_prompt_section - .strip_prefix("System Prompt:\n") - .unwrap_or(system_prompt_section) - .trim() - .to_string(); - let user_prompt = user_prompt_section - .strip_prefix("User Prompt:\n") - .unwrap_or(user_prompt_section) - .trim() - .to_string(); - - let prompt = Prompt { - input: vec![ResponseItem::Message { - id: None, - role: "user".to_string(), - content: vec![ContentItem::InputText { text: user_prompt }], - }], - tools: Vec::new(), - parallel_tool_calls: false, - base_instructions_override: Some(system_prompt), - output_schema: Some(sandbox_assessment_schema()), - }; - - let child_otel = - parent_otel.with_model(config.model.as_str(), config.model_family.slug.as_str()); - - let client = ModelClient::new( - Arc::clone(&config), - Some(auth_manager), - child_otel, - provider, - Some(SANDBOX_ASSESSMENT_REASONING_EFFORT), - config.model_reasoning_summary, - conversation_id, - session_source, - ); - - let start = Instant::now(); - let assessment_result = timeout(SANDBOX_ASSESSMENT_TIMEOUT, async move { - let mut stream = client.stream(&prompt).await?; - let mut last_json: Option = None; - while let Some(event) = stream.next().await { - match event { - Ok(ResponseEvent::OutputItemDone(item)) 
=> { - if let Some(text) = response_item_text(&item) { - last_json = Some(text); - } - } - Ok(ResponseEvent::RateLimits(_)) => {} - Ok(ResponseEvent::Completed { .. }) => break, - Ok(_) => continue, - Err(err) => return Err(err), - } - } - Ok(last_json) - }) - .await; - let duration = start.elapsed(); - parent_otel.sandbox_assessment_latency(call_id, duration); - - match assessment_result { - Ok(Ok(Some(raw))) => match serde_json::from_str::(raw.trim()) { - Ok(assessment) => { - parent_otel.sandbox_assessment( - call_id, - "success", - Some(assessment.risk_level), - duration, - ); - return Some(assessment); - } - Err(err) => { - warn!("failed to parse sandbox assessment JSON: {err}"); - parent_otel.sandbox_assessment(call_id, "parse_error", None, duration); - } - }, - Ok(Ok(None)) => { - warn!("sandbox assessment response did not include any message"); - parent_otel.sandbox_assessment(call_id, "no_output", None, duration); - } - Ok(Err(err)) => { - warn!("sandbox assessment failed: {err}"); - parent_otel.sandbox_assessment(call_id, "model_error", None, duration); - } - Err(_) => { - warn!("sandbox assessment timed out"); - parent_otel.sandbox_assessment(call_id, "timeout", None, duration); - } - } - - None -} - -fn summarize_sandbox_policy(policy: &SandboxPolicy) -> String { - match policy { - SandboxPolicy::DangerFullAccess => "danger-full-access".to_string(), - SandboxPolicy::ReadOnly => "read-only".to_string(), - SandboxPolicy::WorkspaceWrite { network_access, .. } => { - let network = if *network_access { - "network" - } else { - "no-network" - }; - format!("workspace-write (network_access={network})") - } - } -} - -fn sandbox_roots_for_prompt(policy: &SandboxPolicy, cwd: &Path) -> Vec { - let mut roots = vec![cwd.to_path_buf()]; - if let SandboxPolicy::WorkspaceWrite { writable_roots, .. 
} = policy { - roots.extend(writable_roots.iter().cloned()); - } - roots -} - -fn sandbox_assessment_schema() -> serde_json::Value { - json!({ - "type": "object", - "required": ["description", "risk_level"], - "properties": { - "description": { - "type": "string", - "minLength": 1, - "maxLength": 500 - }, - "risk_level": { - "type": "string", - "enum": ["low", "medium", "high"] - }, - }, - "additionalProperties": false - }) -} - -fn response_item_text(item: &ResponseItem) -> Option { - match item { - ResponseItem::Message { content, .. } => { - let mut buffers: Vec<&str> = Vec::new(); - for segment in content { - match segment { - ContentItem::InputText { text } | ContentItem::OutputText { text } => { - if !text.is_empty() { - buffers.push(text); - } - } - ContentItem::InputImage { .. } => {} - } - } - if buffers.is_empty() { - None - } else { - Some(buffers.join("\n")) - } - } - ResponseItem::FunctionCallOutput { output, .. } => Some(output.content.clone()), - _ => None, - } -} diff --git a/codex-rs/core/src/sandboxing/mod.rs b/codex-rs/core/src/sandboxing/mod.rs index d43646021e..5d719a7922 100644 --- a/codex-rs/core/src/sandboxing/mod.rs +++ b/codex-rs/core/src/sandboxing/mod.rs @@ -6,8 +6,6 @@ sandbox placement and transformation of portable CommandSpec into a ready‑to‑spawn environment. 
*/ -pub mod assessment; - use crate::exec::ExecExpiration; use crate::exec::ExecToolCallOutput; use crate::exec::SandboxType; @@ -23,32 +21,11 @@ use crate::seatbelt::create_seatbelt_command_args; use crate::spawn::CODEX_SANDBOX_ENV_VAR; use crate::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use crate::tools::sandboxing::SandboxablePreference; +pub use codex_protocol::models::SandboxPermissions; use std::collections::HashMap; use std::path::Path; use std::path::PathBuf; -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum SandboxPermissions { - UseDefault, - RequireEscalated, -} - -impl SandboxPermissions { - pub fn requires_escalated_permissions(self) -> bool { - matches!(self, SandboxPermissions::RequireEscalated) - } -} - -impl From for SandboxPermissions { - fn from(with_escalated_permissions: bool) -> Self { - if with_escalated_permissions { - SandboxPermissions::RequireEscalated - } else { - SandboxPermissions::UseDefault - } - } -} - #[derive(Debug)] pub struct CommandSpec { pub program: String, @@ -56,7 +33,7 @@ pub struct CommandSpec { pub cwd: PathBuf, pub env: HashMap, pub expiration: ExecExpiration, - pub with_escalated_permissions: Option, + pub sandbox_permissions: SandboxPermissions, pub justification: Option, } @@ -67,7 +44,7 @@ pub struct ExecEnv { pub env: HashMap, pub expiration: ExecExpiration, pub sandbox: SandboxType, - pub with_escalated_permissions: Option, + pub sandbox_permissions: SandboxPermissions, pub justification: Option, pub arg0: Option, } @@ -181,7 +158,7 @@ impl SandboxManager { env, expiration: spec.expiration, sandbox, - with_escalated_permissions: spec.with_escalated_permissions, + sandbox_permissions: spec.sandbox_permissions, justification: spec.justification, arg0: arg0_override, }) diff --git a/codex-rs/core/src/seatbelt_base_policy.sbpl b/codex-rs/core/src/seatbelt_base_policy.sbpl index 8ccfa6e824..236f7a1398 100644 --- a/codex-rs/core/src/seatbelt_base_policy.sbpl +++ b/codex-rs/core/src/seatbelt_base_policy.sbpl 
@@ -102,3 +102,6 @@ (require-all (regex #"^/dev/ttys[0-9]+") (extension "com.apple.sandbox.pty"))) +; PTYs created before entering seatbelt may lack the extension; allow ioctl +; on those slave ttys so interactive shells detect a TTY and remain functional. +(allow file-ioctl (regex #"^/dev/ttys[0-9]+")) diff --git a/codex-rs/core/src/shell.rs b/codex-rs/core/src/shell.rs index ac115facb6..d22b6543a9 100644 --- a/codex-rs/core/src/shell.rs +++ b/codex-rs/core/src/shell.rs @@ -1,6 +1,9 @@ use serde::Deserialize; use serde::Serialize; use std::path::PathBuf; +use std::sync::Arc; + +use crate::shell_snapshot::ShellSnapshot; #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] pub enum ShellType { @@ -15,6 +18,8 @@ pub enum ShellType { pub struct Shell { pub(crate) shell_type: ShellType, pub(crate) shell_path: PathBuf, + #[serde(skip_serializing, skip_deserializing, default)] + pub(crate) shell_snapshot: Option>, } impl Shell { @@ -134,6 +139,7 @@ fn get_zsh_shell(path: Option<&PathBuf>) -> Option { shell_path.map(|shell_path| Shell { shell_type: ShellType::Zsh, shell_path, + shell_snapshot: None, }) } @@ -143,6 +149,7 @@ fn get_bash_shell(path: Option<&PathBuf>) -> Option { shell_path.map(|shell_path| Shell { shell_type: ShellType::Bash, shell_path, + shell_snapshot: None, }) } @@ -152,6 +159,7 @@ fn get_sh_shell(path: Option<&PathBuf>) -> Option { shell_path.map(|shell_path| Shell { shell_type: ShellType::Sh, shell_path, + shell_snapshot: None, }) } @@ -167,6 +175,7 @@ fn get_powershell_shell(path: Option<&PathBuf>) -> Option { shell_path.map(|shell_path| Shell { shell_type: ShellType::PowerShell, shell_path, + shell_snapshot: None, }) } @@ -176,6 +185,7 @@ fn get_cmd_shell(path: Option<&PathBuf>) -> Option { shell_path.map(|shell_path| Shell { shell_type: ShellType::Cmd, shell_path, + shell_snapshot: None, }) } @@ -184,11 +194,13 @@ fn ultimate_fallback_shell() -> Shell { Shell { shell_type: ShellType::Cmd, shell_path: PathBuf::from("cmd.exe"), + 
shell_snapshot: None, } } else { Shell { shell_type: ShellType::Sh, shell_path: PathBuf::from("/bin/sh"), + shell_snapshot: None, } } } @@ -408,6 +420,51 @@ mod tests { } } + #[test] + fn derive_exec_args() { + let test_bash_shell = Shell { + shell_type: ShellType::Bash, + shell_path: PathBuf::from("/bin/bash"), + shell_snapshot: None, + }; + assert_eq!( + test_bash_shell.derive_exec_args("echo hello", false), + vec!["/bin/bash", "-c", "echo hello"] + ); + assert_eq!( + test_bash_shell.derive_exec_args("echo hello", true), + vec!["/bin/bash", "-lc", "echo hello"] + ); + + let test_zsh_shell = Shell { + shell_type: ShellType::Zsh, + shell_path: PathBuf::from("/bin/zsh"), + shell_snapshot: None, + }; + assert_eq!( + test_zsh_shell.derive_exec_args("echo hello", false), + vec!["/bin/zsh", "-c", "echo hello"] + ); + assert_eq!( + test_zsh_shell.derive_exec_args("echo hello", true), + vec!["/bin/zsh", "-lc", "echo hello"] + ); + + let test_powershell_shell = Shell { + shell_type: ShellType::PowerShell, + shell_path: PathBuf::from("pwsh.exe"), + shell_snapshot: None, + }; + assert_eq!( + test_powershell_shell.derive_exec_args("echo hello", false), + vec!["pwsh.exe", "-NoProfile", "-Command", "echo hello"] + ); + assert_eq!( + test_powershell_shell.derive_exec_args("echo hello", true), + vec!["pwsh.exe", "-Command", "echo hello"] + ); + } + #[tokio::test] async fn test_current_shell_detects_zsh() { let shell = Command::new("sh") @@ -423,6 +480,7 @@ mod tests { Shell { shell_type: ShellType::Zsh, shell_path: PathBuf::from(shell_path), + shell_snapshot: None, } ); } diff --git a/codex-rs/core/src/shell_snapshot.rs b/codex-rs/core/src/shell_snapshot.rs new file mode 100644 index 0000000000..b275487563 --- /dev/null +++ b/codex-rs/core/src/shell_snapshot.rs @@ -0,0 +1,416 @@ +use std::path::Path; +use std::path::PathBuf; +use std::time::Duration; + +use crate::shell::Shell; +use crate::shell::ShellType; +use crate::shell::get_shell; +use anyhow::Context; +use anyhow::Result; 
+use anyhow::anyhow; +use anyhow::bail; +use tokio::fs; +use tokio::process::Command; +use tokio::time::timeout; +use uuid::Uuid; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ShellSnapshot { + pub path: PathBuf, +} + +const SNAPSHOT_TIMEOUT: Duration = Duration::from_secs(10); + +impl ShellSnapshot { + pub async fn try_new(codex_home: &Path, shell: &Shell) -> Option { + let extension = match shell.shell_type { + ShellType::PowerShell => "ps1", + _ => "sh", + }; + let path = + codex_home + .join("shell_snapshots") + .join(format!("{}.{}", Uuid::new_v4(), extension)); + match write_shell_snapshot(shell.shell_type.clone(), &path).await { + Ok(path) => { + tracing::info!("Shell snapshot successfully created: {}", path.display()); + Some(Self { path }) + } + Err(err) => { + tracing::warn!( + "Failed to create shell snapshot for {}: {err:?}", + shell.name() + ); + None + } + } + } +} + +impl Drop for ShellSnapshot { + fn drop(&mut self) { + if let Err(err) = std::fs::remove_file(&self.path) { + tracing::warn!( + "Failed to delete shell snapshot at {:?}: {err:?}", + self.path + ); + } + } +} + +pub async fn write_shell_snapshot(shell_type: ShellType, output_path: &Path) -> Result { + if shell_type == ShellType::PowerShell || shell_type == ShellType::Cmd { + bail!("Shell snapshot not supported yet for {shell_type:?}"); + } + let shell = get_shell(shell_type.clone(), None) + .with_context(|| format!("No available shell for {shell_type:?}"))?; + + let raw_snapshot = capture_snapshot(&shell).await?; + let snapshot = strip_snapshot_preamble(&raw_snapshot)?; + + if let Some(parent) = output_path.parent() { + let parent_display = parent.display(); + fs::create_dir_all(parent) + .await + .with_context(|| format!("Failed to create snapshot parent {parent_display}"))?; + } + + let snapshot_path = output_path.display(); + fs::write(output_path, snapshot) + .await + .with_context(|| format!("Failed to write snapshot to {snapshot_path}"))?; + + Ok(output_path.to_path_buf()) 
+} + +async fn capture_snapshot(shell: &Shell) -> Result { + let shell_type = shell.shell_type.clone(); + match shell_type { + ShellType::Zsh => run_shell_script(shell, zsh_snapshot_script()).await, + ShellType::Bash => run_shell_script(shell, bash_snapshot_script()).await, + ShellType::Sh => run_shell_script(shell, sh_snapshot_script()).await, + ShellType::PowerShell => run_shell_script(shell, powershell_snapshot_script()).await, + ShellType::Cmd => bail!("Shell snapshotting is not yet supported for {shell_type:?}"), + } +} + +fn strip_snapshot_preamble(snapshot: &str) -> Result { + let marker = "# Snapshot file"; + let Some(start) = snapshot.find(marker) else { + bail!("Snapshot output missing marker {marker}"); + }; + + Ok(snapshot[start..].to_string()) +} + +async fn run_shell_script(shell: &Shell, script: &str) -> Result { + run_shell_script_with_timeout(shell, script, SNAPSHOT_TIMEOUT).await +} + +async fn run_shell_script_with_timeout( + shell: &Shell, + script: &str, + snapshot_timeout: Duration, +) -> Result { + let args = shell.derive_exec_args(script, true); + let shell_name = shell.name(); + + // The command is held in a binding so its kill_on_drop guard can reap the child on timeout; + // `mut` is required because `.args()` takes `&mut self`. + let mut handler = Command::new(&args[0]); + handler.args(&args[1..]); + handler.kill_on_drop(true); + let output = timeout(snapshot_timeout, handler.output()) + .await + .map_err(|_| anyhow!("Snapshot command timed out for {shell_name}"))?
+ .with_context(|| format!("Failed to execute {shell_name}"))?; + + if !output.status.success() { + let status = output.status; + let stderr = String::from_utf8_lossy(&output.stderr); + bail!("Snapshot command exited with status {status}: {stderr}"); + } + + Ok(String::from_utf8_lossy(&output.stdout).into_owned()) +} + +fn zsh_snapshot_script() -> &'static str { + r##"print '# Snapshot file' +print '# Unset all aliases to avoid conflicts with functions' +print 'unalias -a 2>/dev/null || true' +print '# Functions' +functions +print '' +setopt_count=$(setopt | wc -l | tr -d ' ') +print "# setopts $setopt_count" +setopt | sed 's/^/setopt /' +print '' +alias_count=$(alias -L | wc -l | tr -d ' ') +print "# aliases $alias_count" +alias -L +print '' +export_count=$(export -p | wc -l | tr -d ' ') +print "# exports $export_count" +export -p +"## +} + +fn bash_snapshot_script() -> &'static str { + r##"echo '# Snapshot file' +echo '# Unset all aliases to avoid conflicts with functions' +unalias -a 2>/dev/null || true +echo '# Functions' +declare -f +echo '' +bash_opts=$(set -o | awk '$2=="on"{print $1}') +bash_opt_count=$(printf '%s\n' "$bash_opts" | sed '/^$/d' | wc -l | tr -d ' ') +echo "# setopts $bash_opt_count" +if [ -n "$bash_opts" ]; then + printf 'set -o %s\n' $bash_opts +fi +echo '' +alias_count=$(alias -p | wc -l | tr -d ' ') +echo "# aliases $alias_count" +alias -p +echo '' +export_count=$(export -p | wc -l | tr -d ' ') +echo "# exports $export_count" +export -p +"## +} + +fn sh_snapshot_script() -> &'static str { + r##"echo '# Snapshot file' +echo '# Unset all aliases to avoid conflicts with functions' +unalias -a 2>/dev/null || true +echo '# Functions' +if command -v typeset >/dev/null 2>&1; then + typeset -f +elif command -v declare >/dev/null 2>&1; then + declare -f +fi +echo '' +if set -o >/dev/null 2>&1; then + sh_opts=$(set -o | awk '$2=="on"{print $1}') + sh_opt_count=$(printf '%s\n' "$sh_opts" | sed '/^$/d' | wc -l | tr -d ' ') + echo "# setopts 
$sh_opt_count" + if [ -n "$sh_opts" ]; then + printf 'set -o %s\n' $sh_opts + fi +else + echo '# setopts 0' +fi +echo '' +if alias >/dev/null 2>&1; then + alias_count=$(alias | wc -l | tr -d ' ') + echo "# aliases $alias_count" + alias + echo '' +else + echo '# aliases 0' +fi +if export -p >/dev/null 2>&1; then + export_count=$(export -p | wc -l | tr -d ' ') + echo "# exports $export_count" + export -p +else + export_count=$(env | wc -l | tr -d ' ') + echo "# exports $export_count" + env | sort | while IFS='=' read -r key value; do + escaped=$(printf "%s" "$value" | sed "s/'/'\"'\"'/g") + printf "export %s='%s'\n" "$key" "$escaped" + done +fi +"## +} + +fn powershell_snapshot_script() -> &'static str { + r##"$ErrorActionPreference = 'Stop' +Write-Output '# Snapshot file' +Write-Output '# Unset all aliases to avoid conflicts with functions' +Write-Output 'Remove-Item Alias:* -ErrorAction SilentlyContinue' +Write-Output '# Functions' +Get-ChildItem Function: | ForEach-Object { + "function {0} {{`n{1}`n}}" -f $_.Name, $_.Definition +} +Write-Output '' +$aliases = Get-Alias +Write-Output ("# aliases " + $aliases.Count) +$aliases | ForEach-Object { + "Set-Alias -Name {0} -Value {1}" -f $_.Name, $_.Definition +} +Write-Output '' +$envVars = Get-ChildItem Env: +Write-Output ("# exports " + $envVars.Count) +$envVars | ForEach-Object { + $escaped = $_.Value -replace "'", "''" + "`$env:{0}='{1}'" -f $_.Name, $escaped +} +"## +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + #[cfg(target_os = "linux")] + use std::os::unix::fs::PermissionsExt; + #[cfg(target_os = "linux")] + use std::process::Command as StdCommand; + + use tempfile::tempdir; + + #[cfg(not(target_os = "windows"))] + fn assert_posix_snapshot_sections(snapshot: &str) { + assert!(snapshot.contains("# Snapshot file")); + assert!(snapshot.contains("aliases ")); + assert!(snapshot.contains("exports ")); + assert!( + snapshot.contains("PATH"), + "snapshot should capture a PATH 
export" + ); + assert!(snapshot.contains("setopts ")); + } + + async fn get_snapshot(shell_type: ShellType) -> Result { + let dir = tempdir()?; + let path = dir.path().join("snapshot.sh"); + write_shell_snapshot(shell_type, &path).await?; + let content = fs::read_to_string(&path).await?; + Ok(content) + } + + #[test] + fn strip_snapshot_preamble_removes_leading_output() { + let snapshot = "noise\n# Snapshot file\nexport PATH=/bin\n"; + let cleaned = strip_snapshot_preamble(snapshot).expect("snapshot marker exists"); + assert_eq!(cleaned, "# Snapshot file\nexport PATH=/bin\n"); + } + + #[test] + fn strip_snapshot_preamble_requires_marker() { + let result = strip_snapshot_preamble("missing header"); + assert!(result.is_err()); + } + + #[cfg(unix)] + #[tokio::test] + async fn try_new_creates_and_deletes_snapshot_file() -> Result<()> { + let dir = tempdir()?; + let shell = Shell { + shell_type: ShellType::Bash, + shell_path: PathBuf::from("/bin/bash"), + shell_snapshot: None, + }; + + let snapshot = ShellSnapshot::try_new(dir.path(), &shell) + .await + .expect("snapshot should be created"); + let path = snapshot.path.clone(); + assert!(path.exists()); + + drop(snapshot); + + assert!(!path.exists()); + + Ok(()) + } + + #[cfg(target_os = "linux")] + #[tokio::test] + async fn timed_out_snapshot_shell_is_terminated() -> Result<()> { + use std::process::Stdio; + use tokio::time::Duration as TokioDuration; + use tokio::time::Instant; + use tokio::time::sleep; + + let dir = tempdir()?; + let shell_path = dir.path().join("hanging-shell.sh"); + let pid_path = dir.path().join("pid"); + + let script = format!( + "#!/bin/sh\n\ + echo $$ > {}\n\ + sleep 30\n", + pid_path.display() + ); + fs::write(&shell_path, script).await?; + let mut permissions = std::fs::metadata(&shell_path)?.permissions(); + permissions.set_mode(0o755); + std::fs::set_permissions(&shell_path, permissions)?; + + let shell = Shell { + shell_type: ShellType::Sh, + shell_path, + shell_snapshot: None, + }; + + let 
err = run_shell_script_with_timeout(&shell, "ignored", Duration::from_millis(500)) + .await + .expect_err("snapshot shell should time out"); + assert!( + err.to_string().contains("timed out"), + "expected timeout error, got {err:?}" + ); + + let pid = fs::read_to_string(&pid_path) + .await + .expect("snapshot shell writes its pid before timing out") + .trim() + .parse::()?; + + let deadline = Instant::now() + TokioDuration::from_secs(1); + loop { + let kill_status = StdCommand::new("kill") + .arg("-0") + .arg(pid.to_string()) + .stderr(Stdio::null()) + .stdout(Stdio::null()) + .status()?; + if !kill_status.success() { + break; + } + if Instant::now() >= deadline { + panic!("timed out snapshot shell is still alive after grace period"); + } + sleep(TokioDuration::from_millis(50)).await; + } + + Ok(()) + } + + #[cfg(target_os = "macos")] + #[tokio::test] + async fn macos_zsh_snapshot_includes_sections() -> Result<()> { + let snapshot = get_snapshot(ShellType::Zsh).await?; + assert_posix_snapshot_sections(&snapshot); + Ok(()) + } + + #[cfg(target_os = "linux")] + #[tokio::test] + async fn linux_bash_snapshot_includes_sections() -> Result<()> { + let snapshot = get_snapshot(ShellType::Bash).await?; + assert_posix_snapshot_sections(&snapshot); + Ok(()) + } + + #[cfg(target_os = "linux")] + #[tokio::test] + async fn linux_sh_snapshot_includes_sections() -> Result<()> { + let snapshot = get_snapshot(ShellType::Sh).await?; + assert_posix_snapshot_sections(&snapshot); + Ok(()) + } + + #[cfg(target_os = "windows")] + #[ignore] + #[tokio::test] + async fn windows_powershell_snapshot_includes_sections() -> Result<()> { + let snapshot = get_snapshot(ShellType::PowerShell).await?; + assert!(snapshot.contains("# Snapshot file")); + assert!(snapshot.contains("aliases ")); + assert!(snapshot.contains("exports ")); + Ok(()) + } +} diff --git a/codex-rs/core/src/skills/injection.rs b/codex-rs/core/src/skills/injection.rs new file mode 100644 index 0000000000..a143fce1f2 --- /dev/null 
+++ b/codex-rs/core/src/skills/injection.rs @@ -0,0 +1,78 @@ +use std::collections::HashSet; + +use crate::skills::SkillLoadOutcome; +use crate::skills::SkillMetadata; +use crate::user_instructions::SkillInstructions; +use codex_protocol::models::ResponseItem; +use codex_protocol::user_input::UserInput; +use tokio::fs; + +#[derive(Debug, Default)] +pub(crate) struct SkillInjections { + pub(crate) items: Vec, + pub(crate) warnings: Vec, +} + +pub(crate) async fn build_skill_injections( + inputs: &[UserInput], + skills: Option<&SkillLoadOutcome>, +) -> SkillInjections { + if inputs.is_empty() { + return SkillInjections::default(); + } + + let Some(outcome) = skills else { + return SkillInjections::default(); + }; + + let mentioned_skills = collect_explicit_skill_mentions(inputs, &outcome.skills); + if mentioned_skills.is_empty() { + return SkillInjections::default(); + } + + let mut result = SkillInjections { + items: Vec::with_capacity(mentioned_skills.len()), + warnings: Vec::new(), + }; + + for skill in mentioned_skills { + match fs::read_to_string(&skill.path).await { + Ok(contents) => { + result.items.push(ResponseItem::from(SkillInstructions { + name: skill.name, + path: skill.path.to_string_lossy().into_owned(), + contents, + })); + } + Err(err) => { + let message = format!( + "Failed to load skill {} at {}: {err:#}", + skill.name, + skill.path.display() + ); + result.warnings.push(message); + } + } + } + + result +} + +fn collect_explicit_skill_mentions( + inputs: &[UserInput], + skills: &[SkillMetadata], +) -> Vec { + let mut selected: Vec = Vec::new(); + let mut seen: HashSet = HashSet::new(); + + for input in inputs { + if let UserInput::Skill { name, path } = input + && seen.insert(name.clone()) + && let Some(skill) = skills.iter().find(|s| s.name == *name && s.path == *path) + { + selected.push(skill.clone()); + } + } + + selected +} diff --git a/codex-rs/core/src/skills/loader.rs b/codex-rs/core/src/skills/loader.rs index a9ea156f02..c014af3147 100644 
--- a/codex-rs/core/src/skills/loader.rs +++ b/codex-rs/core/src/skills/loader.rs @@ -1,4 +1,5 @@ use crate::config::Config; +use crate::git_info::resolve_root_git_project_for_trust; use crate::skills::model::SkillError; use crate::skills::model::SkillLoadOutcome; use crate::skills::model::SkillMetadata; @@ -20,6 +21,7 @@ struct SkillFrontmatter { const SKILLS_FILENAME: &str = "SKILL.md"; const SKILLS_DIR_NAME: &str = "skills"; +const REPO_ROOT_CONFIG_DIR_NAME: &str = ".codex"; const MAX_NAME_LEN: usize = 100; const MAX_DESCRIPTION_LEN: usize = 500; @@ -65,7 +67,17 @@ pub fn load_skills(config: &Config) -> SkillLoadOutcome { } fn skill_roots(config: &Config) -> Vec { - vec![config.codex_home.join(SKILLS_DIR_NAME)] + let mut roots = vec![config.codex_home.join(SKILLS_DIR_NAME)]; + + if let Some(repo_root) = resolve_root_git_project_for_trust(&config.cwd) { + roots.push( + repo_root + .join(REPO_ROOT_CONFIG_DIR_NAME) + .join(SKILLS_DIR_NAME), + ); + } + + roots } fn discover_skills_under_root(root: &Path, outcome: &mut SkillLoadOutcome) { @@ -196,6 +208,9 @@ mod tests { use super::*; use crate::config::ConfigOverrides; use crate::config::ConfigToml; + use pretty_assertions::assert_eq; + use std::path::Path; + use std::process::Command; use tempfile::TempDir; fn make_config(codex_home: &TempDir) -> Config { @@ -211,7 +226,11 @@ mod tests { } fn write_skill(codex_home: &TempDir, dir: &str, name: &str, description: &str) -> PathBuf { - let skill_dir = codex_home.path().join(format!("skills/{dir}")); + write_skill_at(codex_home.path(), dir, name, description) + } + + fn write_skill_at(root: &Path, dir: &str, name: &str, description: &str) -> PathBuf { + let skill_dir = root.join(format!("skills/{dir}")); fs::create_dir_all(&skill_dir).unwrap(); let indented_description = description.replace('\n', "\n "); let content = format!( @@ -288,4 +307,37 @@ mod tests { "expected length error" ); } + + #[test] + fn loads_skills_from_repo_root() { + let codex_home = 
tempfile::tempdir().expect("tempdir"); + let repo_dir = tempfile::tempdir().expect("tempdir"); + + let status = Command::new("git") + .arg("init") + .current_dir(repo_dir.path()) + .status() + .expect("git init"); + assert!(status.success(), "git init failed"); + + let skills_root = repo_dir + .path() + .join(REPO_ROOT_CONFIG_DIR_NAME) + .join(SKILLS_DIR_NAME); + write_skill_at(&skills_root, "repo", "repo-skill", "from repo"); + let mut cfg = make_config(&codex_home); + cfg.cwd = repo_dir.path().to_path_buf(); + let repo_root = normalize_path(&skills_root).unwrap_or_else(|_| skills_root.clone()); + + let outcome = load_skills(&cfg); + assert!( + outcome.errors.is_empty(), + "unexpected errors: {:?}", + outcome.errors + ); + assert_eq!(outcome.skills.len(), 1); + let skill = &outcome.skills[0]; + assert_eq!(skill.name, "repo-skill"); + assert!(skill.path.starts_with(&repo_root)); + } } diff --git a/codex-rs/core/src/skills/mod.rs b/codex-rs/core/src/skills/mod.rs index ebb1490c99..b2ab935ce5 100644 --- a/codex-rs/core/src/skills/mod.rs +++ b/codex-rs/core/src/skills/mod.rs @@ -1,7 +1,10 @@ +pub mod injection; pub mod loader; pub mod model; pub mod render; +pub(crate) use injection::SkillInjections; +pub(crate) use injection::build_skill_injections; pub use loader::load_skills; pub use model::SkillError; pub use model::SkillLoadOutcome; diff --git a/codex-rs/core/src/skills/render.rs b/codex-rs/core/src/skills/render.rs index d547e21c28..b664565459 100644 --- a/codex-rs/core/src/skills/render.rs +++ b/codex-rs/core/src/skills/render.rs @@ -17,5 +17,26 @@ pub fn render_skills_section(skills: &[SkillMetadata]) -> Option { )); } + lines.push( + r###"- Discovery: Available skills are listed in project docs and may also appear in a runtime "## Skills" section (name + description + file path). These are the sources of truth; skill bodies live on disk at the listed paths. 
+- Trigger rules: If the user names a skill (with `$SkillName` or plain text) OR the task clearly matches a skill's description, you must use that skill for that turn. Multiple mentions mean use them all. Do not carry skills across turns unless re-mentioned. +- Missing/blocked: If a named skill isn't in the list or the path can't be read, say so briefly and continue with the best fallback. +- How to use a skill (progressive disclosure): + 1) After deciding to use a skill, open its `SKILL.md`. Read only enough to follow the workflow. + 2) If `SKILL.md` points to extra folders such as `references/`, load only the specific files needed for the request; don't bulk-load everything. + 3) If `scripts/` exist, prefer running or patching them instead of retyping large code blocks. + 4) If `assets/` or templates exist, reuse them instead of recreating from scratch. +- Description as trigger: The YAML `description` in `SKILL.md` is the primary trigger signal; rely on it to decide applicability. If unsure, ask a brief clarification before proceeding. +- Coordination and sequencing: + - If multiple skills apply, choose the minimal set that covers the request and state the order you'll use them. + - Announce which skill(s) you're using and why (one short line). If you skip an obvious skill, say why. +- Context hygiene: + - Keep context small: summarize long sections instead of pasting them; only load extra files when needed. + - Avoid deeply nested references; prefer one-hop files explicitly linked from `SKILL.md`. + - When variants exist (frameworks, providers, domains), pick only the relevant reference file(s) and note that choice. 
+- Safety and fallback: If a skill can't be applied cleanly (missing files, unclear instructions), state the issue, pick the next-best approach, and continue."### + .to_string(), + ); + Some(lines.join("\n")) } diff --git a/codex-rs/core/src/state/service.rs b/codex-rs/core/src/state/service.rs index 287fb73d25..0270f3411c 100644 --- a/codex-rs/core/src/state/service.rs +++ b/codex-rs/core/src/state/service.rs @@ -3,6 +3,8 @@ use std::sync::Arc; use crate::AuthManager; use crate::RolloutRecorder; use crate::mcp_connection_manager::McpConnectionManager; +use crate::openai_models::models_manager::ModelsManager; +use crate::skills::SkillLoadOutcome; use crate::tools::sandboxing::ApprovalStore; use crate::unified_exec::UnifiedExecSessionManager; use crate::user_notification::UserNotifier; @@ -17,9 +19,11 @@ pub(crate) struct SessionServices { pub(crate) unified_exec_manager: UnifiedExecSessionManager, pub(crate) notifier: UserNotifier, pub(crate) rollout: Mutex>, - pub(crate) user_shell: crate::shell::Shell, + pub(crate) user_shell: Arc, pub(crate) show_raw_agent_reasoning: bool, pub(crate) auth_manager: Arc, + pub(crate) models_manager: Arc, pub(crate) otel_event_manager: OtelEventManager, pub(crate) tool_approvals: Mutex, + pub(crate) skills: Option, } diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index 8c739c9243..c61d188373 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -62,7 +62,7 @@ impl SessionState { } pub(crate) fn set_rate_limits(&mut self, snapshot: RateLimitSnapshot) { - self.latest_rate_limits = Some(merge_rate_limit_credits( + self.latest_rate_limits = Some(merge_rate_limit_fields( self.latest_rate_limits.as_ref(), snapshot, )); @@ -83,13 +83,16 @@ impl SessionState { } } -// Sometimes new snapshots don't include credits -fn merge_rate_limit_credits( +// Sometimes new snapshots don't include credits or plan information. 
+fn merge_rate_limit_fields( previous: Option<&RateLimitSnapshot>, mut snapshot: RateLimitSnapshot, ) -> RateLimitSnapshot { if snapshot.credits.is_none() { snapshot.credits = previous.and_then(|prior| prior.credits.clone()); } + if snapshot.plan_type.is_none() { + snapshot.plan_type = previous.and_then(|prior| prior.plan_type); + } snapshot } diff --git a/codex-rs/core/src/stream_events_utils.rs b/codex-rs/core/src/stream_events_utils.rs new file mode 100644 index 0000000000..1cb74bc250 --- /dev/null +++ b/codex-rs/core/src/stream_events_utils.rs @@ -0,0 +1,212 @@ +use std::pin::Pin; +use std::sync::Arc; + +use codex_protocol::items::TurnItem; +use tokio_util::sync::CancellationToken; + +use crate::codex::Session; +use crate::codex::TurnContext; +use crate::error::CodexErr; +use crate::error::Result; +use crate::function_tool::FunctionCallError; +use crate::parse_turn_item; +use crate::tools::parallel::ToolCallRuntime; +use crate::tools::router::ToolRouter; +use codex_protocol::models::FunctionCallOutputPayload; +use codex_protocol::models::ResponseInputItem; +use codex_protocol::models::ResponseItem; +use futures::Future; +use tracing::debug; + +/// Handle a completed output item from the model stream, recording it and +/// queuing any tool execution futures. This records items immediately so +/// history and rollout stay in sync even if the turn is later cancelled. 
+pub(crate) type InFlightFuture<'f> = + Pin> + Send + 'f>>; + +#[derive(Default)] +pub(crate) struct OutputItemResult { + pub last_agent_message: Option, + pub needs_follow_up: bool, + pub tool_future: Option>, +} + +pub(crate) struct HandleOutputCtx { + pub sess: Arc, + pub turn_context: Arc, + pub tool_runtime: ToolCallRuntime, + pub cancellation_token: CancellationToken, +} + +pub(crate) async fn handle_output_item_done( + ctx: &mut HandleOutputCtx, + item: ResponseItem, + previously_active_item: Option, +) -> Result { + let mut output = OutputItemResult::default(); + + match ToolRouter::build_tool_call(ctx.sess.as_ref(), item.clone()).await { + // The model emitted a tool call; log it, persist the item immediately, and queue the tool execution. + Ok(Some(call)) => { + let payload_preview = call.payload.log_payload().into_owned(); + tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview); + + ctx.sess + .record_conversation_items(&ctx.turn_context, std::slice::from_ref(&item)) + .await; + + let cancellation_token = ctx.cancellation_token.child_token(); + let tool_runtime = ctx.tool_runtime.clone(); + + let tool_future: InFlightFuture<'static> = Box::pin(async move { + let response_input = tool_runtime + .handle_tool_call(call, cancellation_token) + .await?; + Ok(response_input) + }); + + output.needs_follow_up = true; + output.tool_future = Some(tool_future); + } + // No tool call: convert messages/reasoning into turn items and mark them as complete. 
+ Ok(None) => { + if let Some(turn_item) = handle_non_tool_response_item(&item).await { + if previously_active_item.is_none() { + ctx.sess + .emit_turn_item_started(&ctx.turn_context, &turn_item) + .await; + } + + ctx.sess + .emit_turn_item_completed(&ctx.turn_context, turn_item) + .await; + } + + ctx.sess + .record_conversation_items(&ctx.turn_context, std::slice::from_ref(&item)) + .await; + let last_agent_message = last_assistant_message_from_item(&item); + + output.last_agent_message = last_agent_message; + } + // Guardrail: the model issued a LocalShellCall without an id; surface the error back into history. + Err(FunctionCallError::MissingLocalShellCallId) => { + let msg = "LocalShellCall without call_id or id"; + ctx.turn_context + .client + .get_otel_event_manager() + .log_tool_failed("local_shell", msg); + tracing::error!(msg); + + let response = ResponseInputItem::FunctionCallOutput { + call_id: String::new(), + output: FunctionCallOutputPayload { + content: msg.to_string(), + ..Default::default() + }, + }; + ctx.sess + .record_conversation_items(&ctx.turn_context, std::slice::from_ref(&item)) + .await; + if let Some(response_item) = response_input_to_response_item(&response) { + ctx.sess + .record_conversation_items( + &ctx.turn_context, + std::slice::from_ref(&response_item), + ) + .await; + } + + output.needs_follow_up = true; + } + // The tool request should be answered directly (or was denied); push that response into the transcript. 
+ Err(FunctionCallError::RespondToModel(message)) + | Err(FunctionCallError::Denied(message)) => { + let response = ResponseInputItem::FunctionCallOutput { + call_id: String::new(), + output: FunctionCallOutputPayload { + content: message, + ..Default::default() + }, + }; + ctx.sess + .record_conversation_items(&ctx.turn_context, std::slice::from_ref(&item)) + .await; + if let Some(response_item) = response_input_to_response_item(&response) { + ctx.sess + .record_conversation_items( + &ctx.turn_context, + std::slice::from_ref(&response_item), + ) + .await; + } + + output.needs_follow_up = true; + } + // A fatal error occurred; surface it back into history. + Err(FunctionCallError::Fatal(message)) => { + return Err(CodexErr::Fatal(message)); + } + } + + Ok(output) +} + +pub(crate) async fn handle_non_tool_response_item(item: &ResponseItem) -> Option { + debug!(?item, "Output item"); + + match item { + ResponseItem::Message { .. } + | ResponseItem::Reasoning { .. } + | ResponseItem::WebSearchCall { .. } => parse_turn_item(item), + ResponseItem::FunctionCallOutput { .. } | ResponseItem::CustomToolCallOutput { .. } => { + debug!("unexpected tool output from stream"); + None + } + _ => None, + } +} + +pub(crate) fn last_assistant_message_from_item(item: &ResponseItem) -> Option { + if let ResponseItem::Message { role, content, .. 
} = item + && role == "assistant" + { + return content.iter().rev().find_map(|ci| match ci { + codex_protocol::models::ContentItem::OutputText { text } => Some(text.clone()), + _ => None, + }); + } + None +} + +pub(crate) fn response_input_to_response_item(input: &ResponseInputItem) -> Option { + match input { + ResponseInputItem::FunctionCallOutput { call_id, output } => { + Some(ResponseItem::FunctionCallOutput { + call_id: call_id.clone(), + output: output.clone(), + }) + } + ResponseInputItem::CustomToolCallOutput { call_id, output } => { + Some(ResponseItem::CustomToolCallOutput { + call_id: call_id.clone(), + output: output.clone(), + }) + } + ResponseInputItem::McpToolCallOutput { call_id, result } => { + let output = match result { + Ok(call_tool_result) => FunctionCallOutputPayload::from(call_tool_result), + Err(err) => FunctionCallOutputPayload { + content: err.clone(), + success: Some(false), + ..Default::default() + }, + }; + Some(ResponseItem::FunctionCallOutput { + call_id: call_id.clone(), + output, + }) + } + _ => None, + } +} diff --git a/codex-rs/core/src/tasks/compact.rs b/codex-rs/core/src/tasks/compact.rs index 893c0c476a..293116c167 100644 --- a/codex-rs/core/src/tasks/compact.rs +++ b/codex-rs/core/src/tasks/compact.rs @@ -25,7 +25,7 @@ impl SessionTask for CompactTask { _cancellation_token: CancellationToken, ) -> Option { let session = session.clone_session(); - if crate::compact::should_use_remote_compact_task(&session).await { + if crate::compact::should_use_remote_compact_task(&session) { crate::compact_remote::run_remote_compact_task(session, ctx).await } else { crate::compact::run_compact_task(session, ctx, input).await diff --git a/codex-rs/core/src/tasks/ghost_snapshot.rs b/codex-rs/core/src/tasks/ghost_snapshot.rs index ef5d42a281..7e84c330f6 100644 --- a/codex-rs/core/src/tasks/ghost_snapshot.rs +++ b/codex-rs/core/src/tasks/ghost_snapshot.rs @@ -15,6 +15,8 @@ use codex_protocol::user_input::UserInput; use 
codex_utils_readiness::Readiness; use codex_utils_readiness::Token; use std::sync::Arc; +use std::time::Duration; +use tokio::sync::oneshot; use tokio_util::sync::CancellationToken; use tracing::info; use tracing::warn; @@ -23,6 +25,8 @@ pub(crate) struct GhostSnapshotTask { token: Token, } +const SNAPSHOT_WARNING_THRESHOLD: Duration = Duration::from_secs(240); + #[async_trait] impl SessionTask for GhostSnapshotTask { fn kind(&self) -> TaskKind { @@ -38,7 +42,33 @@ impl SessionTask for GhostSnapshotTask { ) -> Option { tokio::task::spawn(async move { let token = self.token; - let ctx_for_task = Arc::clone(&ctx); + // Channel used to signal when the snapshot work has finished so the + // timeout warning task can exit early without sending a warning. + let (snapshot_done_tx, snapshot_done_rx) = oneshot::channel::<()>(); + let ctx_for_warning = ctx.clone(); + let cancellation_token_for_warning = cancellation_token.clone(); + let session_for_warning = session.clone(); + // Fire a generic warning if the snapshot is still running after + // four minutes; this helps users discover large untracked files + // that might need to be added to .gitignore. + tokio::task::spawn(async move { + tokio::select! { + _ = tokio::time::sleep(SNAPSHOT_WARNING_THRESHOLD) => { + session_for_warning.session + .send_event( + &ctx_for_warning, + EventMsg::Warning(WarningEvent { + message: "Repository snapshot is taking longer than expected. Large untracked or ignored files can slow snapshots; consider adding large files or directories to .gitignore or disabling `undo` in your config.".to_string() + }), + ) + .await; + } + _ = snapshot_done_rx => {} + _ = cancellation_token_for_warning.cancelled() => {} + } + }); + + let ctx_for_task = ctx.clone(); + let cancelled = tokio::select!
{ _ = cancellation_token.cancelled() => true, _ = async { @@ -109,6 +139,8 @@ impl SessionTask for GhostSnapshotTask { } => false, }; + let _ = snapshot_done_tx.send(()); + if cancelled { info!("ghost snapshot task cancelled"); } diff --git a/codex-rs/core/src/tasks/mod.rs b/codex-rs/core/src/tasks/mod.rs index 9bda02c34e..fa5433ef5e 100644 --- a/codex-rs/core/src/tasks/mod.rs +++ b/codex-rs/core/src/tasks/mod.rs @@ -19,6 +19,7 @@ use tracing::warn; use crate::AuthManager; use crate::codex::Session; use crate::codex::TurnContext; +use crate::openai_models::models_manager::ModelsManager; use crate::protocol::EventMsg; use crate::protocol::TaskCompleteEvent; use crate::protocol::TurnAbortReason; @@ -55,6 +56,10 @@ impl SessionTaskContext { pub(crate) fn auth_manager(&self) -> Arc { Arc::clone(&self.session.services.auth_manager) } + + pub(crate) fn models_manager(&self) -> Arc { + Arc::clone(&self.session.services.models_manager) + } } /// Async task that drives a [`Session`] turn. diff --git a/codex-rs/core/src/tasks/review.rs b/codex-rs/core/src/tasks/review.rs index a6ec840a84..da7f29d4ad 100644 --- a/codex-rs/core/src/tasks/review.rs +++ b/codex-rs/core/src/tasks/review.rs @@ -16,6 +16,7 @@ use tokio_util::sync::CancellationToken; use crate::codex::Session; use crate::codex::TurnContext; use crate::codex_delegate::run_codex_conversation_one_shot; +use crate::protocol::SandboxPolicy; use crate::review_format::format_review_findings_block; use crate::review_format::render_review_output_text; use crate::state::TaskKind; @@ -77,6 +78,7 @@ async fn start_review_conversation( ) -> Option> { let config = ctx.client.config(); let mut sub_agent_config = config.as_ref().clone(); + sub_agent_config.sandbox_policy = SandboxPolicy::new_read_only_policy(); // Run with only reviewer rubric — drop outer user_instructions sub_agent_config.user_instructions = None; // Avoid loading project docs; reviewer only needs findings @@ -90,9 +92,12 @@ async fn start_review_conversation( // 
Set explicit review rubric for the sub-agent sub_agent_config.base_instructions = Some(crate::REVIEW_PROMPT.to_string()); + + sub_agent_config.model = Some(config.review_model.clone()); (run_codex_conversation_one_shot( sub_agent_config, session.auth_manager(), + session.models_manager(), input, session.clone_session(), ctx.clone(), diff --git a/codex-rs/core/src/tasks/user_shell.rs b/codex-rs/core/src/tasks/user_shell.rs index ca5243241a..aec09514ca 100644 --- a/codex-rs/core/src/tasks/user_shell.rs +++ b/codex-rs/core/src/tasks/user_shell.rs @@ -24,6 +24,7 @@ use crate::protocol::ExecCommandSource; use crate::protocol::SandboxPolicy; use crate::protocol::TaskStartedEvent; use crate::sandboxing::ExecEnv; +use crate::sandboxing::SandboxPermissions; use crate::state::TaskKind; use crate::tools::format_exec_output_str; use crate::user_shell_command::user_shell_command_record_item; @@ -100,7 +101,7 @@ impl SessionTask for UserShellCommandTask { // should use that instead of an "arbitrarily large" timeout here. expiration: USER_SHELL_TIMEOUT_MS.into(), sandbox: SandboxType::None, - with_escalated_permissions: None, + sandbox_permissions: SandboxPermissions::UseDefault, justification: None, arg0: None, }; diff --git a/codex-rs/core/src/tools/events.rs b/codex-rs/core/src/tools/events.rs index 93bce60489..cdfc575cd9 100644 --- a/codex-rs/core/src/tools/events.rs +++ b/codex-rs/core/src/tools/events.rs @@ -134,7 +134,6 @@ impl ToolEmitter { command: &[String], cwd: PathBuf, source: ExecCommandSource, - interaction_input: Option, process_id: Option, ) -> Self { let parsed_cmd = parse_command(command); @@ -142,7 +141,7 @@ impl ToolEmitter { command: command.to_vec(), cwd, source, - interaction_input, + interaction_input: None, // TODO(jif) drop this field in the protocol. 
parsed_cmd, process_id, } diff --git a/codex-rs/core/src/tools/handlers/apply_patch.rs b/codex-rs/core/src/tools/handlers/apply_patch.rs index 4a28619c76..14a481f4ea 100644 --- a/codex-rs/core/src/tools/handlers/apply_patch.rs +++ b/codex-rs/core/src/tools/handlers/apply_patch.rs @@ -26,8 +26,6 @@ use crate::tools::sandboxing::ToolCtx; use crate::tools::spec::ApplyPatchToolArgs; use crate::tools::spec::JsonSchema; use async_trait::async_trait; -use serde::Deserialize; -use serde::Serialize; pub struct ApplyPatchHandler; @@ -46,7 +44,7 @@ impl ToolHandler for ApplyPatchHandler { ) } - fn is_mutating(&self, _invocation: &ToolInvocation) -> bool { + async fn is_mutating(&self, _invocation: &ToolInvocation) -> bool { true } @@ -161,13 +159,6 @@ impl ToolHandler for ApplyPatchHandler { } } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] -#[serde(rename_all = "snake_case")] -pub enum ApplyPatchToolType { - Freeform, - Function, -} - #[allow(clippy::too_many_arguments)] pub(crate) async fn intercept_apply_patch( command: &[String], diff --git a/codex-rs/core/src/tools/handlers/shell.rs b/codex-rs/core/src/tools/handlers/shell.rs index d1b7d3144c..ded9c7ac41 100644 --- a/codex-rs/core/src/tools/handlers/shell.rs +++ b/codex-rs/core/src/tools/handlers/shell.rs @@ -6,11 +6,11 @@ use std::sync::Arc; use crate::codex::TurnContext; use crate::exec::ExecParams; use crate::exec_env::create_env; -use crate::exec_policy::create_approval_requirement_for_command; +use crate::exec_policy::create_exec_approval_requirement_for_command; use crate::function_tool::FunctionCallError; use crate::is_safe_command::is_known_safe_command; use crate::protocol::ExecCommandSource; -use crate::sandboxing::SandboxPermissions; +use crate::shell::Shell; use crate::tools::context::ToolInvocation; use crate::tools::context::ToolOutput; use crate::tools::context::ToolPayload; @@ -35,7 +35,7 @@ impl ShellHandler { cwd: turn_context.resolve_path(params.workdir.clone()), expiration: 
params.timeout_ms.into(), env: create_env(&turn_context.shell_environment_policy), - with_escalated_permissions: params.with_escalated_permissions, + sandbox_permissions: params.sandbox_permissions.unwrap_or_default(), justification: params.justification, arg0: None, } @@ -43,21 +43,25 @@ impl ShellHandler { } impl ShellCommandHandler { + fn base_command(shell: &Shell, command: &str, login: Option) -> Vec { + let use_login_shell = login.unwrap_or(true); + shell.derive_exec_args(command, use_login_shell) + } + fn to_exec_params( params: ShellCommandToolCallParams, session: &crate::codex::Session, turn_context: &TurnContext, ) -> ExecParams { let shell = session.user_shell(); - let use_login_shell = true; - let command = shell.derive_exec_args(¶ms.command, use_login_shell); + let command = Self::base_command(shell.as_ref(), ¶ms.command, params.login); ExecParams { command, cwd: turn_context.resolve_path(params.workdir.clone()), expiration: params.timeout_ms.into(), env: create_env(&turn_context.shell_environment_policy), - with_escalated_permissions: params.with_escalated_permissions, + sandbox_permissions: params.sandbox_permissions.unwrap_or_default(), justification: params.justification, arg0: None, } @@ -77,7 +81,7 @@ impl ToolHandler for ShellHandler { ) } - fn is_mutating(&self, invocation: &ToolInvocation) -> bool { + async fn is_mutating(&self, invocation: &ToolInvocation) -> bool { match &invocation.payload { ToolPayload::Function { arguments } => { serde_json::from_str::(arguments) @@ -149,6 +153,20 @@ impl ToolHandler for ShellCommandHandler { matches!(payload, ToolPayload::Function { .. 
}) } + async fn is_mutating(&self, invocation: &ToolInvocation) -> bool { + let ToolPayload::Function { arguments } = &invocation.payload else { + return true; + }; + + serde_json::from_str::(arguments) + .map(|params| { + let shell = invocation.session.user_shell(); + let command = Self::base_command(shell.as_ref(), ¶ms.command, params.login); + !is_known_safe_command(&command) + }) + .unwrap_or(true) + } + async fn handle(&self, invocation: ToolInvocation) -> Result { let ToolInvocation { session, @@ -193,7 +211,9 @@ impl ShellHandler { freeform: bool, ) -> Result { // Approval policy guard for explicit escalation in non-OnRequest modes. - if exec_params.with_escalated_permissions.unwrap_or(false) + if exec_params + .sandbox_permissions + .requires_escalated_permissions() && !matches!( turn.approval_policy, codex_protocol::protocol::AskForApproval::OnRequest @@ -231,12 +251,14 @@ impl ShellHandler { let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None); emitter.begin(event_ctx).await; - let approval_requirement = create_approval_requirement_for_command( + let features = session.features(); + let exec_approval_requirement = create_exec_approval_requirement_for_command( &turn.exec_policy, + &features, &exec_params.command, turn.approval_policy, &turn.sandbox_policy, - SandboxPermissions::from(exec_params.with_escalated_permissions.unwrap_or(false)), + exec_params.sandbox_permissions, ) .await; @@ -245,9 +267,9 @@ impl ShellHandler { cwd: exec_params.cwd.clone(), timeout_ms: exec_params.expiration.timeout_ms(), env: exec_params.env.clone(), - with_escalated_permissions: exec_params.with_escalated_permissions, + sandbox_permissions: exec_params.sandbox_permissions, justification: exec_params.justification.clone(), - approval_requirement, + exec_approval_requirement, }; let mut orchestrator = ToolOrchestrator::new(); let mut runtime = ShellRuntime::new(); @@ -273,10 +295,19 @@ impl ShellHandler { #[cfg(test)] mod tests { use 
std::path::PathBuf; + use std::sync::Arc; + use codex_protocol::models::ShellCommandToolCallParams; + use pretty_assertions::assert_eq; + + use crate::codex::make_session_and_context; + use crate::exec_env::create_env; use crate::is_safe_command::is_known_safe_command; + use crate::sandboxing::SandboxPermissions; use crate::shell::Shell; use crate::shell::ShellType; + use crate::shell_snapshot::ShellSnapshot; + use crate::tools::handlers::ShellCommandHandler; /// The logic for is_known_safe_command() has heuristics for known shells, /// so we must ensure the commands generated by [ShellCommandHandler] can be @@ -286,18 +317,21 @@ mod tests { let bash_shell = Shell { shell_type: ShellType::Bash, shell_path: PathBuf::from("/bin/bash"), + shell_snapshot: None, }; assert_safe(&bash_shell, "ls -la"); let zsh_shell = Shell { shell_type: ShellType::Zsh, shell_path: PathBuf::from("/bin/zsh"), + shell_snapshot: None, }; assert_safe(&zsh_shell, "ls -la"); let powershell = Shell { shell_type: ShellType::PowerShell, shell_path: PathBuf::from("pwsh.exe"), + shell_snapshot: None, }; assert_safe(&powershell, "ls -Name"); } @@ -310,4 +344,65 @@ mod tests { &shell.derive_exec_args(command, /* use_login_shell */ false) )); } + + #[test] + fn shell_command_handler_to_exec_params_uses_session_shell_and_turn_context() { + let (session, turn_context) = make_session_and_context(); + + let command = "echo hello".to_string(); + let workdir = Some("subdir".to_string()); + let login = None; + let timeout_ms = Some(1234); + let sandbox_permissions = SandboxPermissions::RequireEscalated; + let justification = Some("because tests".to_string()); + + let expected_command = session.user_shell().derive_exec_args(&command, true); + let expected_cwd = turn_context.resolve_path(workdir.clone()); + let expected_env = create_env(&turn_context.shell_environment_policy); + + let params = ShellCommandToolCallParams { + command, + workdir, + login, + timeout_ms, + sandbox_permissions: 
Some(sandbox_permissions), + justification: justification.clone(), + }; + + let exec_params = ShellCommandHandler::to_exec_params(params, &session, &turn_context); + + // ExecParams cannot derive Eq due to the CancellationToken field, so we manually compare the fields. + assert_eq!(exec_params.command, expected_command); + assert_eq!(exec_params.cwd, expected_cwd); + assert_eq!(exec_params.env, expected_env); + assert_eq!(exec_params.expiration.timeout_ms(), timeout_ms); + assert_eq!(exec_params.sandbox_permissions, sandbox_permissions); + assert_eq!(exec_params.justification, justification); + assert_eq!(exec_params.arg0, None); + } + + #[test] + fn shell_command_handler_respects_explicit_login_flag() { + let shell = Shell { + shell_type: ShellType::Bash, + shell_path: PathBuf::from("/bin/bash"), + shell_snapshot: Some(Arc::new(ShellSnapshot { + path: PathBuf::from("/tmp/snapshot.sh"), + })), + }; + + let login_command = + ShellCommandHandler::base_command(&shell, "echo login shell", Some(true)); + assert_eq!( + login_command, + shell.derive_exec_args("echo login shell", true) + ); + + let non_login_command = + ShellCommandHandler::base_command(&shell, "echo non login shell", Some(false)); + assert_eq!( + non_login_command, + shell.derive_exec_args("echo non login shell", false) + ); + } } diff --git a/codex-rs/core/src/tools/handlers/unified_exec.rs b/codex-rs/core/src/tools/handlers/unified_exec.rs index 4c943c6285..e81c35186e 100644 --- a/codex-rs/core/src/tools/handlers/unified_exec.rs +++ b/codex-rs/core/src/tools/handlers/unified_exec.rs @@ -1,12 +1,10 @@ -use std::path::PathBuf; - use crate::function_tool::FunctionCallError; use crate::is_safe_command::is_known_safe_command; use crate::protocol::EventMsg; -use crate::protocol::ExecCommandOutputDeltaEvent; use crate::protocol::ExecCommandSource; -use crate::protocol::ExecOutputStream; -use crate::shell::default_user_shell; +use crate::protocol::TerminalInteractionEvent; +use 
crate::sandboxing::SandboxPermissions; +use crate::shell::Shell; use crate::shell::get_shell_by_model_provided_path; use crate::tools::context::ToolInvocation; use crate::tools::context::ToolOutput; @@ -24,6 +22,8 @@ use crate::unified_exec::UnifiedExecSessionManager; use crate::unified_exec::WriteStdinRequest; use async_trait::async_trait; use serde::Deserialize; +use std::path::PathBuf; +use std::sync::Arc; pub struct UnifiedExecHandler; @@ -41,7 +41,7 @@ struct ExecCommandArgs { #[serde(default)] max_output_tokens: Option, #[serde(default)] - with_escalated_permissions: Option, + sandbox_permissions: SandboxPermissions, #[serde(default)] justification: Option, } @@ -83,7 +83,7 @@ impl ToolHandler for UnifiedExecHandler { ) } - fn is_mutating(&self, invocation: &ToolInvocation) -> bool { + async fn is_mutating(&self, invocation: &ToolInvocation) -> bool { let (ToolPayload::Function { arguments } | ToolPayload::UnifiedExec { arguments }) = &invocation.payload else { @@ -93,7 +93,7 @@ impl ToolHandler for UnifiedExecHandler { let Ok(params) = serde_json::from_str::(arguments) else { return true; }; - let command = get_command(¶ms); + let command = get_command(¶ms, invocation.session.user_shell()); !is_known_safe_command(&command) } @@ -129,23 +129,24 @@ impl ToolHandler for UnifiedExecHandler { )) })?; let process_id = manager.allocate_process_id().await; + let command = get_command(&args, session.user_shell()); - let command = get_command(&args); let ExecCommandArgs { workdir, yield_time_ms, max_output_tokens, - with_escalated_permissions, + sandbox_permissions, justification, .. 
} = args; - if with_escalated_permissions.unwrap_or(false) + if sandbox_permissions.requires_escalated_permissions() && !matches!( context.turn.approval_policy, codex_protocol::protocol::AskForApproval::OnRequest ) { + manager.release_process_id(&process_id).await; return Err(FunctionCallError::RespondToModel(format!( "approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}", policy = context.turn.approval_policy @@ -169,6 +170,7 @@ impl ToolHandler for UnifiedExecHandler { ) .await? { + manager.release_process_id(&process_id).await; return Ok(output); } @@ -182,7 +184,6 @@ impl ToolHandler for UnifiedExecHandler { &command, cwd.clone(), ExecCommandSource::UnifiedExecStartup, - None, Some(process_id.clone()), ); emitter.emit(event_ctx, ToolEventStage::Begin).await; @@ -195,7 +196,7 @@ impl ToolHandler for UnifiedExecHandler { yield_time_ms, max_output_tokens, workdir, - with_escalated_permissions, + sandbox_permissions, justification, }, &context, @@ -211,9 +212,8 @@ impl ToolHandler for UnifiedExecHandler { "failed to parse write_stdin arguments: {err:?}" )) })?; - manager + let response = manager .write_stdin(WriteStdinRequest { - call_id: &call_id, process_id: &args.session_id.to_string(), input: &args.chars, yield_time_ms: args.yield_time_ms, @@ -222,7 +222,18 @@ impl ToolHandler for UnifiedExecHandler { .await .map_err(|err| { FunctionCallError::RespondToModel(format!("write_stdin failed: {err:?}")) - })? + })?; + + let interaction = TerminalInteractionEvent { + call_id: response.event_call_id.clone(), + process_id: args.session_id.to_string(), + stdin: args.chars.clone(), + }; + session + .send_event(turn.as_ref(), EventMsg::TerminalInteraction(interaction)) + .await; + + response } other => { return Err(FunctionCallError::RespondToModel(format!( @@ -231,18 +242,6 @@ impl ToolHandler for UnifiedExecHandler { } }; - // Emit a delta event with the chunk of output we just produced, if any. 
- if !response.output.is_empty() { - let delta = ExecCommandOutputDeltaEvent { - call_id: response.event_call_id.clone(), - stream: ExecOutputStream::Stdout, - chunk: response.output.as_bytes().to_vec(), - }; - session - .send_event(turn.as_ref(), EventMsg::ExecCommandOutputDelta(delta)) - .await; - } - let content = format_response(&response); Ok(ToolOutput::Function { @@ -253,12 +252,14 @@ impl ToolHandler for UnifiedExecHandler { } } -fn get_command(args: &ExecCommandArgs) -> Vec { - let shell = if let Some(shell_str) = &args.shell { - get_shell_by_model_provided_path(&PathBuf::from(shell_str)) - } else { - default_user_shell() - }; +fn get_command(args: &ExecCommandArgs, session_shell: Arc) -> Vec { + let model_shell = args.shell.as_ref().map(|shell_str| { + let mut shell = get_shell_by_model_provided_path(&PathBuf::from(shell_str)); + shell.shell_snapshot = None; + shell + }); + + let shell = model_shell.as_ref().unwrap_or(session_shell.as_ref()); shell.derive_exec_args(&args.cmd, args.login) } @@ -295,6 +296,8 @@ fn format_response(response: &UnifiedExecResponse) -> String { #[cfg(test)] mod tests { use super::*; + use crate::shell::default_user_shell; + use std::sync::Arc; #[test] fn test_get_command_uses_default_shell_when_unspecified() { @@ -305,7 +308,7 @@ mod tests { assert!(args.shell.is_none()); - let command = get_command(&args); + let command = get_command(&args, Arc::new(default_user_shell())); assert_eq!(command.len(), 3); assert_eq!(command[2], "echo hello"); @@ -320,9 +323,15 @@ mod tests { assert_eq!(args.shell.as_deref(), Some("/bin/bash")); - let command = get_command(&args); + let command = get_command(&args, Arc::new(default_user_shell())); - assert_eq!(command[2], "echo hello"); + assert_eq!(command.last(), Some(&"echo hello".to_string())); + if command + .iter() + .any(|arg| arg.eq_ignore_ascii_case("-Command")) + { + assert!(command.contains(&"-NoProfile".to_string())); + } } #[test] @@ -334,7 +343,7 @@ mod tests { 
assert_eq!(args.shell.as_deref(), Some("powershell")); - let command = get_command(&args); + let command = get_command(&args, Arc::new(default_user_shell())); assert_eq!(command[2], "echo hello"); } @@ -348,7 +357,7 @@ mod tests { assert_eq!(args.shell.as_deref(), Some("cmd")); - let command = get_command(&args); + let command = get_command(&args, Arc::new(default_user_shell())); assert_eq!(command[2], "echo hello"); } diff --git a/codex-rs/core/src/tools/orchestrator.rs b/codex-rs/core/src/tools/orchestrator.rs index de23d510bf..003c727610 100644 --- a/codex-rs/core/src/tools/orchestrator.rs +++ b/codex-rs/core/src/tools/orchestrator.rs @@ -7,18 +7,16 @@ retry without sandbox on denial (no re‑approval thanks to caching). */ use crate::error::CodexErr; use crate::error::SandboxErr; -use crate::error::get_error_message_ui; use crate::exec::ExecToolCallOutput; use crate::sandboxing::SandboxManager; use crate::tools::sandboxing::ApprovalCtx; -use crate::tools::sandboxing::ApprovalRequirement; -use crate::tools::sandboxing::ProvidesSandboxRetryData; +use crate::tools::sandboxing::ExecApprovalRequirement; use crate::tools::sandboxing::SandboxAttempt; use crate::tools::sandboxing::SandboxOverride; use crate::tools::sandboxing::ToolCtx; use crate::tools::sandboxing::ToolError; use crate::tools::sandboxing::ToolRuntime; -use crate::tools::sandboxing::default_approval_requirement; +use crate::tools::sandboxing::default_exec_approval_requirement; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::ReviewDecision; @@ -43,7 +41,6 @@ impl ToolOrchestrator { ) -> Result where T: ToolRuntime, - Rq: ProvidesSandboxRetryData, { let otel = turn_ctx.client.get_otel_event_manager(); let otel_tn = &tool_ctx.tool_name; @@ -54,47 +51,34 @@ impl ToolOrchestrator { // 1) Approval let mut already_approved = false; - let requirement = tool.approval_requirement(req).unwrap_or_else(|| { - default_approval_requirement(approval_policy, &turn_ctx.sandbox_policy) + let 
requirement = tool.exec_approval_requirement(req).unwrap_or_else(|| { + default_exec_approval_requirement(approval_policy, &turn_ctx.sandbox_policy) }); match requirement { - ApprovalRequirement::Skip { .. } => { - otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg); + ExecApprovalRequirement::Skip { .. } => { + otel.tool_decision(otel_tn, otel_ci, &ReviewDecision::Approved, otel_cfg); } - ApprovalRequirement::Forbidden { reason } => { + ExecApprovalRequirement::Forbidden { reason } => { return Err(ToolError::Rejected(reason)); } - ApprovalRequirement::NeedsApproval { reason } => { - let mut risk = None; - - if let Some(metadata) = req.sandbox_retry_data() { - risk = tool_ctx - .session - .assess_sandbox_command( - turn_ctx, - &tool_ctx.call_id, - &metadata.command, - None, - ) - .await; - } - + ExecApprovalRequirement::NeedsApproval { reason, .. } => { let approval_ctx = ApprovalCtx { session: tool_ctx.session, turn: turn_ctx, call_id: &tool_ctx.call_id, retry_reason: reason, - risk, }; let decision = tool.start_approval_async(req, approval_ctx).await; - otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone()); + otel.tool_decision(otel_tn, otel_ci, &decision, otel_user.clone()); match decision { ReviewDecision::Denied | ReviewDecision::Abort => { return Err(ToolError::Rejected("rejected by user".to_string())); } - ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {} + ReviewDecision::Approved + | ReviewDecision::ApprovedExecpolicyAmendment { .. } + | ReviewDecision::ApprovedForSession => {} } already_approved = true; } @@ -139,43 +123,24 @@ impl ToolOrchestrator { // Ask for approval before retrying without sandbox. 
if !tool.should_bypass_approval(approval_policy, already_approved) { - let mut risk = None; - - if let Some(metadata) = req.sandbox_retry_data() { - let err = SandboxErr::Denied { - output: output.clone(), - }; - let friendly = get_error_message_ui(&CodexErr::Sandbox(err)); - let failure_summary = format!("failed in sandbox: {friendly}"); - - risk = tool_ctx - .session - .assess_sandbox_command( - turn_ctx, - &tool_ctx.call_id, - &metadata.command, - Some(failure_summary.as_str()), - ) - .await; - } - let reason_msg = build_denial_reason_from_output(output.as_ref()); let approval_ctx = ApprovalCtx { session: tool_ctx.session, turn: turn_ctx, call_id: &tool_ctx.call_id, retry_reason: Some(reason_msg), - risk, }; let decision = tool.start_approval_async(req, approval_ctx).await; - otel.tool_decision(otel_tn, otel_ci, decision, otel_user); + otel.tool_decision(otel_tn, otel_ci, &decision, otel_user); match decision { ReviewDecision::Denied | ReviewDecision::Abort => { return Err(ToolError::Rejected("rejected by user".to_string())); } - ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {} + ReviewDecision::Approved + | ReviewDecision::ApprovedExecpolicyAmendment { .. 
} + | ReviewDecision::ApprovedForSession => {} } } diff --git a/codex-rs/core/src/tools/parallel.rs b/codex-rs/core/src/tools/parallel.rs index 33dc42b936..971ea934d8 100644 --- a/codex-rs/core/src/tools/parallel.rs +++ b/codex-rs/core/src/tools/parallel.rs @@ -17,6 +17,7 @@ use crate::tools::router::ToolRouter; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseInputItem; +#[derive(Clone)] pub(crate) struct ToolCallRuntime { router: Arc, session: Arc, diff --git a/codex-rs/core/src/tools/registry.rs b/codex-rs/core/src/tools/registry.rs index f35ff06315..9b33e84b76 100644 --- a/codex-rs/core/src/tools/registry.rs +++ b/codex-rs/core/src/tools/registry.rs @@ -30,7 +30,7 @@ pub trait ToolHandler: Send + Sync { ) } - fn is_mutating(&self, _invocation: &ToolInvocation) -> bool { + async fn is_mutating(&self, _invocation: &ToolInvocation) -> bool { false } @@ -110,7 +110,7 @@ impl ToolRegistry { let output_cell = &output_cell; let invocation = invocation; async move { - if handler.is_mutating(&invocation) { + if handler.is_mutating(&invocation).await { tracing::trace!("waiting for tool gate"); invocation.turn.tool_call_gate.wait_ready().await; tracing::trace!("tool gate released"); diff --git a/codex-rs/core/src/tools/router.rs b/codex-rs/core/src/tools/router.rs index 7152d3c1ec..b6675bcd5d 100644 --- a/codex-rs/core/src/tools/router.rs +++ b/codex-rs/core/src/tools/router.rs @@ -5,6 +5,7 @@ use crate::client_common::tools::ToolSpec; use crate::codex::Session; use crate::codex::TurnContext; use crate::function_tool::FunctionCallError; +use crate::sandboxing::SandboxPermissions; use crate::tools::context::SharedTurnDiffTracker; use crate::tools::context::ToolInvocation; use crate::tools::context::ToolPayload; @@ -114,7 +115,7 @@ impl ToolRouter { command: exec.command, workdir: exec.working_directory, timeout_ms: exec.timeout_ms, - with_escalated_permissions: None, + sandbox_permissions: Some(SandboxPermissions::UseDefault), 
justification: None, }; Ok(Some(ToolCall { diff --git a/codex-rs/core/src/tools/runtimes/apply_patch.rs b/codex-rs/core/src/tools/runtimes/apply_patch.rs index 2334f1e712..26d04f578c 100644 --- a/codex-rs/core/src/tools/runtimes/apply_patch.rs +++ b/codex-rs/core/src/tools/runtimes/apply_patch.rs @@ -7,12 +7,11 @@ use crate::CODEX_APPLY_PATCH_ARG1; use crate::exec::ExecToolCallOutput; use crate::sandboxing::CommandSpec; +use crate::sandboxing::SandboxPermissions; use crate::sandboxing::execute_env; use crate::tools::sandboxing::Approvable; use crate::tools::sandboxing::ApprovalCtx; -use crate::tools::sandboxing::ProvidesSandboxRetryData; use crate::tools::sandboxing::SandboxAttempt; -use crate::tools::sandboxing::SandboxRetryData; use crate::tools::sandboxing::Sandboxable; use crate::tools::sandboxing::SandboxablePreference; use crate::tools::sandboxing::ToolCtx; @@ -34,12 +33,6 @@ pub struct ApplyPatchRequest { pub codex_exe: Option, } -impl ProvidesSandboxRetryData for ApplyPatchRequest { - fn sandbox_retry_data(&self) -> Option { - None - } -} - #[derive(Default)] pub struct ApplyPatchRuntime; @@ -70,7 +63,7 @@ impl ApplyPatchRuntime { expiration: req.timeout_ms.into(), // Run apply_patch with a minimal environment for determinism and to avoid leaks. 
env: HashMap::new(), - with_escalated_permissions: None, + sandbox_permissions: SandboxPermissions::UseDefault, justification: None, }) } @@ -114,7 +107,6 @@ impl Approvable for ApplyPatchRuntime { let call_id = ctx.call_id.to_string(); let cwd = req.cwd.clone(); let retry_reason = ctx.retry_reason.clone(); - let risk = ctx.risk.clone(); let user_explicitly_approved = req.user_explicitly_approved; Box::pin(async move { with_cached_approval(&session.services, key, move || async move { @@ -126,7 +118,7 @@ impl Approvable for ApplyPatchRuntime { vec!["apply_patch".to_string()], cwd, Some(reason), - risk, + None, ) .await } else if user_explicitly_approved { diff --git a/codex-rs/core/src/tools/runtimes/mod.rs b/codex-rs/core/src/tools/runtimes/mod.rs index 437f4af428..38bfaebe6c 100644 --- a/codex-rs/core/src/tools/runtimes/mod.rs +++ b/codex-rs/core/src/tools/runtimes/mod.rs @@ -6,6 +6,8 @@ small and focused and reuses the orchestrator for approvals + sandbox + retry. */ use crate::exec::ExecExpiration; use crate::sandboxing::CommandSpec; +use crate::sandboxing::SandboxPermissions; +use crate::shell::Shell; use crate::tools::sandboxing::ToolError; use std::collections::HashMap; use std::path::Path; @@ -21,7 +23,7 @@ pub(crate) fn build_command_spec( cwd: &Path, env: &HashMap, expiration: ExecExpiration, - with_escalated_permissions: Option, + sandbox_permissions: SandboxPermissions, justification: Option, ) -> Result { let (program, args) = command @@ -33,7 +35,43 @@ pub(crate) fn build_command_spec( cwd: cwd.to_path_buf(), env: env.clone(), expiration, - with_escalated_permissions, + sandbox_permissions, justification, }) } + +/// POSIX-only helper: for commands produced by `Shell::derive_exec_args` +/// for Bash/Zsh/sh of the form `[shell_path, "-lc", "