diff --git a/.github/actions/linux-code-sign/action.yml b/.github/actions/linux-code-sign/action.yml new file mode 100644 index 0000000000..5a117b0805 --- /dev/null +++ b/.github/actions/linux-code-sign/action.yml @@ -0,0 +1,44 @@ +name: linux-code-sign +description: Sign Linux artifacts with cosign. +inputs: + target: + description: Target triple for the artifacts to sign. + required: true + artifacts-dir: + description: Absolute path to the directory containing built binaries to sign. + required: true + +runs: + using: composite + steps: + - name: Install cosign + uses: sigstore/cosign-installer@v3.7.0 + + - name: Cosign Linux artifacts + shell: bash + env: + COSIGN_EXPERIMENTAL: "1" + COSIGN_YES: "true" + COSIGN_OIDC_CLIENT_ID: "sigstore" + COSIGN_OIDC_ISSUER: "https://oauth2.sigstore.dev/auth" + run: | + set -euo pipefail + + dest="${{ inputs.artifacts-dir }}" + if [[ ! -d "$dest" ]]; then + echo "Destination $dest does not exist" + exit 1 + fi + + for binary in codex codex-responses-api-proxy; do + artifact="${dest}/${binary}" + if [[ ! -f "$artifact" ]]; then + echo "Binary $artifact not found" + exit 1 + fi + + cosign sign-blob \ + --yes \ + --bundle "${artifact}.sigstore" \ + "$artifact" + done diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 08c39db69d..0be45540c1 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -369,6 +369,57 @@ jobs: steps: - uses: actions/checkout@v6 + + # We have been running out of space when running this job on Linux for + # x86_64-unknown-linux-gnu, so remove some unnecessary dependencies. + - name: Remove unnecessary dependencies to save space + if: ${{ startsWith(matrix.runner, 'ubuntu') }} + shell: bash + run: | + set -euo pipefail + sudo rm -rf \ + /usr/local/lib/android \ + /usr/share/dotnet \ + /usr/local/share/boost \ + /usr/local/lib/node_modules \ + /opt/ghc + sudo apt-get remove -y docker.io docker-compose podman buildah + + # Ensure brew includes this fix so that brew's shellenv.sh loads + # cleanly in the Codex sandbox (it is frequently eval'd via .zprofile + # for Brew users, including the macOS runners on GitHub): + # + # https://github.com/Homebrew/brew/pull/21157 + # + # Once brew 5.0.5 is released and is the default on macOS runners, this + # step can be removed. + - name: Upgrade brew + if: ${{ startsWith(matrix.runner, 'macos') }} + shell: bash + run: | + set -euo pipefail + brew --version + git -C "$(brew --repo)" fetch origin + git -C "$(brew --repo)" checkout main + git -C "$(brew --repo)" reset --hard origin/main + export HOMEBREW_UPDATE_TO_TAG=0 + brew update + brew upgrade + brew --version + + # Some integration tests rely on DotSlash being installed. + # See https://github.com/openai/codex/pull/7617. + - name: Install DotSlash + uses: facebook/install-dotslash@v2 + + - name: Pre-fetch DotSlash artifacts + # The Bash wrapper is not available on Windows. + if: ${{ !startsWith(matrix.runner, 'windows') }} + shell: bash + run: | + set -euo pipefail + dotslash -- fetch exec-server/tests/suite/bash + - uses: dtolnay/rust-toolchain@1.90 with: targets: ${{ matrix.target }} diff --git a/.github/workflows/rust-release.yml b/.github/workflows/rust-release.yml index 14f8aa0327..c3e9eeef9a 100644 --- a/.github/workflows/rust-release.yml +++ b/.github/workflows/rust-release.yml @@ -50,6 +50,9 @@ jobs: name: Build - ${{ matrix.runner }} - ${{ matrix.target }} runs-on: ${{ matrix.runner }} timeout-minutes: 30 + permissions: + contents: read + id-token: write defaults: run: working-directory: codex-rs @@ -100,6 +103,13 @@ jobs: - name: Cargo build run: cargo build --target ${{ matrix.target }} --release --bin codex --bin codex-responses-api-proxy + - if: ${{ contains(matrix.target, 'linux') }} + name: Cosign Linux artifacts + uses: ./.github/actions/linux-code-sign + with: + target: ${{ matrix.target }} + artifacts-dir: ${{ github.workspace }}/codex-rs/target/${{ matrix.target }}/release + - if: ${{ matrix.runner == 'macos-15-xlarge' }} name: Configure Apple code signing shell: bash @@ -283,6 +293,11 @@ jobs: cp target/${{ matrix.target }}/release/codex-responses-api-proxy "$dest/codex-responses-api-proxy-${{ matrix.target }}" fi + if [[ "${{ matrix.target }}" == *linux* ]]; then + cp target/${{ matrix.target }}/release/codex.sigstore "$dest/codex-${{ matrix.target }}.sigstore" + cp target/${{ matrix.target }}/release/codex-responses-api-proxy.sigstore "$dest/codex-responses-api-proxy-${{ matrix.target }}.sigstore" + fi + - if: ${{ matrix.runner == 'windows-11-arm' }} name: Install zstd shell: powershell @@ -321,6 +336,11 @@ jobs: continue fi + # Don't try to compress signature bundles. + if [[ "$base" == *.sigstore ]]; then + continue + fi + # Create per-binary tar.gz tar -C "$dest" -czf "$dest/${base}.tar.gz" "$base" diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index b77cf01b02..3e7f0fd8b0 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1256,11 +1256,14 @@ name = "codex-exec-server" version = "0.0.0" dependencies = [ "anyhow", + "assert_cmd", "async-trait", "clap", "codex-core", "codex-execpolicy", + "exec_server_test_support", "libc", + "maplit", "path-absolutize", "pretty_assertions", "rmcp", @@ -1273,6 +1276,7 @@ dependencies = [ "tokio-util", "tracing", "tracing-subscriber", + "which", ] [[package]] @@ -1298,7 +1302,7 @@ dependencies = [ "allocative", "anyhow", "clap", - "derive_more 2.0.1", + "derive_more 2.1.0", "env_logger", "log", "multimap", @@ -1589,7 +1593,7 @@ dependencies = [ "codex-windows-sandbox", "color-eyre", "crossterm", - "derive_more 2.0.1", + "derive_more 2.1.0", "diffy", "dirs", "dunce", @@ -1791,9 +1795,9 @@ dependencies = [ [[package]] name = "convert_case" -version = "0.7.1" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb402b8d4c85569410425650ce3eddc7d698ed96d39a73f941b08fb63082f1e7" +checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9" dependencies = [ "unicode-segmentation", ] @@ -2132,11 +2136,11 @@ dependencies = [ [[package]] name = "derive_more" -version = "2.0.1" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" +checksum = "10b768e943bed7bf2cab53df09f4bc34bfd217cdb57d971e769874c9a6710618" dependencies = [ - "derive_more-impl 2.0.1", + "derive_more-impl 2.1.0", ] [[package]] @@ -2154,13 +2158,14 @@ dependencies = [ [[package]] name = "derive_more-impl" -version = "2.0.1" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" +checksum = "6d286bfdaf75e988b4a78e013ecd79c581e06399ab53fbacd2d916c2f904f30b" dependencies = [ - "convert_case 0.7.1", + "convert_case 0.10.0", "proc-macro2", "quote", + "rustc_version", "syn 2.0.104", "unicode-xid", ] @@ -2501,6 +2506,18 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "exec_server_test_support" +version = "0.0.0" +dependencies = [ + "anyhow", + "assert_cmd", + "codex-core", + "rmcp", + "serde_json", + "tokio", +] + [[package]] name = "eyre" version = "0.6.12" @@ -2545,7 +2562,7 @@ checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", "rustix 1.0.8", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -3382,9 +3399,9 @@ dependencies = [ [[package]] name = "insta" -version = "1.43.2" +version = "1.44.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fdb647ebde000f43b5b53f773c30cf9b0cb4300453208713fa38b2c70935a0" +checksum = "b5c943d4415edd8153251b6f197de5eb1640e56d84e8d9159bea190421c73698" dependencies = [ "console", "once_cell", @@ -3448,7 +3465,7 @@ checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -5236,7 +5253,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -6921,9 +6938,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "ts-rs" -version = "11.0.1" +version = "11.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ef1b7a6d914a34127ed8e1fa927eb7088903787bcded4fa3eef8f85ee1568be" +checksum = "4994acea2522cd2b3b85c1d9529a55991e3ad5e25cdcd3de9d505972c4379424" dependencies = [ "serde_json", "thiserror 2.0.17", @@ -6933,9 +6950,9 @@ dependencies = [ [[package]] name = "ts-rs-macros" -version = "11.0.1" +version = "11.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9d4ed7b4c18cc150a6a0a1e9ea1ecfa688791220781af6e119f9599a8502a0a" +checksum = "ee6ff59666c9cbaec3533964505d39154dc4e0a56151fdea30a09ed0301f62e2" dependencies = [ "proc-macro2", "quote", @@ -7391,9 +7408,9 @@ dependencies = [ [[package]] name = "wildmatch" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39b7d07a236abaef6607536ccfaf19b396dbe3f5110ddb73d39f4562902ed382" +checksum = "29333c3ea1ba8b17211763463ff24ee84e41c78224c16b001cd907e663a38c68" [[package]] name = "winapi" @@ -7417,7 +7434,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml index d3d3c36c3e..2086fbe897 100644 --- a/codex-rs/Cargo.toml +++ b/codex-rs/Cargo.toml @@ -96,6 +96,7 @@ codex-utils-readiness = { path = "utils/readiness" } codex-utils-string = { path = "utils/string" } codex-windows-sandbox = { path = "windows-sandbox-rs" } core_test_support = { path = "core/tests/common" } +exec_server_test_support = { path = "exec-server/tests/common" } mcp-types = { path = "mcp-types" } mcp_test_support = { path = "mcp-server/tests/common" } @@ -138,7 +139,7 @@ icu_provider = { version = "2.1", features = ["sync"] } ignore = "0.4.23" image = { version = "^0.25.9", default-features = false } indexmap = "2.12.0" -insta = "1.43.2" +insta = "1.44.3" itertools = "0.14.0" keyring = { version = "3.6", default-features = false } landlock = "0.4.1" @@ -222,7 +223,7 @@ vt100 = "0.16.2" walkdir = "2.5.0" webbrowser = "1.0" which = "6" -wildmatch = "2.5.0" +wildmatch = "2.6.1" wiremock = "0.6" zeroize = "1.8.2" diff --git a/codex-rs/app-server-protocol/src/protocol/v2.rs b/codex-rs/app-server-protocol/src/protocol/v2.rs index 35d7047661..ea70b805b0 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2.rs @@ -3,6 +3,7 @@ use std::path::PathBuf; use crate::protocol::common::AuthMode; use codex_protocol::account::PlanType; +use codex_protocol::approvals::ExecPolicyAmendment as CoreExecPolicyAmendment; use codex_protocol::approvals::SandboxCommandAssessment as CoreSandboxCommandAssessment; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::items::AgentMessageContent as CoreAgentMessageContent; @@ -287,6 +288,11 @@ v2_enum_from_core!( #[ts(export_to = "v2/")] pub enum ApprovalDecision { Accept, + /// Approve and remember the approval for the session. + AcceptForSession, + AcceptWithExecpolicyAmendment { + execpolicy_amendment: ExecPolicyAmendment, + }, Decline, Cancel, } @@ -382,6 +388,27 @@ impl From for SandboxCommandAssessment { } } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)] +#[serde(transparent)] +#[ts(type = "Array", export_to = "v2/")] +pub struct ExecPolicyAmendment { + pub command: Vec, +} + +impl ExecPolicyAmendment { + pub fn into_core(self) -> CoreExecPolicyAmendment { + CoreExecPolicyAmendment::new(self.command) + } +} + +impl From for ExecPolicyAmendment { + fn from(value: CoreExecPolicyAmendment) -> Self { + Self { + command: value.command().to_vec(), + } + } +} + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(tag = "type", rename_all = "camelCase")] #[ts(tag = "type")] @@ -1468,15 +1495,8 @@ pub struct CommandExecutionRequestApprovalParams { pub reason: Option, /// Optional model-provided risk assessment describing the blocked command. pub risk: Option, -} - -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] -#[serde(rename_all = "camelCase")] -#[ts(export_to = "v2/")] -pub struct CommandExecutionRequestAcceptSettings { - /// If true, automatically approve this command for the duration of the session. - #[serde(default)] - pub for_session: bool, + /// Optional proposed execpolicy amendment to allow similar commands without prompting. + pub proposed_execpolicy_amendment: Option, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] @@ -1484,10 +1504,6 @@ pub struct CommandExecutionRequestAcceptSettings { #[ts(export_to = "v2/")] pub struct CommandExecutionRequestApprovalResponse { pub decision: ApprovalDecision, - /// Optional approval settings for when the decision is `accept`. - /// Ignored if the decision is `decline` or `cancel`. - #[serde(default)] - pub accept_settings: Option, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] diff --git a/codex-rs/app-server-test-client/src/main.rs b/codex-rs/app-server-test-client/src/main.rs index 8c2a38e46c..92255cecd3 100644 --- a/codex-rs/app-server-test-client/src/main.rs +++ b/codex-rs/app-server-test-client/src/main.rs @@ -21,7 +21,6 @@ use codex_app_server_protocol::ApprovalDecision; use codex_app_server_protocol::AskForApproval; use codex_app_server_protocol::ClientInfo; use codex_app_server_protocol::ClientRequest; -use codex_app_server_protocol::CommandExecutionRequestAcceptSettings; use codex_app_server_protocol::CommandExecutionRequestApprovalParams; use codex_app_server_protocol::CommandExecutionRequestApprovalResponse; use codex_app_server_protocol::FileChangeRequestApprovalParams; @@ -754,6 +753,7 @@ impl CodexClient { item_id, reason, risk, + proposed_execpolicy_amendment, } = params; println!( @@ -765,10 +765,12 @@ impl CodexClient { if let Some(risk) = risk.as_ref() { println!("< risk assessment: {risk:?}"); } + if let Some(execpolicy_amendment) = proposed_execpolicy_amendment.as_ref() { + println!("< proposed execpolicy amendment: {execpolicy_amendment:?}"); + } let response = CommandExecutionRequestApprovalResponse { decision: ApprovalDecision::Accept, - accept_settings: Some(CommandExecutionRequestAcceptSettings { for_session: false }), }; self.send_server_request_response(request_id, &response)?; println!("< approved commandExecution request for item {item_id}"); diff --git a/codex-rs/app-server/src/bespoke_event_handling.rs b/codex-rs/app-server/src/bespoke_event_handling.rs index 94676999b5..2fda7bcf58 100644 --- a/codex-rs/app-server/src/bespoke_event_handling.rs +++ b/codex-rs/app-server/src/bespoke_event_handling.rs @@ -18,6 +18,7 @@ use codex_app_server_protocol::ContextCompactedNotification; use codex_app_server_protocol::ErrorNotification; use codex_app_server_protocol::ExecCommandApprovalParams; use codex_app_server_protocol::ExecCommandApprovalResponse; +use codex_app_server_protocol::ExecPolicyAmendment as V2ExecPolicyAmendment; use codex_app_server_protocol::FileChangeOutputDeltaNotification; use codex_app_server_protocol::FileChangeRequestApprovalParams; use codex_app_server_protocol::FileChangeRequestApprovalResponse; @@ -179,7 +180,7 @@ pub(crate) async fn apply_bespoke_event_handling( cwd, reason, risk, - proposed_execpolicy_amendment: _, + proposed_execpolicy_amendment, parsed_cmd, }) => match api_version { ApiVersion::V1 => { @@ -207,6 +208,8 @@ pub(crate) async fn apply_bespoke_event_handling( .map(V2ParsedCommand::from) .collect::>(); let command_string = shlex_join(&command); + let proposed_execpolicy_amendment_v2 = + proposed_execpolicy_amendment.map(V2ExecPolicyAmendment::from); let params = CommandExecutionRequestApprovalParams { thread_id: conversation_id.to_string(), @@ -216,6 +219,7 @@ pub(crate) async fn apply_bespoke_event_handling( item_id: item_id.clone(), reason, risk: risk.map(V2SandboxCommandAssessment::from), + proposed_execpolicy_amendment: proposed_execpolicy_amendment_v2, }; let rx = outgoing .send_request(ServerRequestPayload::CommandExecutionRequestApproval( @@ -1047,7 +1051,11 @@ async fn on_file_change_request_approval_response( }); let (decision, completion_status) = match response.decision { - ApprovalDecision::Accept => (ReviewDecision::Approved, None), + ApprovalDecision::Accept + | ApprovalDecision::AcceptForSession + | ApprovalDecision::AcceptWithExecpolicyAmendment { .. } => { + (ReviewDecision::Approved, None) + } ApprovalDecision::Decline => { (ReviewDecision::Denied, Some(PatchApplyStatus::Declined)) } @@ -1109,25 +1117,27 @@ async fn on_command_execution_request_approval_response( error!("failed to deserialize CommandExecutionRequestApprovalResponse: {err}"); CommandExecutionRequestApprovalResponse { decision: ApprovalDecision::Decline, - accept_settings: None, } }); - let CommandExecutionRequestApprovalResponse { - decision, - accept_settings, - } = response; + let decision = response.decision; - let (decision, completion_status) = match (decision, accept_settings) { - (ApprovalDecision::Accept, Some(settings)) if settings.for_session => { - (ReviewDecision::ApprovedForSession, None) - } - (ApprovalDecision::Accept, _) => (ReviewDecision::Approved, None), - (ApprovalDecision::Decline, _) => ( + let (decision, completion_status) = match decision { + ApprovalDecision::Accept => (ReviewDecision::Approved, None), + ApprovalDecision::AcceptForSession => (ReviewDecision::ApprovedForSession, None), + ApprovalDecision::AcceptWithExecpolicyAmendment { + execpolicy_amendment, + } => ( + ReviewDecision::ApprovedExecpolicyAmendment { + proposed_execpolicy_amendment: execpolicy_amendment.into_core(), + }, + None, + ), + ApprovalDecision::Decline => ( ReviewDecision::Denied, Some(CommandExecutionStatus::Declined), ), - (ApprovalDecision::Cancel, _) => ( + ApprovalDecision::Cancel => ( ReviewDecision::Abort, Some(CommandExecutionStatus::Declined), ), diff --git a/codex-rs/app-server/tests/suite/v2/turn_start.rs b/codex-rs/app-server/tests/suite/v2/turn_start.rs index e4cd722947..afc22c7072 100644 --- a/codex-rs/app-server/tests/suite/v2/turn_start.rs +++ b/codex-rs/app-server/tests/suite/v2/turn_start.rs @@ -427,7 +427,6 @@ async fn turn_start_exec_approval_decline_v2() -> Result<()> { request_id, serde_json::to_value(CommandExecutionRequestApprovalResponse { decision: ApprovalDecision::Decline, - accept_settings: None, })?, ) .await?; diff --git a/codex-rs/codex-api/src/endpoint/models.rs b/codex-rs/codex-api/src/endpoint/models.rs index fec8d7f292..5de08432f0 100644 --- a/codex-rs/codex-api/src/endpoint/models.rs +++ b/codex-rs/codex-api/src/endpoint/models.rs @@ -8,6 +8,7 @@ use codex_client::RequestTelemetry; use codex_protocol::openai_models::ModelsResponse; use http::HeaderMap; use http::Method; +use http::header::ETAG; use std::sync::Arc; pub struct ModelsClient { @@ -59,12 +60,23 @@ impl ModelsClient { ) .await?; - serde_json::from_slice::(&resp.body).map_err(|e| { - ApiError::Stream(format!( - "failed to decode models response: {e}; body: {}", - String::from_utf8_lossy(&resp.body) - )) - }) + let header_etag = resp + .headers + .get(ETAG) + .and_then(|value| value.to_str().ok()) + .map(ToString::to_string); + + let ModelsResponse { models, etag } = serde_json::from_slice::(&resp.body) + .map_err(|e| { + ApiError::Stream(format!( + "failed to decode models response: {e}; body: {}", + String::from_utf8_lossy(&resp.body) + )) + })?; + + let etag = header_etag.unwrap_or(etag); + + Ok(ModelsResponse { models, etag }) } } @@ -86,20 +98,36 @@ mod tests { use std::sync::Mutex; use std::time::Duration; - #[derive(Clone, Default)] + #[derive(Clone)] struct CapturingTransport { last_request: Arc>>, body: Arc, } + impl Default for CapturingTransport { + fn default() -> Self { + Self { + last_request: Arc::new(Mutex::new(None)), + body: Arc::new(ModelsResponse { + models: Vec::new(), + etag: String::new(), + }), + } + } + } + #[async_trait] impl HttpTransport for CapturingTransport { async fn execute(&self, req: Request) -> Result { *self.last_request.lock().unwrap() = Some(req); let body = serde_json::to_vec(&*self.body).unwrap(); + let mut headers = HeaderMap::new(); + if !self.body.etag.is_empty() { + headers.insert(ETAG, self.body.etag.parse().unwrap()); + } Ok(Response { status: StatusCode::OK, - headers: HeaderMap::new(), + headers, body: body.into(), }) } @@ -138,7 +166,10 @@ mod tests { #[tokio::test] async fn appends_client_version_query() { - let response = ModelsResponse { models: Vec::new() }; + let response = ModelsResponse { + models: Vec::new(), + etag: String::new(), + }; let transport = CapturingTransport { last_request: Arc::new(Mutex::new(None)), @@ -181,15 +212,17 @@ mod tests { "display_name": "gpt-test", "description": "desc", "default_reasoning_level": "medium", - "supported_reasoning_levels": ["low", "medium", "high"], + "supported_reasoning_levels": [{"effort": "low", "description": "low"}, {"effort": "medium", "description": "medium"}, {"effort": "high", "description": "high"}], "shell_type": "shell_command", "visibility": "list", "minimal_client_version": [0, 99, 0], "supported_in_api": true, - "priority": 1 + "priority": 1, + "upgrade": null, })) .unwrap(), ], + etag: String::new(), }; let transport = CapturingTransport { @@ -213,4 +246,31 @@ mod tests { assert_eq!(result.models[0].supported_in_api, true); assert_eq!(result.models[0].priority, 1); } + + #[tokio::test] + async fn list_models_includes_etag() { + let response = ModelsResponse { + models: Vec::new(), + etag: "\"abc\"".to_string(), + }; + + let transport = CapturingTransport { + last_request: Arc::new(Mutex::new(None)), + body: Arc::new(response), + }; + + let client = ModelsClient::new( + transport, + provider("https://example.com/api/codex"), + DummyAuth, + ); + + let result = client + .list_models("0.1.0", HeaderMap::new()) + .await + .expect("request should succeed"); + + assert_eq!(result.models.len(), 0); + assert_eq!(result.etag, "\"abc\""); + } } diff --git a/codex-rs/codex-api/tests/models_integration.rs b/codex-rs/codex-api/tests/models_integration.rs index 3b4077f534..6ef328188f 100644 --- a/codex-rs/codex-api/tests/models_integration.rs +++ b/codex-rs/codex-api/tests/models_integration.rs @@ -10,6 +10,7 @@ use codex_protocol::openai_models::ModelInfo; use codex_protocol::openai_models::ModelVisibility; use codex_protocol::openai_models::ModelsResponse; use codex_protocol::openai_models::ReasoningEffort; +use codex_protocol::openai_models::ReasoningEffortPreset; use http::HeaderMap; use http::Method; use wiremock::Mock; @@ -57,16 +58,27 @@ async fn models_client_hits_models_endpoint() { description: Some("desc".to_string()), default_reasoning_level: ReasoningEffort::Medium, supported_reasoning_levels: vec![ - ReasoningEffort::Low, - ReasoningEffort::Medium, - ReasoningEffort::High, + ReasoningEffortPreset { + effort: ReasoningEffort::Low, + description: ReasoningEffort::Low.to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::Medium, + description: ReasoningEffort::Medium.to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::High, + description: ReasoningEffort::High.to_string(), + }, ], shell_type: ConfigShellToolType::ShellCommand, visibility: ModelVisibility::List, minimal_client_version: ClientVersion(0, 1, 0), supported_in_api: true, priority: 1, + upgrade: None, }], + etag: String::new(), }; Mock::given(method("GET")) diff --git a/codex-rs/core/src/auth.rs b/codex-rs/core/src/auth.rs index 72359ca4ca..57ffa17260 100644 --- a/codex-rs/core/src/auth.rs +++ b/codex-rs/core/src/auth.rs @@ -32,7 +32,9 @@ use crate::token_data::TokenData; use crate::token_data::parse_id_token; use crate::util::try_parse_error_message; use codex_protocol::account::PlanType as AccountPlanType; +use once_cell::sync::Lazy; use serde_json::Value; +use tempfile::TempDir; use thiserror::Error; #[derive(Debug, Clone)] @@ -62,6 +64,8 @@ const REFRESH_TOKEN_UNKNOWN_MESSAGE: &str = const REFRESH_TOKEN_URL: &str = "https://auth.openai.com/oauth/token"; pub const REFRESH_TOKEN_URL_OVERRIDE_ENV_VAR: &str = "CODEX_REFRESH_TOKEN_URL_OVERRIDE"; +static TEST_AUTH_TEMP_DIRS: Lazy>> = Lazy::new(|| Mutex::new(Vec::new())); + #[derive(Debug, Error)] pub enum RefreshTokenError { #[error("{0}")] @@ -1088,11 +1092,19 @@ impl AuthManager { } } + #[cfg(any(test, feature = "test-support"))] + #[expect(clippy::expect_used)] /// Create an AuthManager with a specific CodexAuth, for testing only. pub fn from_auth_for_testing(auth: CodexAuth) -> Arc { let cached = CachedAuth { auth: Some(auth) }; + let temp_dir = tempfile::tempdir().expect("temp codex home"); + let codex_home = temp_dir.path().to_path_buf(); + TEST_AUTH_TEMP_DIRS + .lock() + .expect("lock test codex homes") + .push(temp_dir); Arc::new(Self { - codex_home: PathBuf::new(), + codex_home, inner: RwLock::new(cached), enable_codex_api_key_env: false, auth_credentials_store_mode: AuthCredentialsStoreMode::File, @@ -1104,6 +1116,10 @@ impl AuthManager { self.inner.read().ok().and_then(|c| c.auth.clone()) } + pub fn codex_home(&self) -> &Path { + &self.codex_home + } + /// Force a reload of the auth information from auth.json. Returns /// whether the auth value changed. pub fn reload(&self) -> bool { diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 70fdf80b81..ad80f66c6d 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -536,7 +536,7 @@ impl Session { for (alias, feature) in config.features.legacy_feature_usages() { let canonical = feature.key(); - let summary = format!("`{alias}` is deprecated. Use `{canonical}` instead."); + let summary = format!("`{alias}` is deprecated. Use `[features].{canonical}` instead."); let details = if alias == canonical { None } else { @@ -1470,6 +1470,16 @@ async fn submission_loop(sess: Arc, config: Arc, rx_sub: Receiv let mut previous_context: Option> = Some(sess.new_turn(SessionSettingsUpdate::default()).await); + if config.features.enabled(Feature::RemoteModels) + && let Err(err) = sess + .services + .models_manager + .refresh_available_models(&config.model_provider) + .await + { + error!("failed to refresh available models: {err}"); + } + // To break out of this loop, send Op::Shutdown. while let Ok(sub) = rx_sub.recv().await { debug!(?sub, "Submission"); diff --git a/codex-rs/core/src/conversation_manager.rs b/codex-rs/core/src/conversation_manager.rs index e527507c1c..b1818849eb 100644 --- a/codex-rs/core/src/conversation_manager.rs +++ b/codex-rs/core/src/conversation_manager.rs @@ -51,6 +51,7 @@ impl ConversationManager { } } + #[cfg(any(test, feature = "test-support"))] /// Construct with a dummy AuthManager containing the provided CodexAuth. /// Used for integration tests: should not be used by ordinary business logic. pub fn with_auth(auth: CodexAuth) -> Self { @@ -213,7 +214,7 @@ impl ConversationManager { } pub async fn list_models(&self) -> Vec { - self.models_manager.available_models.read().await.clone() + self.models_manager.list_models().await } pub fn get_models_manager(&self) -> Arc { diff --git a/codex-rs/core/src/exec_policy.rs b/codex-rs/core/src/exec_policy.rs index 2d1c2efe5e..6de2967c76 100644 --- a/codex-rs/core/src/exec_policy.rs +++ b/codex-rs/core/src/exec_policy.rs @@ -34,6 +34,13 @@ const POLICY_DIR_NAME: &str = "policy"; const POLICY_EXTENSION: &str = "codexpolicy"; const DEFAULT_POLICY_FILE: &str = "default.codexpolicy"; +fn is_policy_match(rule_match: &RuleMatch) -> bool { + match rule_match { + RuleMatch::PrefixRuleMatch { .. } => true, + RuleMatch::HeuristicsRuleMatch { .. } => false, + } +} + #[derive(Debug, Error)] pub enum ExecPolicyError { #[error("failed to read execpolicy files from {dir}: {source}")] @@ -147,49 +154,62 @@ pub(crate) async fn append_execpolicy_amendment_and_update( Ok(()) } -/// Returns a proposed execpolicy amendment only when heuristics caused -/// the prompt decision, so we can offer to apply that amendment for future runs. -/// -/// The amendment uses the first command heuristics marked as `Prompt`. If any explicit -/// execpolicy rule also prompts, we return `None` because applying the amendment would not -/// skip that policy requirement. -/// -/// Examples: +/// Derive a proposed execpolicy amendment when a command requires user approval +/// - If any execpolicy rule prompts, return None, because an amendment would not skip that policy requirement. +/// - Otherwise return the first heuristics Prompt. +/// - Examples: /// - execpolicy: empty. Command: `["python"]`. Heuristics prompt -> `Some(vec!["python"])`. /// - execpolicy: empty. Command: `["bash", "-c", "cd /some/folder && prog1 --option1 arg1 && prog2 --option2 arg2"]`. /// Parsed commands include `cd /some/folder`, `prog1 --option1 arg1`, and `prog2 --option2 arg2`. If heuristics allow `cd` but prompt /// on `prog1`, we return `Some(vec!["prog1", "--option1", "arg1"])`. /// - execpolicy: contains a `prompt for prefix ["prog2"]` rule. For the same command as above, /// we return `None` because an execpolicy prompt still applies even if we amend execpolicy to allow ["prog1", "--option1", "arg1"]. -fn proposed_execpolicy_amendment(evaluation: &Evaluation) -> Option { - if evaluation.decision != Decision::Prompt { +fn try_derive_execpolicy_amendment_for_prompt_rules( + matched_rules: &[RuleMatch], +) -> Option { + if matched_rules + .iter() + .any(|rule_match| is_policy_match(rule_match) && rule_match.decision() == Decision::Prompt) + { return None; } - let mut first_prompt_from_heuristics: Option> = None; - for rule_match in &evaluation.matched_rules { - match rule_match { - RuleMatch::HeuristicsRuleMatch { command, decision } => { - if *decision == Decision::Prompt && first_prompt_from_heuristics.is_none() { - first_prompt_from_heuristics = Some(command.clone()); - } - } - _ if rule_match.decision() == Decision::Prompt => { - return None; - } - _ => {} - } + matched_rules + .iter() + .find_map(|rule_match| match rule_match { + RuleMatch::HeuristicsRuleMatch { + command, + decision: Decision::Prompt, + } => Some(ExecPolicyAmendment::from(command.clone())), + _ => None, + }) +} + +/// - Note: we only use this amendment when the command fails to run in sandbox and codex prompts the user to run outside the sandbox +/// - The purpose of this amendment is to bypass sandbox for similar commands in the future +/// - If any execpolicy rule matches, return None, because we would already be running command outside the sandbox +fn try_derive_execpolicy_amendment_for_allow_rules( + matched_rules: &[RuleMatch], +) -> Option { + if matched_rules.iter().any(is_policy_match) { + return None; } - first_prompt_from_heuristics.map(ExecPolicyAmendment::from) + matched_rules + .iter() + .find_map(|rule_match| match rule_match { + RuleMatch::HeuristicsRuleMatch { + command, + decision: Decision::Allow, + } => Some(ExecPolicyAmendment::from(command.clone())), + _ => None, + }) } /// Only return PROMPT_REASON when an execpolicy rule drove the prompt decision. fn derive_prompt_reason(evaluation: &Evaluation) -> Option { evaluation.matched_rules.iter().find_map(|rule_match| { - if !matches!(rule_match, RuleMatch::HeuristicsRuleMatch { .. }) - && rule_match.decision() == Decision::Prompt - { + if is_policy_match(rule_match) && rule_match.decision() == Decision::Prompt { Some(PROMPT_REASON.to_string()) } else { None @@ -215,10 +235,6 @@ pub(crate) async fn create_exec_approval_requirement_for_command( }; let policy = exec_policy.read().await; let evaluation = policy.check_multiple(commands.iter(), &heuristics_fallback); - let has_policy_allow = evaluation.matched_rules.iter().any(|rule_match| { - !matches!(rule_match, RuleMatch::HeuristicsRuleMatch { .. }) - && rule_match.decision() == Decision::Allow - }); match evaluation.decision { Decision::Forbidden => ExecApprovalRequirement::Forbidden { @@ -233,7 +249,7 @@ pub(crate) async fn create_exec_approval_requirement_for_command( ExecApprovalRequirement::NeedsApproval { reason: derive_prompt_reason(&evaluation), proposed_execpolicy_amendment: if features.enabled(Feature::ExecPolicy) { - proposed_execpolicy_amendment(&evaluation) + try_derive_execpolicy_amendment_for_prompt_rules(&evaluation.matched_rules) } else { None }, @@ -241,7 +257,15 @@ pub(crate) async fn create_exec_approval_requirement_for_command( } } Decision::Allow => ExecApprovalRequirement::Skip { - bypass_sandbox: has_policy_allow, + // Bypass sandbox if execpolicy allows the command + bypass_sandbox: evaluation.matched_rules.iter().any(|rule_match| { + is_policy_match(rule_match) && rule_match.decision() == Decision::Allow + }), + proposed_execpolicy_amendment: if features.enabled(Feature::ExecPolicy) { + try_derive_execpolicy_amendment_for_allow_rules(&evaluation.matched_rules) + } else { + None + }, }, } } @@ -730,4 +754,56 @@ prefix_rule(pattern=["rm"], decision="forbidden") } ); } + + #[tokio::test] + async fn proposed_execpolicy_amendment_is_present_when_heuristics_allow() { + let command = vec!["echo".to_string(), "safe".to_string()]; + + let requirement = create_exec_approval_requirement_for_command( + &Arc::new(RwLock::new(Policy::empty())), + &Features::with_defaults(), + &command, + AskForApproval::OnRequest, + &SandboxPolicy::ReadOnly, + SandboxPermissions::UseDefault, + ) + .await; + + assert_eq!( + requirement, + ExecApprovalRequirement::Skip { + bypass_sandbox: false, + proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)), + } + ); + } + + #[tokio::test] + async fn proposed_execpolicy_amendment_is_suppressed_when_policy_matches_allow() { + let policy_src = r#"prefix_rule(pattern=["echo"], decision="allow")"#; + let mut parser = PolicyParser::new(); + parser + .parse("test.codexpolicy", policy_src) + .expect("parse policy"); + let policy = Arc::new(RwLock::new(parser.build())); + let command = vec!["echo".to_string(), "safe".to_string()]; + + let requirement = create_exec_approval_requirement_for_command( + &policy, + &Features::with_defaults(), + &command, + AskForApproval::OnRequest, + &SandboxPolicy::ReadOnly, + SandboxPermissions::UseDefault, + ) + .await; + + assert_eq!( + requirement, + ExecApprovalRequirement::Skip { + bypass_sandbox: true, + proposed_execpolicy_amendment: None, + } + ); + } } diff --git a/codex-rs/core/src/features.rs b/codex-rs/core/src/features.rs index 1d775360c4..69442815e7 100644 --- a/codex-rs/core/src/features.rs +++ b/codex-rs/core/src/features.rs @@ -54,6 +54,8 @@ pub enum Feature { WindowsSandbox, /// Remote compaction enabled (only for ChatGPT auth) RemoteCompaction, + /// Refresh remote models and emit AppReady once the list is available. + RemoteModels, /// Allow model to call multiple tools in parallel (only for models supporting it). ParallelToolCalls, /// Experimental skills injection (CLI flag-driven). @@ -333,6 +335,12 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::Experimental, default_enabled: true, }, + FeatureSpec { + id: Feature::RemoteModels, + key: "remote_models", + stage: Stage::Experimental, + default_enabled: false, + }, FeatureSpec { id: Feature::ParallelToolCalls, key: "parallel", diff --git a/codex-rs/core/src/openai_models/cache.rs b/codex-rs/core/src/openai_models/cache.rs new file mode 100644 index 0000000000..cac16cc853 --- /dev/null +++ b/codex-rs/core/src/openai_models/cache.rs @@ -0,0 +1,56 @@ +use chrono::DateTime; +use chrono::Utc; +use codex_protocol::openai_models::ModelInfo; +use serde::Deserialize; +use serde::Serialize; +use std::io; +use std::io::ErrorKind; +use std::path::Path; +use std::time::Duration; +use tokio::fs; + +/// Serialized snapshot of models and metadata cached on disk. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) struct ModelsCache { + pub(crate) fetched_at: DateTime, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub(crate) etag: Option, + pub(crate) models: Vec, +} + +impl ModelsCache { + /// Returns `true` when the cache entry has not exceeded the configured TTL. + pub(crate) fn is_fresh(&self, ttl: Duration) -> bool { + if ttl.is_zero() { + return false; + } + let Ok(ttl_duration) = chrono::Duration::from_std(ttl) else { + return false; + }; + let age = Utc::now().signed_duration_since(self.fetched_at); + age <= ttl_duration + } +} + +/// Read and deserialize the cache file if it exists. +pub(crate) async fn load_cache(path: &Path) -> io::Result> { + match fs::read(path).await { + Ok(contents) => { + let cache = serde_json::from_slice(&contents) + .map_err(|err| io::Error::new(ErrorKind::InvalidData, err.to_string()))?; + Ok(Some(cache)) + } + Err(err) if err.kind() == ErrorKind::NotFound => Ok(None), + Err(err) => Err(err), + } +} + +/// Persist the cache contents to disk, creating parent directories as needed. +pub(crate) async fn save_cache(path: &Path, cache: &ModelsCache) -> io::Result<()> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).await?; + } + let json = serde_json::to_vec_pretty(cache) + .map_err(|err| io::Error::new(ErrorKind::InvalidData, err.to_string()))?; + fs::write(path, json).await +} diff --git a/codex-rs/core/src/openai_models/mod.rs b/codex-rs/core/src/openai_models/mod.rs index e7a8beddb1..a77438ebc9 100644 --- a/codex-rs/core/src/openai_models/mod.rs +++ b/codex-rs/core/src/openai_models/mod.rs @@ -1,3 +1,4 @@ +mod cache; pub mod model_family; pub mod model_presets; pub mod models_manager; diff --git a/codex-rs/core/src/openai_models/model_family.rs b/codex-rs/core/src/openai_models/model_family.rs index 6ee18ad9e3..507e1a48d9 100644 --- a/codex-rs/core/src/openai_models/model_family.rs +++ b/codex-rs/core/src/openai_models/model_family.rs @@ -291,6 +291,7 @@ mod tests { use super::*; use codex_protocol::openai_models::ClientVersion; use codex_protocol::openai_models::ModelVisibility; + use codex_protocol::openai_models::ReasoningEffortPreset; fn remote(slug: &str, effort: ReasoningEffort, shell: ConfigShellToolType) -> ModelInfo { ModelInfo { @@ -298,12 +299,16 @@ mod tests { display_name: slug.to_string(), description: Some(format!("{slug} desc")), default_reasoning_level: effort, - supported_reasoning_levels: vec![effort], + supported_reasoning_levels: vec![ReasoningEffortPreset { + effort, + description: effort.to_string(), + }], shell_type: shell, visibility: ModelVisibility::List, minimal_client_version: ClientVersion(0, 1, 0), supported_in_api: true, priority: 1, + upgrade: None, } } diff --git a/codex-rs/core/src/openai_models/models_manager.rs b/codex-rs/core/src/openai_models/models_manager.rs index 22edf04ffe..d50844098e 100644 --- a/codex-rs/core/src/openai_models/models_manager.rs +++ b/codex-rs/core/src/openai_models/models_manager.rs @@ -1,11 +1,19 @@ +use chrono::Utc; use codex_api::ModelsClient; use codex_api::ReqwestTransport; use codex_protocol::openai_models::ModelInfo; use codex_protocol::openai_models::ModelPreset; +use codex_protocol::openai_models::ModelsResponse; use http::HeaderMap; +use std::path::PathBuf; use std::sync::Arc; +use std::time::Duration; use tokio::sync::RwLock; +use tokio::sync::TryLockError; +use tracing::error; +use super::cache; +use super::cache::ModelsCache; use crate::api_bridge::auth_provider_from_auth; use crate::api_bridge::map_api_error; use crate::auth::AuthManager; @@ -17,50 +25,75 @@ use crate::openai_models::model_family::ModelFamily; use crate::openai_models::model_family::find_family_for_model; use crate::openai_models::model_presets::builtin_model_presets; +const MODEL_CACHE_FILE: &str = "models_cache.json"; +const DEFAULT_MODEL_CACHE_TTL: Duration = Duration::from_secs(300); + +/// Coordinates remote model discovery plus cached metadata on disk. #[derive(Debug)] pub struct ModelsManager { // todo(aibrahim) merge available_models and model family creation into one struct - pub available_models: RwLock>, - pub remote_models: RwLock>, - pub etag: String, - pub auth_manager: Arc, + available_models: RwLock>, + remote_models: RwLock>, + auth_manager: Arc, + etag: RwLock>, + codex_home: PathBuf, + cache_ttl: Duration, } impl ModelsManager { + /// Construct a manager scoped to the provided `AuthManager`. pub fn new(auth_manager: Arc) -> Self { + let codex_home = auth_manager.codex_home().to_path_buf(); Self { available_models: RwLock::new(builtin_model_presets(auth_manager.get_auth_mode())), remote_models: RwLock::new(Vec::new()), - etag: String::new(), auth_manager, + etag: RwLock::new(None), + codex_home, + cache_ttl: DEFAULT_MODEL_CACHE_TTL, } } - // do not use this function yet. It's work in progress. - pub async fn refresh_available_models( - &self, - provider: &ModelProviderInfo, - ) -> CoreResult> { + /// Fetch the latest remote models, using the on-disk cache when still fresh. + pub async fn refresh_available_models(&self, provider: &ModelProviderInfo) -> CoreResult<()> { + if self.try_load_cache().await { + return Ok(()); + } + let auth = self.auth_manager.auth(); let api_provider = provider.to_api_provider(auth.as_ref().map(|auth| auth.mode))?; let api_auth = auth_provider_from_auth(auth.clone(), provider).await?; let transport = ReqwestTransport::new(build_reqwest_client()); let client = ModelsClient::new(transport, api_provider, api_auth); - let response = client - .list_models(env!("CARGO_PKG_VERSION"), HeaderMap::new()) + let mut client_version = env!("CARGO_PKG_VERSION"); + if client_version == "0.0.0" { + client_version = "99.99.99"; + } + let ModelsResponse { models, etag } = client + .list_models(client_version, HeaderMap::new()) .await .map_err(map_api_error)?; - let models = response.models; - *self.remote_models.write().await = models.clone(); - { - let mut available_models_guard = self.available_models.write().await; - *available_models_guard = self.build_available_models().await; - } - Ok(models) + let etag = (!etag.is_empty()).then_some(etag); + + self.apply_remote_models(models.clone()).await; + *self.etag.write().await = etag.clone(); + self.persist_cache(&models, etag).await; + Ok(()) } + pub async fn list_models(&self) -> Vec { + self.available_models.read().await.clone() + } + + pub fn try_list_models(&self) -> Result, TryLockError> { + self.available_models + .try_read() + .map(|models| models.clone()) + } + + /// Look up the requested model family while applying remote metadata overrides. pub async fn construct_model_family(&self, model: &str, config: &Config) -> ModelFamily { find_family_for_model(model) .with_config_overrides(config) @@ -68,34 +101,88 @@ impl ModelsManager { } #[cfg(any(test, feature = "test-support"))] + /// Offline helper that builds a `ModelFamily` without consulting remote state. pub fn construct_model_family_offline(model: &str, config: &Config) -> ModelFamily { find_family_for_model(model).with_config_overrides(config) } - async fn build_available_models(&self) -> Vec { + /// Replace the cached remote models and rebuild the derived presets list. + async fn apply_remote_models(&self, models: Vec) { + *self.remote_models.write().await = models; + self.build_available_models().await; + } + + /// Attempt to satisfy the refresh from the cache when it matches the provider and TTL. + async fn try_load_cache(&self) -> bool { + let cache_path = self.cache_path(); + let cache = match cache::load_cache(&cache_path).await { + Ok(cache) => cache, + Err(err) => { + error!("failed to load models cache: {err}"); + return false; + } + }; + let cache = match cache { + Some(cache) => cache, + None => return false, + }; + if !cache.is_fresh(self.cache_ttl) { + return false; + } + let models = cache.models.clone(); + *self.etag.write().await = cache.etag.clone(); + self.apply_remote_models(models.clone()).await; + true + } + + /// Serialize the latest fetch to disk for reuse across future processes. + async fn persist_cache(&self, models: &[ModelInfo], etag: Option) { + let cache = ModelsCache { + fetched_at: Utc::now(), + etag, + models: models.to_vec(), + }; + let cache_path = self.cache_path(); + if let Err(err) = cache::save_cache(&cache_path, &cache).await { + error!("failed to write models cache: {err}"); + } + } + + /// Convert remote model metadata into picker-ready presets, marking defaults. + async fn build_available_models(&self) { let mut available_models = self.remote_models.read().await.clone(); available_models.sort_by(|a, b| b.priority.cmp(&a.priority)); - let mut model_presets: Vec = - available_models.into_iter().map(Into::into).collect(); + let mut model_presets: Vec = available_models + .into_iter() + .map(Into::into) + .filter(|preset: &ModelPreset| preset.show_in_picker) + .collect(); if let Some(default) = model_presets.first_mut() { default.is_default = true; } - model_presets + { + let mut available_models_guard = self.available_models.write().await; + *available_models_guard = model_presets; + } + } + + fn cache_path(&self) -> PathBuf { + self.codex_home.join(MODEL_CACHE_FILE) } } #[cfg(test)] mod tests { + use super::cache::ModelsCache; use super::*; use crate::CodexAuth; + use crate::auth::AuthCredentialsStoreMode; use crate::model_provider_info::WireApi; use codex_protocol::openai_models::ModelsResponse; + use core_test_support::responses::mount_models_once; use serde_json::json; - use wiremock::Mock; + use tempfile::tempdir; use wiremock::MockServer; - use wiremock::ResponseTemplate; - use wiremock::matchers::method; - use wiremock::matchers::path; fn remote_model(slug: &str, display: &str, priority: i32) -> ModelInfo { serde_json::from_value(json!({ @@ -103,12 +190,13 @@ mod tests { "display_name": display, "description": format!("{display} desc"), "default_reasoning_level": "medium", - "supported_reasoning_levels": ["low", "medium"], + "supported_reasoning_levels": [{"effort": "low", "description": "low"}, {"effort": "medium", "description": "medium"}], "shell_type": "shell_command", "visibility": "list", "minimal_client_version": [0, 1, 0], "supported_in_api": true, - "priority": priority + "priority": priority, + "upgrade": null, })) .expect("valid model") } @@ -138,35 +226,28 @@ mod tests { remote_model("priority-low", "Low", 1), remote_model("priority-high", "High", 10), ]; - let response = ModelsResponse { - models: remote_models.clone(), - }; - Mock::given(method("GET")) - .and(path("/models")) - .respond_with( - ResponseTemplate::new(200) - .insert_header("content-type", "application/json") - .set_body_json(&response), - ) - .expect(1) - .mount(&server) - .await; + let models_mock = mount_models_once( + &server, + ModelsResponse { + models: remote_models.clone(), + etag: String::new(), + }, + ) + .await; let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")); let manager = ModelsManager::new(auth_manager); let provider = provider_for(server.uri()); - let returned = manager + manager .refresh_available_models(&provider) .await .expect("refresh succeeds"); - - assert_eq!(returned, remote_models); let cached_remote = manager.remote_models.read().await.clone(); assert_eq!(cached_remote, remote_models); - let available = manager.available_models.read().await.clone(); + let available = manager.list_models().await; assert_eq!(available.len(), 2); assert_eq!(available[0].model, "priority-high"); assert!( @@ -175,5 +256,128 @@ mod tests { ); assert_eq!(available[1].model, "priority-low"); assert!(!available[1].is_default); + assert_eq!( + models_mock.requests().len(), + 1, + "expected a single /models request" + ); + } + + #[tokio::test] + async fn refresh_available_models_uses_cache_when_fresh() { + let server = MockServer::start().await; + let remote_models = vec![remote_model("cached", "Cached", 5)]; + let models_mock = mount_models_once( + &server, + ModelsResponse { + models: remote_models.clone(), + etag: String::new(), + }, + ) + .await; + + let codex_home = tempdir().expect("temp dir"); + let auth_manager = Arc::new(AuthManager::new( + codex_home.path().to_path_buf(), + false, + AuthCredentialsStoreMode::File, + )); + let manager = ModelsManager::new(auth_manager); + let provider = provider_for(server.uri()); + + manager + .refresh_available_models(&provider) + .await + .expect("first refresh succeeds"); + assert_eq!( + *manager.remote_models.read().await, + remote_models, + "remote cache should store fetched models" + ); + + // Second call should read from cache and avoid the network. + manager + .refresh_available_models(&provider) + .await + .expect("cached refresh succeeds"); + assert_eq!( + *manager.remote_models.read().await, + remote_models, + "cache path should not mutate stored models" + ); + assert_eq!( + models_mock.requests().len(), + 1, + "cache hit should avoid a second /models request" + ); + } + + #[tokio::test] + async fn refresh_available_models_refetches_when_cache_stale() { + let server = MockServer::start().await; + let initial_models = vec![remote_model("stale", "Stale", 1)]; + let initial_mock = mount_models_once( + &server, + ModelsResponse { + models: initial_models.clone(), + etag: String::new(), + }, + ) + .await; + + let codex_home = tempdir().expect("temp dir"); + let auth_manager = Arc::new(AuthManager::new( + codex_home.path().to_path_buf(), + false, + AuthCredentialsStoreMode::File, + )); + let manager = ModelsManager::new(auth_manager); + let provider = provider_for(server.uri()); + + manager + .refresh_available_models(&provider) + .await + .expect("initial refresh succeeds"); + + // Rewrite cache with an old timestamp so it is treated as stale. + let cache_path = codex_home.path().join(MODEL_CACHE_FILE); + let contents = + std::fs::read_to_string(&cache_path).expect("cache file should exist after refresh"); + let mut cache: ModelsCache = + serde_json::from_str(&contents).expect("cache should deserialize"); + cache.fetched_at = Utc::now() - chrono::Duration::hours(1); + std::fs::write(&cache_path, serde_json::to_string_pretty(&cache).unwrap()) + .expect("cache rewrite succeeds"); + + let updated_models = vec![remote_model("fresh", "Fresh", 9)]; + server.reset().await; + let refreshed_mock = mount_models_once( + &server, + ModelsResponse { + models: updated_models.clone(), + etag: String::new(), + }, + ) + .await; + + manager + .refresh_available_models(&provider) + .await + .expect("second refresh succeeds"); + assert_eq!( + *manager.remote_models.read().await, + updated_models, + "stale cache should trigger refetch" + ); + assert_eq!( + initial_mock.requests().len(), + 1, + "initial refresh should only hit /models once" + ); + assert_eq!( + refreshed_mock.requests().len(), + 1, + "stale cache refresh should fetch /models once" + ); } } diff --git a/codex-rs/core/src/tools/handlers/shell.rs b/codex-rs/core/src/tools/handlers/shell.rs index c3ef590e13..7d13c90fa0 100644 --- a/codex-rs/core/src/tools/handlers/shell.rs +++ b/codex-rs/core/src/tools/handlers/shell.rs @@ -148,6 +148,20 @@ impl ToolHandler for ShellCommandHandler { matches!(payload, ToolPayload::Function { .. }) } + fn is_mutating(&self, invocation: &ToolInvocation) -> bool { + let ToolPayload::Function { arguments } = &invocation.payload else { + return true; + }; + + serde_json::from_str::(arguments) + .map(|params| { + let shell = invocation.session.user_shell(); + let command = shell.derive_exec_args(¶ms.command, params.login.unwrap_or(true)); + !is_known_safe_command(&command) + }) + .unwrap_or(true) + } + async fn handle(&self, invocation: ToolInvocation) -> Result { let ToolInvocation { session, diff --git a/codex-rs/core/src/tools/runtimes/shell.rs b/codex-rs/core/src/tools/runtimes/shell.rs index 2af095ee92..50b6a6785a 100644 --- a/codex-rs/core/src/tools/runtimes/shell.rs +++ b/codex-rs/core/src/tools/runtimes/shell.rs @@ -133,7 +133,8 @@ impl Approvable for ShellRuntime { || matches!( req.exec_approval_requirement, ExecApprovalRequirement::Skip { - bypass_sandbox: true + bypass_sandbox: true, + .. } ) { diff --git a/codex-rs/core/src/tools/runtimes/unified_exec.rs b/codex-rs/core/src/tools/runtimes/unified_exec.rs index 4c1cbb83ec..d21e6de1e2 100644 --- a/codex-rs/core/src/tools/runtimes/unified_exec.rs +++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs @@ -154,7 +154,8 @@ impl Approvable for UnifiedExecRuntime<'_> { || matches!( req.exec_approval_requirement, ExecApprovalRequirement::Skip { - bypass_sandbox: true + bypass_sandbox: true, + .. } ) { diff --git a/codex-rs/core/src/tools/sandboxing.rs b/codex-rs/core/src/tools/sandboxing.rs index 94c81043cc..5e69696923 100644 --- a/codex-rs/core/src/tools/sandboxing.rs +++ b/codex-rs/core/src/tools/sandboxing.rs @@ -95,6 +95,9 @@ pub(crate) enum ExecApprovalRequirement { /// The first attempt should skip sandboxing (e.g., when explicitly /// greenlit by policy). bypass_sandbox: bool, + /// Proposed execpolicy amendment to skip future approvals for similar commands + /// Only applies if the command fails to run in sandbox and codex prompts the user to run outside the sandbox. + proposed_execpolicy_amendment: Option, }, /// Approval required for this tool call. NeedsApproval { @@ -114,6 +117,10 @@ impl ExecApprovalRequirement { proposed_execpolicy_amendment: Some(prefix), .. } => Some(prefix), + Self::Skip { + proposed_execpolicy_amendment: Some(prefix), + .. + } => Some(prefix), _ => None, } } @@ -140,6 +147,7 @@ pub(crate) fn default_exec_approval_requirement( } else { ExecApprovalRequirement::Skip { bypass_sandbox: false, + proposed_execpolicy_amendment: None, } } } diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index e72becd882..89a71b0edc 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -804,10 +804,16 @@ pub(crate) fn create_tools_json_for_chat_completions_api( } if let Some(map) = tool.as_object_mut() { + let name = map + .get("name") + .and_then(|v| v.as_str()) + .unwrap_or_default() + .to_string(); // Remove "type" field as it is not needed in chat completions. map.remove("type"); Some(json!({ "type": "function", + "name": name, "function": map, })) } else { @@ -2083,4 +2089,58 @@ Examples of valid command strings: }) ); } + + #[test] + fn chat_tools_include_top_level_name() { + let mut properties = BTreeMap::new(); + properties.insert("foo".to_string(), JsonSchema::String { description: None }); + let tools = vec![ToolSpec::Function(ResponsesApiTool { + name: "demo".to_string(), + description: "A demo tool".to_string(), + strict: false, + parameters: JsonSchema::Object { + properties, + required: None, + additional_properties: None, + }, + })]; + + let responses_json = create_tools_json_for_responses_api(&tools).unwrap(); + assert_eq!( + responses_json, + vec![json!({ + "type": "function", + "name": "demo", + "description": "A demo tool", + "strict": false, + "parameters": { + "type": "object", + "properties": { + "foo": { "type": "string" } + }, + }, + })] + ); + + let tools_json = create_tools_json_for_chat_completions_api(&tools).unwrap(); + + assert_eq!( + tools_json, + vec![json!({ + "type": "function", + "name": "demo", + "function": { + "name": "demo", + "description": "A demo tool", + "strict": false, + "parameters": { + "type": "object", + "properties": { + "foo": { "type": "string" } + }, + }, + } + })] + ); + } } diff --git a/codex-rs/core/tests/common/responses.rs b/codex-rs/core/tests/common/responses.rs index e42b4ac943..c67daeda87 100644 --- a/codex-rs/core/tests/common/responses.rs +++ b/codex-rs/core/tests/common/responses.rs @@ -677,7 +677,14 @@ pub async fn start_mock_server() -> MockServer { .await; // Provide a default `/models` response so tests remain hermetic when the client queries it. - let _ = mount_models_once(&server, ModelsResponse { models: Vec::new() }).await; + let _ = mount_models_once( + &server, + ModelsResponse { + models: Vec::new(), + etag: String::new(), + }, + ) + .await; server } diff --git a/codex-rs/core/tests/suite/deprecation_notice.rs b/codex-rs/core/tests/suite/deprecation_notice.rs index 4e240f0a07..bab715ebd8 100644 --- a/codex-rs/core/tests/suite/deprecation_notice.rs +++ b/codex-rs/core/tests/suite/deprecation_notice.rs @@ -36,7 +36,7 @@ async fn emits_deprecation_notice_for_legacy_feature_flag() -> anyhow::Result<() let DeprecationNoticeEvent { summary, details } = notice; assert_eq!( summary, - "`use_experimental_unified_exec_tool` is deprecated. Use `unified_exec` instead." + "`use_experimental_unified_exec_tool` is deprecated. Use `[features].unified_exec` instead." .to_string(), ); assert_eq!( diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index e2d78004a5..2112cbb7aa 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -41,6 +41,7 @@ mod otel; mod prompt_caching; mod quota_exceeded; mod read_file; +mod remote_models; mod resume; mod review; mod rmcp_client; diff --git a/codex-rs/core/tests/suite/remote_models.rs b/codex-rs/core/tests/suite/remote_models.rs new file mode 100644 index 0000000000..b13188d5d1 --- /dev/null +++ b/codex-rs/core/tests/suite/remote_models.rs @@ -0,0 +1,184 @@ +#![cfg(not(target_os = "windows"))] +// unified exec is not supported on Windows OS +use std::sync::Arc; + +use anyhow::Result; +use codex_core::features::Feature; +use codex_core::openai_models::models_manager::ModelsManager; +use codex_core::protocol::AskForApproval; +use codex_core::protocol::EventMsg; +use codex_core::protocol::ExecCommandSource; +use codex_core::protocol::Op; +use codex_core::protocol::SandboxPolicy; +use codex_protocol::config_types::ReasoningSummary; +use codex_protocol::openai_models::ClientVersion; +use codex_protocol::openai_models::ConfigShellToolType; +use codex_protocol::openai_models::ModelInfo; +use codex_protocol::openai_models::ModelPreset; +use codex_protocol::openai_models::ModelVisibility; +use codex_protocol::openai_models::ModelsResponse; +use codex_protocol::openai_models::ReasoningEffort; +use codex_protocol::openai_models::ReasoningEffortPreset; +use codex_protocol::user_input::UserInput; +use core_test_support::responses::ev_assistant_message; +use core_test_support::responses::ev_completed; +use core_test_support::responses::ev_function_call; +use core_test_support::responses::ev_response_created; +use core_test_support::responses::mount_models_once; +use core_test_support::responses::mount_sse_sequence; +use core_test_support::responses::sse; +use core_test_support::skip_if_no_network; +use core_test_support::skip_if_sandbox; +use core_test_support::test_codex::TestCodex; +use core_test_support::test_codex::test_codex; +use core_test_support::wait_for_event; +use core_test_support::wait_for_event_match; +use serde_json::json; +use tokio::time::Duration; +use tokio::time::Instant; +use tokio::time::sleep; +use wiremock::BodyPrintLimit; +use wiremock::MockServer; + +const REMOTE_MODEL_SLUG: &str = "codex-test"; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { + skip_if_no_network!(Ok(())); + skip_if_sandbox!(Ok(())); + + let server = MockServer::builder() + .body_print_limit(BodyPrintLimit::Limited(80_000)) + .start() + .await; + + let remote_model = ModelInfo { + slug: REMOTE_MODEL_SLUG.to_string(), + display_name: "Remote Test".to_string(), + description: Some("A remote model that requires the test shell".to_string()), + default_reasoning_level: ReasoningEffort::Medium, + supported_reasoning_levels: vec![ReasoningEffortPreset { + effort: ReasoningEffort::Medium, + description: ReasoningEffort::Medium.to_string(), + }], + shell_type: ConfigShellToolType::UnifiedExec, + visibility: ModelVisibility::List, + minimal_client_version: ClientVersion(0, 1, 0), + supported_in_api: true, + priority: 1, + upgrade: None, + }; + + let models_mock = mount_models_once( + &server, + ModelsResponse { + models: vec![remote_model], + etag: String::new(), + }, + ) + .await; + + let mut builder = test_codex().with_config(|config| { + config.features.enable(Feature::RemoteModels); + config.model = "gpt-5.1".to_string(); + }); + + let TestCodex { + codex, + cwd, + config, + conversation_manager, + .. + } = builder.build(&server).await?; + + let models_manager = conversation_manager.get_models_manager(); + let available_model = wait_for_model_available(&models_manager, REMOTE_MODEL_SLUG).await; + + assert_eq!(available_model.model, REMOTE_MODEL_SLUG); + + let requests = models_mock.requests(); + assert_eq!( + requests.len(), + 1, + "expected a single /models refresh request for the remote models feature" + ); + assert_eq!(requests[0].url.path(), "/v1/models"); + + let family = models_manager + .construct_model_family(REMOTE_MODEL_SLUG, &config) + .await; + assert_eq!(family.shell_type, ConfigShellToolType::UnifiedExec); + + codex + .submit(Op::OverrideTurnContext { + cwd: None, + approval_policy: None, + sandbox_policy: None, + model: Some(REMOTE_MODEL_SLUG.to_string()), + effort: None, + summary: None, + }) + .await?; + + let call_id = "call"; + let args = json!({ + "cmd": "/bin/echo call", + "yield_time_ms": 250, + }); + let responses = vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_response_created("resp-2"), + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ]; + mount_sse_sequence(&server, responses).await; + + codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "run call".into(), + }], + final_output_json_schema: None, + cwd: cwd.path().to_path_buf(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::DangerFullAccess, + model: REMOTE_MODEL_SLUG.to_string(), + effort: None, + summary: ReasoningSummary::Auto, + }) + .await?; + + let begin_event = wait_for_event_match(&codex, |msg| match msg { + EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()), + _ => None, + }) + .await; + + assert_eq!(begin_event.source, ExecCommandSource::UnifiedExecStartup); + + wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await; + + Ok(()) +} + +async fn wait_for_model_available(manager: &Arc, slug: &str) -> ModelPreset { + let deadline = Instant::now() + Duration::from_secs(2); + loop { + if let Some(model) = { + let guard = manager.list_models().await; + guard.iter().find(|model| model.model == slug).cloned() + } { + return model; + } + if Instant::now() >= deadline { + panic!("timed out waiting for the remote model {slug} to appear"); + } + sleep(Duration::from_millis(25)).await; + } +} diff --git a/codex-rs/exec-server/Cargo.toml b/codex-rs/exec-server/Cargo.toml index ab6ca80a12..a0bd534934 100644 --- a/codex-rs/exec-server/Cargo.toml +++ b/codex-rs/exec-server/Cargo.toml @@ -56,5 +56,9 @@ tracing = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] } [dev-dependencies] +assert_cmd = { workspace = true } +exec_server_test_support = { workspace = true } +maplit = { workspace = true } pretty_assertions = { workspace = true } tempfile = { workspace = true } +which = { workspace = true } diff --git a/codex-rs/exec-server/src/lib.rs b/codex-rs/exec-server/src/lib.rs index adec09d4de..62f7bbccca 100644 --- a/codex-rs/exec-server/src/lib.rs +++ b/codex-rs/exec-server/src/lib.rs @@ -6,3 +6,6 @@ pub use posix::main_execve_wrapper; #[cfg(unix)] pub use posix::main_mcp_server; + +#[cfg(unix)] +pub use posix::ExecResult; diff --git a/codex-rs/exec-server/src/posix.rs b/codex-rs/exec-server/src/posix.rs index 16da5885f5..1a4b0a0e1f 100644 --- a/codex-rs/exec-server/src/posix.rs +++ b/codex-rs/exec-server/src/posix.rs @@ -82,6 +82,8 @@ mod mcp_escalation_policy; mod socket; mod stopwatch; +pub use mcp::ExecResult; + /// Default value of --execve option relative to the current executable. /// Note this must match the name of the binary as specified in Cargo.toml. const CODEX_EXECVE_WRAPPER_EXE_NAME: &str = "codex-execve-wrapper"; diff --git a/codex-rs/exec-server/src/posix/mcp.rs b/codex-rs/exec-server/src/posix/mcp.rs index bbbddc22e6..1376d46b72 100644 --- a/codex-rs/exec-server/src/posix/mcp.rs +++ b/codex-rs/exec-server/src/posix/mcp.rs @@ -54,7 +54,7 @@ pub struct ExecParams { pub login: Option, } -#[derive(Debug, serde::Serialize, schemars::JsonSchema)] +#[derive(Debug, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] pub struct ExecResult { pub exit_code: i32, pub output: String, diff --git a/codex-rs/exec-server/tests/all.rs b/codex-rs/exec-server/tests/all.rs new file mode 100644 index 0000000000..7e136e4cce --- /dev/null +++ b/codex-rs/exec-server/tests/all.rs @@ -0,0 +1,3 @@ +// Single integration test binary that aggregates all test modules. +// The submodules live in `tests/suite/`. +mod suite; diff --git a/codex-rs/exec-server/tests/common/Cargo.toml b/codex-rs/exec-server/tests/common/Cargo.toml new file mode 100644 index 0000000000..ba7d2af0a1 --- /dev/null +++ b/codex-rs/exec-server/tests/common/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "exec_server_test_support" +version.workspace = true +edition.workspace = true +license.workspace = true + +[lib] +path = "lib.rs" + +[dependencies] +assert_cmd = { workspace = true } +anyhow = { workspace = true } +codex-core = { workspace = true } +rmcp = { workspace = true } +serde_json = { workspace = true } +tokio = { workspace = true } diff --git a/codex-rs/exec-server/tests/common/lib.rs b/codex-rs/exec-server/tests/common/lib.rs new file mode 100644 index 0000000000..c6df5c32c7 --- /dev/null +++ b/codex-rs/exec-server/tests/common/lib.rs @@ -0,0 +1,167 @@ +use codex_core::MCP_SANDBOX_STATE_NOTIFICATION; +use codex_core::SandboxState; +use codex_core::protocol::SandboxPolicy; +use rmcp::ClientHandler; +use rmcp::ErrorData as McpError; +use rmcp::RoleClient; +use rmcp::Service; +use rmcp::model::ClientCapabilities; +use rmcp::model::ClientInfo; +use rmcp::model::CreateElicitationRequestParam; +use rmcp::model::CreateElicitationResult; +use rmcp::model::CustomClientNotification; +use rmcp::model::ElicitationAction; +use rmcp::service::RunningService; +use rmcp::transport::ConfigureCommandExt; +use rmcp::transport::TokioChildProcess; +use serde_json::json; +use std::collections::HashSet; +use std::path::Path; +use std::path::PathBuf; +use std::process::Stdio; +use std::sync::Arc; +use std::sync::Mutex; +use tokio::process::Command; + +pub fn create_transport

(codex_home: P) -> anyhow::Result +where + P: AsRef, +{ + let mcp_executable = assert_cmd::Command::cargo_bin("codex-exec-mcp-server")?; + let execve_wrapper = assert_cmd::Command::cargo_bin("codex-execve-wrapper")?; + let bash = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("..") + .join("tests") + .join("suite") + .join("bash"); + + let transport = + TokioChildProcess::new(Command::new(mcp_executable.get_program()).configure(|cmd| { + cmd.arg("--bash").arg(bash); + cmd.arg("--execve").arg(execve_wrapper.get_program()); + cmd.env("CODEX_HOME", codex_home.as_ref()); + + // Important: pipe stdio so rmcp can speak JSON-RPC over stdin/stdout + cmd.stdin(Stdio::piped()); + cmd.stdout(Stdio::piped()); + + // Optional but very helpful while debugging: + cmd.stderr(Stdio::inherit()); + }))?; + + Ok(transport) +} + +pub async fn write_default_execpolicy

(policy: &str, codex_home: P) -> anyhow::Result<()> +where + P: AsRef, +{ + let policy_dir = codex_home.as_ref().join("policy"); + tokio::fs::create_dir_all(&policy_dir).await?; + tokio::fs::write(policy_dir.join("default.codexpolicy"), policy).await?; + Ok(()) +} + +pub async fn notify_readable_sandbox( + sandbox_cwd: P, + codex_linux_sandbox_exe: Option, + service: &RunningService, +) -> anyhow::Result<()> +where + P: AsRef, + S: Service + ClientHandler, +{ + let sandbox_state = SandboxState { + sandbox_policy: SandboxPolicy::ReadOnly, + codex_linux_sandbox_exe, + sandbox_cwd: sandbox_cwd.as_ref().to_path_buf(), + }; + send_sandbox_notification(sandbox_state, service).await +} + +pub async fn notify_writable_sandbox_only_one_folder( + writable_folder: P, + codex_linux_sandbox_exe: Option, + service: &RunningService, +) -> anyhow::Result<()> +where + P: AsRef, + S: Service + ClientHandler, +{ + let sandbox_state = SandboxState { + sandbox_policy: SandboxPolicy::WorkspaceWrite { + // Note that sandbox_cwd will already be included as a writable root + // when the sandbox policy is expanded. + writable_roots: vec![], + network_access: false, + // Disable writes to temp dir because this is a test, so + // writable_folder is likely also under /tmp and we want to be + // strict about what is writable. + exclude_tmpdir_env_var: true, + exclude_slash_tmp: true, + }, + codex_linux_sandbox_exe, + sandbox_cwd: writable_folder.as_ref().to_path_buf(), + }; + send_sandbox_notification(sandbox_state, service).await +} + +async fn send_sandbox_notification( + sandbox_state: SandboxState, + service: &RunningService, +) -> anyhow::Result<()> +where + S: Service + ClientHandler, +{ + let sandbox_state_notification = CustomClientNotification::new( + MCP_SANDBOX_STATE_NOTIFICATION, + Some(serde_json::to_value(sandbox_state)?), + ); + service + .send_notification(sandbox_state_notification.into()) + .await?; + Ok(()) +} + +pub struct InteractiveClient { + pub elicitations_to_accept: HashSet, + pub elicitation_requests: Arc>>, +} + +impl ClientHandler for InteractiveClient { + fn get_info(&self) -> ClientInfo { + let capabilities = ClientCapabilities::builder().enable_elicitation().build(); + ClientInfo { + capabilities, + ..Default::default() + } + } + + fn create_elicitation( + &self, + request: CreateElicitationRequestParam, + _context: rmcp::service::RequestContext, + ) -> impl std::future::Future> + Send + '_ + { + self.elicitation_requests + .lock() + .unwrap() + .push(request.clone()); + + let accept = self.elicitations_to_accept.contains(&request.message); + async move { + if accept { + Ok(CreateElicitationResult { + action: ElicitationAction::Accept, + content: Some(json!({ "approve": true })), + }) + } else { + Ok(CreateElicitationResult { + action: ElicitationAction::Decline, + content: None, + }) + } + } + } +} diff --git a/codex-rs/exec-server/tests/suite/accept_elicitation.rs b/codex-rs/exec-server/tests/suite/accept_elicitation.rs new file mode 100644 index 0000000000..2093f9a577 --- /dev/null +++ b/codex-rs/exec-server/tests/suite/accept_elicitation.rs @@ -0,0 +1,131 @@ +#![allow(clippy::unwrap_used, clippy::expect_used)] +use std::borrow::Cow; +use std::sync::Arc; +use std::sync::Mutex; + +use anyhow::Result; +use codex_exec_server::ExecResult; +use exec_server_test_support::InteractiveClient; +use exec_server_test_support::create_transport; +use exec_server_test_support::notify_readable_sandbox; +use exec_server_test_support::write_default_execpolicy; +use maplit::hashset; +use pretty_assertions::assert_eq; +use rmcp::ServiceExt; +use rmcp::model::CallToolRequestParam; +use rmcp::model::CallToolResult; +use rmcp::model::CreateElicitationRequestParam; +use rmcp::model::object; +use serde_json::json; +use std::os::unix::fs::symlink; +use tempfile::TempDir; + +/// Verify that when using a read-only sandbox and an execpolicy that prompts, +/// the proper elicitation is sent. Upon auto-approving the elicitation, the +/// command should be run privileged outside the sandbox. +#[tokio::test(flavor = "current_thread")] +async fn accept_elicitation_for_prompt_rule() -> Result<()> { + // Configure a stdio transport that will launch the MCP server using + // $CODEX_HOME with an execpolicy that prompts for `git init` commands. + let codex_home = TempDir::new()?; + write_default_execpolicy( + r#" +# Create a rule with `decision = "prompt"` to exercise the elicitation flow. +prefix_rule( + pattern = ["git", "init"], + decision = "prompt", + match = [ + "git init ." + ], +) +"#, + codex_home.as_ref(), + ) + .await?; + let transport = create_transport(codex_home.as_ref())?; + + // Create an MCP client that approves expected elicitation messages. + let project_root = TempDir::new()?; + let git = which::which("git")?; + let project_root_path = project_root.path().canonicalize().unwrap(); + let expected_elicitation_message = format!( + "Allow agent to run `{} init .` in `{}`?", + git.display(), + project_root_path.display() + ); + let elicitation_requests: Arc>> = Default::default(); + let client = InteractiveClient { + elicitations_to_accept: hashset! { expected_elicitation_message.clone() }, + elicitation_requests: elicitation_requests.clone(), + }; + + // Start the MCP server. + let service: rmcp::service::RunningService = + client.serve(transport).await?; + + // Notify the MCP server about the current sandbox state before making any + // `shell` tool calls. + let linux_sandbox_exe_folder = TempDir::new()?; + let codex_linux_sandbox_exe = if cfg!(target_os = "linux") { + let codex_linux_sandbox_exe = linux_sandbox_exe_folder.path().join("codex-linux-sandbox"); + let codex_cli = assert_cmd::Command::cargo_bin("codex")? + .get_program() + .to_os_string(); + let codex_cli_path = std::path::PathBuf::from(codex_cli); + symlink(&codex_cli_path, &codex_linux_sandbox_exe)?; + Some(codex_linux_sandbox_exe) + } else { + None + }; + notify_readable_sandbox(&project_root_path, codex_linux_sandbox_exe, &service).await?; + + // Call the shell tool and verify that an elicitation was created and + // auto-approved. + let CallToolResult { + content, is_error, .. + } = service + .call_tool(CallToolRequestParam { + name: Cow::Borrowed("shell"), + arguments: Some(object(json!( + { + "command": "git init .", + "workdir": project_root_path.to_string_lossy(), + } + ))), + }) + .await?; + let tool_call_content = content + .first() + .expect("expected non-empty content") + .as_text() + .expect("expected text content"); + let ExecResult { + exit_code, output, .. + } = serde_json::from_str::(&tool_call_content.text)?; + let git_init_succeeded = format!( + "Initialized empty Git repository in {}/.git/\n", + project_root_path.display() + ); + // Normally, this would be an exact match, but it might include extra output + // if `git config set advice.defaultBranchName false` has not been set. + assert!( + output.contains(&git_init_succeeded), + "expected output `{output}` to contain `{git_init_succeeded}`" + ); + assert_eq!(exit_code, 0, "command should succeed"); + assert_eq!(is_error, Some(false), "command should succeed"); + assert!( + project_root_path.join(".git").is_dir(), + "git repo should exist" + ); + + let elicitation_messages = elicitation_requests + .lock() + .unwrap() + .iter() + .map(|r| r.message.clone()) + .collect::>(); + assert_eq!(vec![expected_elicitation_message], elicitation_messages); + + Ok(()) +} diff --git a/codex-rs/exec-server/tests/suite/bash b/codex-rs/exec-server/tests/suite/bash new file mode 100755 index 0000000000..5f5d1e5593 --- /dev/null +++ b/codex-rs/exec-server/tests/suite/bash @@ -0,0 +1,75 @@ +#!/usr/bin/env dotslash + +// This is an instance of the fork of Bash that we bundle with +// https://www.npmjs.com/package/@openai/codex-shell-tool-mcp. +// Fetching the prebuilt version via DotSlash makes it easier to write +// integration tests for the MCP server. +// +// TODO(mbolin): Currently, we use a .tgz artifact that includes binaries for +// multiple platforms, but we could save a bit of space by making arch-specific +// artifacts available in the GitHub releases and referencing those here. +{ + "name": "codex-bash", + "platforms": { + // macOS 13 builds (and therefore x86_64) were dropped in + // https://github.com/openai/codex/pull/7295, so we only provide an + // Apple Silicon build for now. + "macos-aarch64": { + "size": 37003612, + "hash": "blake3", + "digest": "d9cd5928c993b65c340507931c61c02bd6e9179933f8bf26a548482bb5fa53bb", + "format": "tar.gz", + "path": "package/vendor/aarch64-apple-darwin/bash/macos-15/bash", + "providers": [ + { + "url": "https://github.com/openai/codex/releases/download/rust-v0.65.0/codex-shell-tool-mcp-npm-0.65.0.tgz" + }, + { + "type": "github-release", + "repo": "openai/codex", + "tag": "rust-v0.65.0", + "name": "codex-shell-tool-mcp-npm-0.65.0.tgz" + } + ] + }, + // Note the `musl` parts of the Linux paths are misleading: the Bash + // binaries are actually linked against `glibc`, but the + // `codex-execve-wrapper` that invokes them is linked against `musl`. + "linux-x86_64": { + "size": 37003612, + "hash": "blake3", + "digest": "d9cd5928c993b65c340507931c61c02bd6e9179933f8bf26a548482bb5fa53bb", + "format": "tar.gz", + "path": "package/vendor/x86_64-unknown-linux-musl/bash/ubuntu-24.04/bash", + "providers": [ + { + "url": "https://github.com/openai/codex/releases/download/rust-v0.65.0/codex-shell-tool-mcp-npm-0.65.0.tgz" + }, + { + "type": "github-release", + "repo": "openai/codex", + "tag": "rust-v0.65.0", + "name": "codex-shell-tool-mcp-npm-0.65.0.tgz" + } + ] + }, + "linux-aarch64": { + "size": 37003612, + "hash": "blake3", + "digest": "d9cd5928c993b65c340507931c61c02bd6e9179933f8bf26a548482bb5fa53bb", + "format": "tar.gz", + "path": "package/vendor/aarch64-unknown-linux-musl/bash/ubuntu-24.04/bash", + "providers": [ + { + "url": "https://github.com/openai/codex/releases/download/rust-v0.65.0/codex-shell-tool-mcp-npm-0.65.0.tgz" + }, + { + "type": "github-release", + "repo": "openai/codex", + "tag": "rust-v0.65.0", + "name": "codex-shell-tool-mcp-npm-0.65.0.tgz" + } + ] + }, + } +} diff --git a/codex-rs/exec-server/tests/suite/list_tools.rs b/codex-rs/exec-server/tests/suite/list_tools.rs new file mode 100644 index 0000000000..17505c7613 --- /dev/null +++ b/codex-rs/exec-server/tests/suite/list_tools.rs @@ -0,0 +1,76 @@ +#![allow(clippy::unwrap_used, clippy::expect_used)] +use std::borrow::Cow; +use std::fs; +use std::sync::Arc; + +use anyhow::Result; +use exec_server_test_support::create_transport; +use pretty_assertions::assert_eq; +use rmcp::ServiceExt; +use rmcp::model::Tool; +use rmcp::model::object; +use serde_json::json; +use tempfile::TempDir; + +/// Verify the list_tools call to the MCP server returns the expected response. +#[tokio::test(flavor = "current_thread")] +async fn list_tools() -> Result<()> { + let codex_home = TempDir::new()?; + let policy_dir = codex_home.path().join("policy"); + fs::create_dir_all(&policy_dir)?; + fs::write( + policy_dir.join("default.codexpolicy"), + r#"prefix_rule(pattern=["ls"], decision="prompt")"#, + )?; + let transport = create_transport(codex_home.path())?; + + let service = ().serve(transport).await?; + let tools = service.list_tools(Default::default()).await?.tools; + assert_eq!( + vec![Tool { + name: Cow::Borrowed("shell"), + title: None, + description: Some(Cow::Borrowed( + "Runs a shell command and returns its output. You MUST provide the workdir as an absolute path." + )), + input_schema: Arc::new(object(json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": { + "command": { + "description": "The bash string to execute.", + "type": "string", + }, + "login": { + "description": "Launch Bash with -lc instead of -c: defaults to true.", + "nullable": true, + "type": "boolean", + }, + "timeout_ms": { + "description": "The timeout for the command in milliseconds.", + "format": "uint64", + "minimum": 0, + "nullable": true, + "type": "integer", + }, + "workdir": { + "description": "The working directory to execute the command in. Must be an absolute path.", + "type": "string", + }, + }, + "required": [ + "command", + "workdir", + ], + "title": "ExecParams", + "type": "object", + }))), + output_schema: None, + annotations: None, + icons: None, + meta: None + }], + tools + ); + + Ok(()) +} diff --git a/codex-rs/exec-server/tests/suite/mod.rs b/codex-rs/exec-server/tests/suite/mod.rs new file mode 100644 index 0000000000..3a94f58579 --- /dev/null +++ b/codex-rs/exec-server/tests/suite/mod.rs @@ -0,0 +1,8 @@ +// TODO(mbolin): Get this test working on Linux. Currently, it fails with: +// +// > Error: Mcp error: -32603: sandbox error: sandbox denied exec error, +// > exit code: 1, stdout: , stderr: Error: failed to send handshake datagram +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +mod accept_elicitation; +#[cfg(any(all(target_os = "macos", target_arch = "aarch64"), target_os = "linux"))] +mod list_tools; diff --git a/codex-rs/login/src/device_code_auth.rs b/codex-rs/login/src/device_code_auth.rs index a121de7ebd..d9e7d90ce2 100644 --- a/codex-rs/login/src/device_code_auth.rs +++ b/codex-rs/login/src/device_code_auth.rs @@ -141,7 +141,7 @@ fn print_device_code_prompt(code: &str) { println!( "\nWelcome to Codex [v{ANSI_GRAY}{version}{ANSI_RESET}]\n{ANSI_GRAY}OpenAI's command-line coding agent{ANSI_RESET}\n\ \nFollow these steps to sign in with ChatGPT using device code authorization:\n\ -\n1. Open this link in your browser\n {ANSI_BLUE}https://auth.openai.com/codex/device{ANSI_RESET}\n\ +\n1. Open this link in your browser and sign in to your account\n {ANSI_BLUE}https://auth.openai.com/codex/device{ANSI_RESET}\n\ \n2. Enter this one-time code {ANSI_GRAY}(expires in 15 minutes){ANSI_RESET}\n {ANSI_BLUE}{code}{ANSI_RESET}\n\ \n{ANSI_GRAY}Device codes are a common phishing target. Never share this code.{ANSI_RESET}\n", version = env!("CARGO_PKG_VERSION"), diff --git a/codex-rs/protocol/src/openai_models.rs b/codex-rs/protocol/src/openai_models.rs index 02d50627ca..942303a902 100644 --- a/codex-rs/protocol/src/openai_models.rs +++ b/codex-rs/protocol/src/openai_models.rs @@ -3,6 +3,7 @@ use std::collections::HashMap; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; +use strum::IntoEnumIterator; use strum_macros::Display; use strum_macros::EnumIter; use ts_rs::TS; @@ -36,7 +37,7 @@ pub enum ReasoningEffort { } /// A reasoning effort option that can be surfaced for a model. -#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq)] +#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq, Eq)] pub struct ReasoningEffortPreset { /// Effort level that the model supports. pub effort: ReasoningEffort, @@ -123,7 +124,7 @@ pub struct ModelInfo { #[serde(default)] pub description: Option, pub default_reasoning_level: ReasoningEffort, - pub supported_reasoning_levels: Vec, + pub supported_reasoning_levels: Vec, pub shell_type: ConfigShellToolType, #[serde(default = "default_visibility")] pub visibility: ModelVisibility, @@ -132,12 +133,16 @@ pub struct ModelInfo { pub supported_in_api: bool, #[serde(default)] pub priority: i32, + #[serde(default)] + pub upgrade: Option, } /// Response wrapper for `/models`. #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, TS, JsonSchema, Default)] pub struct ModelsResponse { pub models: Vec, + #[serde(default)] + pub etag: String, } fn default_visibility() -> ModelVisibility { @@ -149,22 +154,57 @@ impl From for ModelPreset { fn from(info: ModelInfo) -> Self { ModelPreset { id: info.slug.clone(), - model: info.slug, + model: info.slug.clone(), display_name: info.display_name, description: info.description.unwrap_or_default(), default_reasoning_effort: info.default_reasoning_level, - supported_reasoning_efforts: info - .supported_reasoning_levels - .into_iter() - .map(|level| ReasoningEffortPreset { - effort: level, - // todo: add description for each reasoning effort - description: level.to_string(), - }) - .collect(), + supported_reasoning_efforts: info.supported_reasoning_levels.clone(), is_default: false, // default is the highest priority available model - upgrade: None, // no upgrade available (todo: think about it) + upgrade: info.upgrade.as_ref().map(|upgrade_slug| ModelUpgrade { + id: upgrade_slug.clone(), + reasoning_effort_mapping: reasoning_effort_mapping_from_presets( + &info.supported_reasoning_levels, + ), + migration_config_key: info.slug.clone(), + }), show_in_picker: info.visibility == ModelVisibility::List, } } } + +fn reasoning_effort_mapping_from_presets( + presets: &[ReasoningEffortPreset], +) -> Option> { + if presets.is_empty() { + return None; + } + + // Map every canonical effort to the closest supported effort for the new model. + let supported: Vec = presets.iter().map(|p| p.effort).collect(); + let mut map = HashMap::new(); + for effort in ReasoningEffort::iter() { + let nearest = nearest_effort(effort, &supported); + map.insert(effort, nearest); + } + Some(map) +} + +fn effort_rank(effort: ReasoningEffort) -> i32 { + match effort { + ReasoningEffort::None => 0, + ReasoningEffort::Minimal => 1, + ReasoningEffort::Low => 2, + ReasoningEffort::Medium => 3, + ReasoningEffort::High => 4, + ReasoningEffort::XHigh => 5, + } +} + +fn nearest_effort(target: ReasoningEffort, supported: &[ReasoningEffort]) -> ReasoningEffort { + let target_rank = effort_rank(target); + supported + .iter() + .copied() + .min_by_key(|candidate| (effort_rank(*candidate) - target_rank).abs()) + .unwrap_or(target) +} diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 225a622dcc..89b5fd315a 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -1348,7 +1348,7 @@ pub struct ReviewLineRange { pub end: u32, } -#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)] +#[derive(Debug, Clone, Copy, Display, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)] #[serde(rename_all = "snake_case")] pub enum ExecCommandSource { Agent, diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index 10d11d0535..06fb2a83e1 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -127,7 +127,7 @@ async fn handle_model_migration_prompt_if_needed( auth_mode: Option, models_manager: Arc, ) -> Option { - let available_models = models_manager.available_models.read().await.clone(); + let available_models = models_manager.list_models().await; let upgrade = available_models .iter() .find(|preset| preset.model == config.model) @@ -139,12 +139,12 @@ async fn handle_model_migration_prompt_if_needed( migration_config_key, }) = upgrade { - if !migration_prompt_allows_auth_mode(auth_mode, migration_config_key) { + if !migration_prompt_allows_auth_mode(auth_mode, migration_config_key.as_str()) { return None; } let target_model = target_model.to_string(); - let hide_prompt_flag = migration_prompt_hidden(config, migration_config_key); + let hide_prompt_flag = migration_prompt_hidden(config, migration_config_key.as_str()); if !should_show_model_migration_prompt( &config.model, &target_model, @@ -154,7 +154,7 @@ async fn handle_model_migration_prompt_if_needed( return None; } - let prompt_copy = migration_copy_for_config(migration_config_key); + let prompt_copy = migration_copy_for_config(migration_config_key.as_str()); match run_model_migration_prompt(tui, prompt_copy).await { ModelMigrationOutcome::Accepted => { app_event_tx.send(AppEvent::PersistModelMigrationPromptAcknowledged { diff --git a/codex-rs/tui/src/bottom_pane/command_popup.rs b/codex-rs/tui/src/bottom_pane/command_popup.rs index 39bbfbd182..8aca5c4a62 100644 --- a/codex-rs/tui/src/bottom_pane/command_popup.rs +++ b/codex-rs/tui/src/bottom_pane/command_popup.rs @@ -182,9 +182,9 @@ impl CommandPopup { GenericDisplayRow { name, match_indices: indices.map(|v| v.into_iter().map(|i| i + 1).collect()), - is_current: false, display_shortcut: None, description: Some(description), + wrap_indent: None, } }) .collect() diff --git a/codex-rs/tui/src/bottom_pane/file_search_popup.rs b/codex-rs/tui/src/bottom_pane/file_search_popup.rs index 708b004748..064e4f0137 100644 --- a/codex-rs/tui/src/bottom_pane/file_search_popup.rs +++ b/codex-rs/tui/src/bottom_pane/file_search_popup.rs @@ -129,9 +129,9 @@ impl WidgetRef for &FileSearchPopup { .indices .as_ref() .map(|v| v.iter().map(|&i| i as usize).collect()), - is_current: false, display_shortcut: None, description: None, + wrap_indent: None, }) .collect() }; diff --git a/codex-rs/tui/src/bottom_pane/list_selection_view.rs b/codex-rs/tui/src/bottom_pane/list_selection_view.rs index d294a47265..26a32a42e1 100644 --- a/codex-rs/tui/src/bottom_pane/list_selection_view.rs +++ b/codex-rs/tui/src/bottom_pane/list_selection_view.rs @@ -28,6 +28,7 @@ use super::scroll_state::ScrollState; use super::selection_popup_common::GenericDisplayRow; use super::selection_popup_common::measure_rows_height; use super::selection_popup_common::render_rows; +use unicode_width::UnicodeWidthStr; /// One selectable item in the generic selection list. pub(crate) type SelectionAction = Box; @@ -192,23 +193,26 @@ impl ListSelectionView { item.name.clone() }; let n = visible_idx + 1; - let display_name = if self.is_searchable { + let wrap_prefix = if self.is_searchable { // The number keys don't work when search is enabled (since we let the // numbers be used for the search query). - format!("{prefix} {name_with_marker}") + format!("{prefix} ") } else { - format!("{prefix} {n}. {name_with_marker}") + format!("{prefix} {n}. ") }; + let wrap_prefix_width = UnicodeWidthStr::width(wrap_prefix.as_str()); + let display_name = format!("{wrap_prefix}{name_with_marker}"); let description = is_selected .then(|| item.selected_description.clone()) .flatten() .or_else(|| item.description.clone()); + let wrap_indent = description.is_none().then_some(wrap_prefix_width); GenericDisplayRow { name: display_name, display_shortcut: item.display_shortcut, match_indices: None, - is_current: item.is_current, description, + wrap_indent, } }) }) @@ -264,13 +268,36 @@ impl ListSelectionView { impl BottomPaneView for ListSelectionView { fn handle_key_event(&mut self, key_event: KeyEvent) { match key_event { + // Some terminals (or configurations) send Control key chords as + // C0 control characters without reporting the CONTROL modifier. + // Handle fallbacks for Ctrl-P/N here so navigation works everywhere. KeyEvent { code: KeyCode::Up, .. - } => self.move_up(), + } + | KeyEvent { + code: KeyCode::Char('p'), + modifiers: KeyModifiers::CONTROL, + .. + } + | KeyEvent { + code: KeyCode::Char('\u{0010}'), + modifiers: KeyModifiers::NONE, + .. + } /* ^P */ => self.move_up(), KeyEvent { code: KeyCode::Down, .. - } => self.move_down(), + } + | KeyEvent { + code: KeyCode::Char('n'), + modifiers: KeyModifiers::CONTROL, + .. + } + | KeyEvent { + code: KeyCode::Char('\u{000e}'), + modifiers: KeyModifiers::NONE, + .. + } /* ^N */ => self.move_down(), KeyEvent { code: KeyCode::Backspace, .. @@ -558,6 +585,47 @@ mod tests { ); } + #[test] + fn wraps_long_option_without_overflowing_columns() { + let (tx_raw, _rx) = unbounded_channel::(); + let tx = AppEventSender::new(tx_raw); + let items = vec![ + SelectionItem { + name: "Yes, proceed".to_string(), + dismiss_on_select: true, + ..Default::default() + }, + SelectionItem { + name: "Yes, and don't ask again for commands that start with `python -mpre_commit run --files eslint-plugin/no-mixed-const-enum-exports.js`".to_string(), + dismiss_on_select: true, + ..Default::default() + }, + ]; + let view = ListSelectionView::new( + SelectionViewParams { + title: Some("Approval".to_string()), + items, + ..Default::default() + }, + tx, + ); + + let rendered = render_lines_with_width(&view, 60); + let command_line = rendered + .lines() + .find(|line| line.contains("python -mpre_commit run")) + .expect("rendered lines should include wrapped command"); + assert!( + command_line.starts_with(" `python -mpre_commit run"), + "wrapped command line should align under the numbered prefix:\n{rendered}" + ); + assert!( + rendered.contains("eslint-plugin/no-") + && rendered.contains("mixed-const-enum-exports.js"), + "long command should not be truncated even when wrapped:\n{rendered}" + ); + } + #[test] fn width_changes_do_not_hide_rows() { let (tx_raw, _rx) = unbounded_channel::(); diff --git a/codex-rs/tui/src/bottom_pane/selection_popup_common.rs b/codex-rs/tui/src/bottom_pane/selection_popup_common.rs index 8042a75b28..5107ab0ca9 100644 --- a/codex-rs/tui/src/bottom_pane/selection_popup_common.rs +++ b/codex-rs/tui/src/bottom_pane/selection_popup_common.rs @@ -19,8 +19,8 @@ pub(crate) struct GenericDisplayRow { pub name: String, pub display_shortcut: Option, pub match_indices: Option>, // indices to bold (char positions) - pub is_current: bool, - pub description: Option, // optional grey text after the name + pub description: Option, // optional grey text after the name + pub wrap_indent: Option, // optional indent for wrapped lines } /// Compute a shared description-column start based on the widest visible name @@ -47,13 +47,30 @@ fn compute_desc_col( desc_col } +/// Determine how many spaces to indent wrapped lines for a row. +fn wrap_indent(row: &GenericDisplayRow, desc_col: usize, max_width: u16) -> usize { + let max_indent = max_width.saturating_sub(1) as usize; + let indent = row.wrap_indent.unwrap_or_else(|| { + if row.description.is_some() { + desc_col + } else { + 0 + } + }); + indent.min(max_indent) +} + /// Build the full display line for a row with the description padded to start /// at `desc_col`. Applies fuzzy-match bolding when indices are present and /// dims the description. fn build_full_line(row: &GenericDisplayRow, desc_col: usize) -> Line<'static> { // Enforce single-line name: allow at most desc_col - 2 cells for name, // reserving two spaces before the description column. - let name_limit = desc_col.saturating_sub(2); + let name_limit = row + .description + .as_ref() + .map(|_| desc_col.saturating_sub(2)) + .unwrap_or(usize::MAX); let mut name_spans: Vec = Vec::with_capacity(row.name.len()); let mut used_width = 0usize; @@ -63,11 +80,12 @@ fn build_full_line(row: &GenericDisplayRow, desc_col: usize) -> Line<'static> { let mut idx_iter = idxs.iter().peekable(); for (char_idx, ch) in row.name.chars().enumerate() { let ch_w = UnicodeWidthChar::width(ch).unwrap_or(0); - if used_width + ch_w > name_limit { + let next_width = used_width.saturating_add(ch_w); + if next_width > name_limit { truncated = true; break; } - used_width += ch_w; + used_width = next_width; if idx_iter.peek().is_some_and(|next| **next == char_idx) { idx_iter.next(); @@ -79,11 +97,12 @@ fn build_full_line(row: &GenericDisplayRow, desc_col: usize) -> Line<'static> { } else { for ch in row.name.chars() { let ch_w = UnicodeWidthChar::width(ch).unwrap_or(0); - if used_width + ch_w > name_limit { + let next_width = used_width.saturating_add(ch_w); + if next_width > name_limit { truncated = true; break; } - used_width += ch_w; + used_width = next_width; name_spans.push(ch.to_string().into()); } } @@ -161,24 +180,7 @@ pub(crate) fn render_rows( break; } - let GenericDisplayRow { - name, - match_indices, - display_shortcut, - is_current: _is_current, - description, - } = row; - - let mut full_line = build_full_line( - &GenericDisplayRow { - name: name.clone(), - match_indices: match_indices.clone(), - display_shortcut: *display_shortcut, - is_current: *_is_current, - description: description.clone(), - }, - desc_col, - ); + let mut full_line = build_full_line(row, desc_col); if Some(i) == state.selected_idx { // Match previous behavior: cyan + bold for the selected row. // Reset the style first to avoid inheriting dim from keyboard shortcuts. @@ -190,9 +192,10 @@ pub(crate) fn render_rows( // Wrap with subsequent indent aligned to the description column. use crate::wrapping::RtOptions; use crate::wrapping::word_wrap_line; + let continuation_indent = wrap_indent(row, desc_col, area.width); let options = RtOptions::new(area.width as usize) .initial_indent(Line::from("")) - .subsequent_indent(Line::from(" ".repeat(desc_col))); + .subsequent_indent(Line::from(" ".repeat(continuation_indent))); let wrapped = word_wrap_line(&full_line, options); // Render the wrapped lines. @@ -256,9 +259,10 @@ pub(crate) fn measure_rows_height( .map(|(_, r)| r) { let full_line = build_full_line(row, desc_col); + let continuation_indent = wrap_indent(row, desc_col, content_width); let opts = RtOptions::new(content_width as usize) .initial_indent(Line::from("")) - .subsequent_indent(Line::from(" ".repeat(desc_col))); + .subsequent_indent(Line::from(" ".repeat(continuation_indent))); total = total.saturating_add(word_wrap_line(&full_line, opts).len() as u16); } total.max(1) diff --git a/codex-rs/tui/src/bottom_pane/skill_popup.rs b/codex-rs/tui/src/bottom_pane/skill_popup.rs index 74c1b137ca..3e0f79f84b 100644 --- a/codex-rs/tui/src/bottom_pane/skill_popup.rs +++ b/codex-rs/tui/src/bottom_pane/skill_popup.rs @@ -90,9 +90,9 @@ impl SkillPopup { GenericDisplayRow { name, match_indices: indices, - is_current: false, display_shortcut: None, description: Some(description), + wrap_indent: None, } }) .collect() diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 41fe181b33..2ddab1626a 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -2053,7 +2053,7 @@ impl ChatWidget { } fn lower_cost_preset(&self) -> Option { - let models = self.models_manager.available_models.try_read().ok()?; + let models = self.models_manager.try_list_models().ok()?; models .iter() .find(|preset| preset.model == NUDGE_MODEL_SLUG) @@ -2162,14 +2162,16 @@ impl ChatWidget { let current_model = self.config.model.clone(); let presets: Vec = // todo(aibrahim): make this async function - if let Ok(models) = self.models_manager.available_models.try_read() { - models.clone() - } else { - self.add_info_message( - "Models are being updated; please try /model again in a moment.".to_string(), - None, - ); - return; + match self.models_manager.try_list_models() { + Ok(models) => models, + Err(_) => { + self.add_info_message( + "Models are being updated; please try /model again in a moment." + .to_string(), + None, + ); + return; + } }; let mut items: Vec = Vec::new(); diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index 229e075e7f..cebcd05d5a 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -956,9 +956,11 @@ fn active_blob(chat: &ChatWidget) -> String { } fn get_available_model(chat: &ChatWidget, model: &str) -> ModelPreset { - chat.models_manager - .available_models - .blocking_read() + let models = chat + .models_manager + .try_list_models() + .expect("models lock available"); + models .iter() .find(|&preset| preset.model == model) .cloned() diff --git a/codex-rs/tui/src/model_migration.rs b/codex-rs/tui/src/model_migration.rs index 1f93fd9a4f..cbce1f1bb0 100644 --- a/codex-rs/tui/src/model_migration.rs +++ b/codex-rs/tui/src/model_migration.rs @@ -292,7 +292,9 @@ fn gpt_5_1_codex_max_migration_copy() -> ModelMigrationCopy { ), Line::from(vec![ "Learn more at ".into(), - "www.openai.com/index/gpt-5-1-codex-max".cyan().underlined(), + "https://openai.com/index/gpt-5-1-codex-max/" + .cyan() + .underlined(), ".".into(), ]), ], @@ -312,7 +314,7 @@ fn gpt5_migration_copy() -> ModelMigrationCopy { ), Line::from(vec![ "Learn more at ".into(), - "www.openai.com/index/gpt-5-1".cyan().underlined(), + "https://openai.com/index/gpt-5-1/".cyan().underlined(), ".".into(), ]), Line::from(vec!["Press enter to continue".dim()]), diff --git a/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt.snap b/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt.snap index 5b3136803f..1f95142169 100644 --- a/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt.snap +++ b/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt.snap @@ -9,7 +9,7 @@ expression: terminal.backend() than its predecessors and capable of long-running project-scale work. - Learn more at www.openai.com/index/gpt-5-1-codex-max. + Learn more at https://openai.com/index/gpt-5-1-codex-max/. Choose how you'd like Codex to proceed. diff --git a/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt_gpt5_codex.snap b/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt_gpt5_codex.snap index 5a0ccd9b5b..52718e5793 100644 --- a/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt_gpt5_codex.snap +++ b/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt_gpt5_codex.snap @@ -10,6 +10,6 @@ expression: terminal.backend() You can continue using legacy models by specifying them directly with the -m option or in your config.toml. - Learn more at www.openai.com/index/gpt-5-1. + Learn more at https://openai.com/index/gpt-5-1/. Press enter to continue diff --git a/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt_gpt5_codex_mini.snap b/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt_gpt5_codex_mini.snap index 5a0ccd9b5b..52718e5793 100644 --- a/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt_gpt5_codex_mini.snap +++ b/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt_gpt5_codex_mini.snap @@ -10,6 +10,6 @@ expression: terminal.backend() You can continue using legacy models by specifying them directly with the -m option or in your config.toml. - Learn more at www.openai.com/index/gpt-5-1. + Learn more at https://openai.com/index/gpt-5-1/. Press enter to continue diff --git a/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt_gpt5_family.snap b/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt_gpt5_family.snap index 5a0ccd9b5b..52718e5793 100644 --- a/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt_gpt5_family.snap +++ b/codex-rs/tui/src/snapshots/codex_tui__model_migration__tests__model_migration_prompt_gpt5_family.snap @@ -10,6 +10,6 @@ expression: terminal.backend() You can continue using legacy models by specifying them directly with the -m option or in your config.toml. - Learn more at www.openai.com/index/gpt-5-1. + Learn more at https://openai.com/index/gpt-5-1/. Press enter to continue diff --git a/docs/config.md b/docs/config.md index 0ba711f02c..08ff2aa349 100644 --- a/docs/config.md +++ b/docs/config.md @@ -350,6 +350,8 @@ Though using this option may also be necessary if you try to use Codex in enviro ### tools.\* +These `[tools]` configuration options are deprecated. Use `[features]` instead (see [Feature flags](#feature-flags)). + Use the optional `[tools]` table to toggle built-in tools that the agent may call. `web_search` stays off unless you opt in, while `view_image` is now enabled by default: ```toml @@ -358,8 +360,6 @@ web_search = true # allow Codex to issue first-party web searches without prom view_image = false # disable image uploads (they're enabled by default) ``` -`web_search` is deprecated; use the `web_search_request` feature flag instead. - The `view_image` toggle is useful when you want to include screenshots or diagrams from your repo without pasting them manually. Codex still respects sandboxing: it can only attach files inside the workspace roots you allow. ### approval_presets @@ -615,12 +615,12 @@ Set `otel.exporter` to control where events go: endpoint, protocol, and headers your collector expects: ```toml - [otel] - exporter = { otlp-http = { - endpoint = "https://otel.example.com/v1/logs", - protocol = "binary", - headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" } - }} + [otel.exporter."otlp-http"] + endpoint = "https://otel.example.com/v1/logs" + protocol = "binary" + + [otel.exporter."otlp-http".headers] + "x-otlp-api-key" = "${OTLP_TOKEN}" ``` - `otlp-grpc` – streams OTLP log records over gRPC. Provide the endpoint and any @@ -628,27 +628,24 @@ Set `otel.exporter` to control where events go: ```toml [otel] - exporter = { otlp-grpc = { - endpoint = "https://otel.example.com:4317", - headers = { "x-otlp-meta" = "abc123" } - }} + exporter = { otlp-grpc = {endpoint = "https://otel.example.com:4317",headers = { "x-otlp-meta" = "abc123" }}} ``` Both OTLP exporters accept an optional `tls` block so you can trust a custom CA or enable mutual TLS. Relative paths are resolved against `~/.codex/`: ```toml -[otel] -exporter = { otlp-http = { - endpoint = "https://otel.example.com/v1/logs", - protocol = "binary", - headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }, - tls = { - ca-certificate = "certs/otel-ca.pem", - client-certificate = "/etc/codex/certs/client.pem", - client-private-key = "/etc/codex/certs/client-key.pem", - } -}} +[otel.exporter."otlp-http"] +endpoint = "https://otel.example.com/v1/logs" +protocol = "binary" + +[otel.exporter."otlp-http".headers] +"x-otlp-api-key" = "${OTLP_TOKEN}" + +[otel.exporter."otlp-http".tls] +ca-certificate = "certs/otel-ca.pem" +client-certificate = "/etc/codex/certs/client.pem" +client-private-key = "/etc/codex/certs/client-key.pem" ``` If the exporter is `none` nothing is written anywhere; otherwise you must run or point to your diff --git a/docs/example-config.md b/docs/example-config.md index f5b3c62904..1f326ac14b 100644 --- a/docs/example-config.md +++ b/docs/example-config.md @@ -341,30 +341,28 @@ environment = "dev" exporter = "none" # Example OTLP/HTTP exporter configuration -# [otel] -# exporter = { otlp-http = { -# endpoint = "https://otel.example.com/v1/logs", -# protocol = "binary", # "binary" | "json" -# headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" } -# }} +# [otel.exporter."otlp-http"] +# endpoint = "https://otel.example.com/v1/logs" +# protocol = "binary" # "binary" | "json" + +# [otel.exporter."otlp-http".headers] +# "x-otlp-api-key" = "${OTLP_TOKEN}" # Example OTLP/gRPC exporter configuration -# [otel] -# exporter = { otlp-grpc = { -# endpoint = "https://otel.example.com:4317", -# headers = { "x-otlp-meta" = "abc123" } -# }} +# [otel.exporter."otlp-grpc"] +# endpoint = "https://otel.example.com:4317", +# headers = { "x-otlp-meta" = "abc123" } # Example OTLP exporter with mutual TLS -# [otel] -# exporter = { otlp-http = { -# endpoint = "https://otel.example.com/v1/logs", -# protocol = "binary", -# headers = { "x-otlp-api-key" = "${OTLP_TOKEN}" }, -# tls = { -# ca-certificate = "certs/otel-ca.pem", -# client-certificate = "/etc/codex/certs/client.pem", -# client-private-key = "/etc/codex/certs/client-key.pem", -# } -# }} +# [otel.exporter."otlp-http"] +# endpoint = "https://otel.example.com/v1/logs" +# protocol = "binary" + +# [otel.exporter."otlp-http".headers] +# "x-otlp-api-key" = "${OTLP_TOKEN}" + +# [otel.exporter."otlp-http".tls] +# ca-certificate = "certs/otel-ca.pem" +# client-certificate = "/etc/codex/certs/client.pem" +# client-private-key = "/etc/codex/certs/client-key.pem" ```