[codex] Cover WFP setup failure results

Keep the fail-closed WFP setup path exercised for installer errors and panics. Co-authored-by: Codex <noreply@openai.com>
[codex] Stop Windows sandbox setup after filter installation failure
2026-05-17 09:43:19 +00:00 · 2026-05-11 21:18:46 +00:00 · 2026-05-11 20:33:13 +00:00
7 changed files with 158 additions and 292 deletions
--- a/codex-rs/.config/nextest.toml
+++ b/codex-rs/.config/nextest.toml
@@ -11,9 +11,6 @@ max-threads = 1
 [test-groups.core_apply_patch_cli_integration]
 max-threads = 1

-[test-groups.windows_process_stdio_integration]
-max-threads = 1
-
 [test-groups.windows_sandbox_legacy_sessions]
 max-threads = 1

@@ -42,13 +39,6 @@ test-group = 'app_server_integration'
 filter = 'package(codex-core) & kind(test) & test(apply_patch_cli)'
 test-group = 'core_apply_patch_cli_integration'

-[[profile.default.overrides]]
-# These Windows-only tests all spawn Codex or Powershell child processes over
-# stdio, and the timeout-heavy failure cluster appears when they overlap.
-platform = 'cfg(windows)'
-filter = '(package(codex-exec) & test(exec_resume_)) | (package(codex-exec-server) & test(connect_stdio_command_initializes_json_rpc_client_on_windows))'
-test-group = 'windows_process_stdio_integration'
-
 [[profile.default.overrides]]
 # These tests create restricted-token Windows child processes and private desktops.
 # Serialize them to avoid exhausting Windows session/global desktop resources in CI.
--- a/codex-rs/login/src/auth/manager.rs
+++ b/codex-rs/login/src/auth/manager.rs
@@ -592,11 +592,11 @@ pub fn save_auth(
    storage.save(auth)
 }

-/// Load the raw stored auth payload without applying environment overrides.
-///
-/// Returns `None` when no credentials are stored. Prefer `AuthManager` for
-/// ordinary production reads; this helper is for tests and write-side
-/// maintenance that must inspect the exact payload in storage.
+/// Load CLI auth data using the configured credential store backend.
+/// Returns `None` when no credentials are stored. This function is
+/// provided only for tests. Production code should not directly load
+/// from the auth.json storage. It should use the AuthManager abstraction
+/// instead.
 pub fn load_auth_dot_json(
    codex_home: &Path,
    auth_credentials_store_mode: AuthCredentialsStoreMode,
--- a/codex-rs/login/src/auth/mod.rs
+++ b/codex-rs/login/src/auth/mod.rs
@@ -11,5 +11,3 @@ mod revoke;
 pub use error::RefreshTokenFailedError;
 pub use error::RefreshTokenFailedReason;
 pub use manager::*;
-pub(crate) use revoke::revoke_auth_tokens;
-pub(crate) use revoke::should_revoke_auth_tokens;
--- a/codex-rs/login/src/auth/revoke.rs
+++ b/codex-rs/login/src/auth/revoke.rs
@@ -1,9 +1,8 @@
-//! Best-effort OAuth token revocation for managed auth cleanup.
+//! Best-effort OAuth token revocation used during logout.
 //!
-//! Managed ChatGPT auth stores OAuth tokens locally. Cleanup attempts to revoke
-//! the refresh token, falling back to the access token when no refresh token is
-//! available, and callers still complete their primary work if the revoke request
-//! fails.
+//! Managed ChatGPT auth stores OAuth tokens locally. Logout attempts to revoke the
+//! refresh token, falling back to the access token when no refresh token is
+//! available, and callers still remove local auth if the revoke request fails.

 use serde::Serialize;
 use std::time::Duration;
@@ -52,43 +51,35 @@ struct RevokeTokenRequest<'a> {
    client_id: Option<&'static str>,
 }

-pub(crate) async fn revoke_auth_tokens(
+pub(super) async fn revoke_auth_tokens(
    auth_dot_json: Option<&AuthDotJson>,
 ) -> Result<(), std::io::Error> {
-    let Some((token, kind)) = auth_dot_json.and_then(revocable_token) else {
+    let Some(tokens) = auth_dot_json.and_then(managed_chatgpt_tokens) else {
        return Ok(());
    };

    let client = create_client();
    let endpoint = revoke_token_endpoint();
-    revoke_oauth_token(&client, endpoint.as_str(), token, kind, REVOKE_HTTP_TIMEOUT).await
-}
-
-pub(crate) fn should_revoke_auth_tokens(
-    auth_dot_json: Option<&AuthDotJson>,
-    replacement_auth: &AuthDotJson,
-) -> bool {
-    let Some((token, kind)) = auth_dot_json.and_then(revocable_token) else {
-        return false;
-    };
-    let Some(replacement_tokens) = managed_chatgpt_tokens(replacement_auth) else {
-        return true;
-    };
-
-    match kind {
-        RevokeTokenKind::Access => replacement_tokens.access_token != token,
-        RevokeTokenKind::Refresh => replacement_tokens.refresh_token != token,
-    }
-}
-
-fn revocable_token(auth_dot_json: &AuthDotJson) -> Option<(&str, RevokeTokenKind)> {
-    let tokens = managed_chatgpt_tokens(auth_dot_json)?;
    if !tokens.refresh_token.is_empty() {
-        Some((tokens.refresh_token.as_str(), RevokeTokenKind::Refresh))
+        revoke_oauth_token(
+            &client,
+            endpoint.as_str(),
+            tokens.refresh_token.as_str(),
+            RevokeTokenKind::Refresh,
+            REVOKE_HTTP_TIMEOUT,
+        )
+        .await
    } else if !tokens.access_token.is_empty() {
-        Some((tokens.access_token.as_str(), RevokeTokenKind::Access))
+        revoke_oauth_token(
+            &client,
+            endpoint.as_str(),
+            tokens.access_token.as_str(),
+            RevokeTokenKind::Access,
+            REVOKE_HTTP_TIMEOUT,
+        )
+        .await
    } else {
-        None
+        Ok(())
    }
 }

--- a/codex-rs/login/src/server.rs
+++ b/codex-rs/login/src/server.rs
@@ -25,10 +25,7 @@ use std::thread;
 use std::time::Duration;

 use crate::auth::AuthDotJson;
-use crate::auth::load_auth_dot_json;
-use crate::auth::revoke_auth_tokens;
 use crate::auth::save_auth;
-use crate::auth::should_revoke_auth_tokens;
 use crate::default_client::originator;
 use crate::pkce::PkceCodes;
 use crate::pkce::generate_pkce;
@@ -784,8 +781,7 @@ pub(crate) async fn exchange_code_for_tokens(
    })
 }

-/// Persists exchanged credentials using the configured local auth store, then
-/// best-effort revokes any superseded managed ChatGPT tokens.
+/// Persists exchanged credentials using the configured local auth store.
 pub(crate) async fn persist_tokens_async(
    codex_home: &Path,
    api_key: Option<String>,
@@ -796,14 +792,7 @@ pub(crate) async fn persist_tokens_async(
 ) -> io::Result<()> {
    // Reuse existing synchronous logic but run it off the async runtime.
    let codex_home = codex_home.to_path_buf();
-    let (previous_auth, auth) = tokio::task::spawn_blocking(move || {
-        let previous_auth = match load_auth_dot_json(&codex_home, auth_credentials_store_mode) {
-            Ok(auth) => auth,
-            Err(err) => {
-                warn!("failed to load previous auth before saving new login: {err}");
-                None
-            }
-        };
+    tokio::task::spawn_blocking(move || {
        let mut tokens = TokenData {
            id_token: parse_chatgpt_jwt_claims(&id_token).map_err(io::Error::other)?,
            access_token,
@@ -823,19 +812,10 @@ pub(crate) async fn persist_tokens_async(
            last_refresh: Some(Utc::now()),
            agent_identity: None,
        };
-        save_auth(&codex_home, &auth, auth_credentials_store_mode)?;
-        Ok::<_, io::Error>((previous_auth, auth))
+        save_auth(&codex_home, &auth, auth_credentials_store_mode)
    })
    .await
-    .map_err(|e| io::Error::other(format!("persist task failed: {e}")))??;
-
-    if should_revoke_auth_tokens(previous_auth.as_ref(), &auth)
-        && let Err(err) = revoke_auth_tokens(previous_auth.as_ref()).await
-    {
-        warn!("failed to revoke superseded auth tokens after login: {err}");
-    }
-
-    Ok(())
+    .map_err(|e| io::Error::other(format!("persist task failed: {e}")))?
 }

 fn compose_success_url(
@@ -1152,28 +1132,6 @@ pub(crate) async fn obtain_api_key(
 }
 #[cfg(test)]
 mod tests {
-    use std::ffi::OsString;
-
-    use anyhow::Context;
-    use base64::Engine;
-    use codex_app_server_protocol::AuthMode;
-    use codex_config::types::AuthCredentialsStoreMode;
-    use serde_json::Value;
-    use serde_json::json;
-    use tempfile::tempdir;
-    use wiremock::Mock;
-    use wiremock::MockServer;
-    use wiremock::ResponseTemplate;
-    use wiremock::matchers::method;
-    use wiremock::matchers::path;
-
-    use crate::auth::AuthDotJson;
-    use crate::auth::REVOKE_TOKEN_URL_OVERRIDE_ENV_VAR;
-    use crate::auth::load_auth_dot_json;
-    use crate::auth::save_auth;
-    use crate::token_data::TokenData;
-    use crate::token_data::parse_chatgpt_jwt_claims;
-    use core_test_support::skip_if_no_network;
    use pretty_assertions::assert_eq;

    use super::DEFAULT_ISSUER;
@@ -1182,179 +1140,11 @@ mod tests {
    use super::html_escape;
    use super::is_missing_codex_entitlement_error;
    use super::parse_token_endpoint_error;
-    use super::persist_tokens_async;
    use super::redact_sensitive_query_value;
    use super::redact_sensitive_url_parts;
    use super::render_login_error_page;
    use super::sanitize_url_for_logging;

-    #[serial_test::serial(logout_revoke)]
-    #[tokio::test]
-    async fn persist_tokens_async_revokes_previous_auth_without_failing_login() -> anyhow::Result<()>
-    {
-        skip_if_no_network!(Ok(()));
-
-        let server = MockServer::start().await;
-        Mock::given(method("POST"))
-            .and(path("/oauth/revoke"))
-            .respond_with(ResponseTemplate::new(500).set_body_json(json!({
-                "error": {
-                    "message": "revoke failed"
-                }
-            })))
-            .expect(1)
-            .mount(&server)
-            .await;
-        let _env_guard = EnvGuard::set(
-            REVOKE_TOKEN_URL_OVERRIDE_ENV_VAR,
-            format!("{}/oauth/revoke", server.uri()),
-        );
-
-        let codex_home = tempdir()?;
-        save_auth(
-            codex_home.path(),
-            &chatgpt_auth("old-access", "old-refresh", "old-account"),
-            AuthCredentialsStoreMode::File,
-        )?;
-
-        persist_tokens_async(
-            codex_home.path(),
-            /*api_key*/ None,
-            jwt_for_account("new-account"),
-            "new-access".to_string(),
-            "new-refresh".to_string(),
-            AuthCredentialsStoreMode::File,
-        )
-        .await?;
-
-        let auth = load_auth_dot_json(codex_home.path(), AuthCredentialsStoreMode::File)?
-            .context("auth.json should exist after login")?;
-        assert_eq!(
-            auth.tokens.context("new tokens should be persisted")?,
-            TokenData {
-                id_token: parse_chatgpt_jwt_claims(&jwt_for_account("new-account"))
-                    .expect("new JWT should parse"),
-                access_token: "new-access".to_string(),
-                refresh_token: "new-refresh".to_string(),
-                account_id: Some("new-account".to_string()),
-            }
-        );
-
-        let requests = server
-            .received_requests()
-            .await
-            .context("failed to fetch revoke requests")?;
-        assert_eq!(requests.len(), 1);
-        assert_eq!(
-            requests[0]
-                .body_json::<Value>()
-                .context("revoke request should be JSON")?,
-            json!({
-                "token": "old-refresh",
-                "token_type_hint": "refresh_token",
-                "client_id": crate::auth::CLIENT_ID,
-            })
-        );
-        server.verify().await;
-        Ok(())
-    }
-
-    #[serial_test::serial(logout_revoke)]
-    #[tokio::test]
-    async fn persist_tokens_async_does_not_revoke_reused_refresh_token() -> anyhow::Result<()> {
-        skip_if_no_network!(Ok(()));
-
-        let server = MockServer::start().await;
-        let _env_guard = EnvGuard::set(
-            REVOKE_TOKEN_URL_OVERRIDE_ENV_VAR,
-            format!("{}/oauth/revoke", server.uri()),
-        );
-
-        let codex_home = tempdir()?;
-        save_auth(
-            codex_home.path(),
-            &chatgpt_auth("old-access", "shared-refresh", "old-account"),
-            AuthCredentialsStoreMode::File,
-        )?;
-
-        persist_tokens_async(
-            codex_home.path(),
-            /*api_key*/ None,
-            jwt_for_account("new-account"),
-            "new-access".to_string(),
-            "shared-refresh".to_string(),
-            AuthCredentialsStoreMode::File,
-        )
-        .await?;
-
-        let requests = server
-            .received_requests()
-            .await
-            .context("failed to fetch revoke requests")?;
-        assert_eq!(requests.len(), 0);
-        Ok(())
-    }
-
-    fn chatgpt_auth(access_token: &str, refresh_token: &str, account_id: &str) -> AuthDotJson {
-        AuthDotJson {
-            auth_mode: Some(AuthMode::Chatgpt),
-            openai_api_key: None,
-            tokens: Some(TokenData {
-                id_token: parse_chatgpt_jwt_claims(&jwt_for_account(account_id))
-                    .expect("test JWT should parse"),
-                access_token: access_token.to_string(),
-                refresh_token: refresh_token.to_string(),
-                account_id: Some(account_id.to_string()),
-            }),
-            last_refresh: None,
-            agent_identity: None,
-        }
-    }
-
-    fn jwt_for_account(account_id: &str) -> String {
-        let encode = |bytes: &[u8]| base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(bytes);
-        let header_b64 = encode(br#"{"alg":"none","typ":"JWT"}"#);
-        let payload_b64 = encode(
-            serde_json::to_string(&json!({
-                "https://api.openai.com/auth": {
-                    "chatgpt_account_id": account_id,
-                }
-            }))
-            .expect("payload should serialize")
-            .as_bytes(),
-        );
-        let signature_b64 = encode(b"sig");
-        format!("{header_b64}.{payload_b64}.{signature_b64}")
-    }
-
-    struct EnvGuard {
-        key: &'static str,
-        original: Option<OsString>,
-    }
-
-    impl EnvGuard {
-        fn set(key: &'static str, value: String) -> Self {
-            let original = std::env::var_os(key);
-            // SAFETY: this test executes serially with other revoke tests.
-            unsafe {
-                std::env::set_var(key, &value);
-            }
-            Self { key, original }
-        }
-    }
-
-    impl Drop for EnvGuard {
-        fn drop(&mut self) {
-            // SAFETY: the guard restores the original environment before other revoke tests run.
-            unsafe {
-                match &self.original {
-                    Some(value) => std::env::set_var(self.key, value),
-                    None => std::env::remove_var(self.key),
-                }
-            }
-        }
-    }
-
    #[test]
    fn parse_token_endpoint_error_prefers_error_description() {
        let detail = parse_token_endpoint_error(
--- a/codex-rs/windows-sandbox-rs/src/bin/setup_main/win.rs
+++ b/codex-rs/windows-sandbox-rs/src/bin/setup_main/win.rs
@@ -610,7 +610,13 @@ fn run_setup_full(payload: &Payload, log: &mut File, sbx_dir: &Path) -> Result<(
            |message| {
                let _ = log_line(log, message);
            },
-        );
+        )
+        .map_err(|err| {
+            anyhow::Error::new(SetupFailure::new(
+                SetupErrorCode::HelperFirewallRuleCreateOrAddFailed,
+                format!("install WFP filters failed: {err}"),
+            ))
+        })?;
    }

    if payload.read_roots.is_empty() {
--- a/codex-rs/windows-sandbox-rs/src/wfp_setup.rs
+++ b/codex-rs/windows-sandbox-rs/src/wfp_setup.rs
@@ -128,48 +128,139 @@ pub fn install_wfp_filters<F>(
    offline_username: &str,
    otel: Option<&StatsigMetricsSettings>,
    mut log: F,
-) where
+) -> Result<()>
+where
    F: FnMut(&str),
 {
-    let metric = match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+    let (metric, install_result) = evaluate_wfp_install(offline_username, &mut log, || {
        install_wfp_filters_for_account(offline_username)
-    })) {
+    });
+
+    emit_wfp_setup_metric_safely(codex_home, otel, offline_username, &metric, &mut log);
+    install_result
+}
+
+fn evaluate_wfp_install<F, I>(
+    offline_username: &str,
+    log: &mut F,
+    install: I,
+) -> (WfpSetupMetric, Result<()>)
+where
+    F: FnMut(&str),
+    I: FnOnce() -> Result<usize>,
+{
+    match std::panic::catch_unwind(std::panic::AssertUnwindSafe(install)) {
        Ok(Ok(installed_filter_count)) => {
            log(&format!(
                "WFP setup succeeded for {offline_username} with {installed_filter_count} installed filters"
            ));
-            WfpSetupMetric {
-                outcome: WfpSetupMetricOutcome::Success,
-                target_account: offline_username.to_string(),
-                installed_filter_count,
-                error: None,
-            }
+            (
+                WfpSetupMetric {
+                    outcome: WfpSetupMetricOutcome::Success,
+                    target_account: offline_username.to_string(),
+                    installed_filter_count,
+                    error: None,
+                },
+                Ok(()),
+            )
        }
        Ok(Err(err)) => {
            let error = err.to_string();
-            log(&format!(
-                "WFP setup failed for {offline_username}: {error}; continuing elevated setup"
-            ));
-            WfpSetupMetric {
-                outcome: WfpSetupMetricOutcome::Failure,
-                target_account: offline_username.to_string(),
-                installed_filter_count: 0,
-                error: Some(error),
-            }
+            log(&format!("WFP setup failed for {offline_username}: {error}"));
+            (
+                WfpSetupMetric {
+                    outcome: WfpSetupMetricOutcome::Failure,
+                    target_account: offline_username.to_string(),
+                    installed_filter_count: 0,
+                    error: Some(error.clone()),
+                },
+                Err(anyhow::anyhow!(
+                    "WFP setup failed for {offline_username}: {error}"
+                )),
+            )
        }
        Err(panic_payload) => {
            let error = panic_payload_to_string(panic_payload);
            log(&format!(
-                "WFP setup panicked for {offline_username}: {error}; continuing elevated setup"
+                "WFP setup panicked for {offline_username}: {error}"
            ));
-            WfpSetupMetric {
-                outcome: WfpSetupMetricOutcome::Failure,
-                target_account: offline_username.to_string(),
-                installed_filter_count: 0,
-                error: Some(format!("panic: {error}")),
-            }
+            (
+                WfpSetupMetric {
+                    outcome: WfpSetupMetricOutcome::Failure,
+                    target_account: offline_username.to_string(),
+                    installed_filter_count: 0,
+                    error: Some(format!("panic: {error}")),
+                },
+                Err(anyhow::anyhow!(
+                    "WFP setup panicked for {offline_username}: {error}"
+                )),
+            )
        }
-    };
-
-    emit_wfp_setup_metric_safely(codex_home, otel, offline_username, &metric, &mut log);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn evaluate_wfp_install_returns_ok_after_success() {
+        let mut messages = Vec::new();
+        let mut log = |message: &str| messages.push(message.to_string());
+
+        let (metric, result) = evaluate_wfp_install("offline", &mut log, || Ok(3));
+
+        assert!(result.is_ok());
+        assert!(matches!(metric.outcome, WfpSetupMetricOutcome::Success));
+        assert_eq!(metric.installed_filter_count, 3);
+        assert_eq!(metric.error, None);
+        assert_eq!(
+            messages,
+            vec!["WFP setup succeeded for offline with 3 installed filters"]
+        );
+    }
+
+    #[test]
+    fn evaluate_wfp_install_returns_err_after_install_failure() {
+        let mut messages = Vec::new();
+        let mut log = |message: &str| messages.push(message.to_string());
+
+        let (metric, result) =
+            evaluate_wfp_install("offline", &mut log, || Err(anyhow::anyhow!("driver error")));
+
+        let error = result.expect_err("install failures must bubble out");
+        assert_eq!(
+            error.to_string(),
+            "WFP setup failed for offline: driver error"
+        );
+        assert!(matches!(metric.outcome, WfpSetupMetricOutcome::Failure));
+        assert_eq!(metric.installed_filter_count, 0);
+        assert_eq!(metric.error.as_deref(), Some("driver error"));
+        assert_eq!(messages, vec!["WFP setup failed for offline: driver error"]);
+    }
+
+    #[test]
+    fn evaluate_wfp_install_returns_err_after_install_panic() {
+        let mut messages = Vec::new();
+        let mut log = |message: &str| messages.push(message.to_string());
+
+        let (metric, result) =
+            evaluate_wfp_install("offline", &mut log, || panic!("unexpected installer panic"));
+
+        let error = result.expect_err("installer panics must bubble out");
+        assert_eq!(
+            error.to_string(),
+            "WFP setup panicked for offline: unexpected installer panic"
+        );
+        assert!(matches!(metric.outcome, WfpSetupMetricOutcome::Failure));
+        assert_eq!(metric.installed_filter_count, 0);
+        assert_eq!(
+            metric.error.as_deref(),
+            Some("panic: unexpected installer panic")
+        );
+        assert_eq!(
+            messages,
+            vec!["WFP setup panicked for offline: unexpected installer panic"]
+        );
+    }
 }