add(core): safety check downgrade warning (#11964)

Add per-turn notice when a request is downgraded to a fallback model due to cyber safety checks. **Changes** - codex-api: Emit a ServerModel event based on the openai-model response header and/or response payload (SSE + WebSocket), including when the model changes mid-stream. - core: When the server-reported model differs from the requested model, emit a single per-turn warning explaining the reroute to gpt-5.2 and directing users to Trusted Access verification and the cyber safety explainer. - app-server (v2): Surface these cyber model-routing warnings as synthetic userMessage items with text prefixed by Warning: (and document this behavior).
2026-06-01 19:02:59 +00:00 · 2026-02-16 22:13:36 -08:00
parent 08f689843f
commit 02e9006547
12 changed files with 843 additions and 4 deletions
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -279,6 +279,8 @@ pub struct CodexSpawnOk {

 pub(crate) const INITIAL_SUBMIT_ID: &str = "";
 pub(crate) const SUBMISSION_CHANNEL_CAPACITY: usize = 64;
+const CYBER_VERIFY_URL: &str = "https://chatgpt.com/cyber";
+const CYBER_SAFETY_URL: &str = "https://developers.openai.com/codex/concepts/cyber-safety";

 impl Codex {
    /// Spawn a new [`Codex`] and initialize the session.
@@ -2560,6 +2562,35 @@ impl Session {
        self.record_conversation_items(ctx, &[item]).await;
    }

+    async fn maybe_warn_on_server_model_mismatch(
+        self: &Arc<Self>,
+        turn_context: &Arc<TurnContext>,
+        server_model: String,
+    ) -> bool {
+        let requested_model = turn_context.model_info.slug.as_str();
+        if server_model == requested_model {
+            info!("server reported model {server_model} (matches requested model)");
+            return false;
+        }
+
+        warn!("server reported model {server_model} while requested model was {requested_model}");
+
+        let warning_message = format!(
+            "Your account was flagged for potentially high-risk cyber activity and this request was routed to gpt-5.2 as a fallback. To regain access to gpt-5.3-codex, apply for trusted access: {CYBER_VERIFY_URL} or learn more: {CYBER_SAFETY_URL}"
+        );
+
+        self.send_event(
+            turn_context,
+            EventMsg::Warning(WarningEvent {
+                message: warning_message.clone(),
+            }),
+        )
+        .await;
+        self.record_model_warning(warning_message, turn_context)
+            .await;
+        true
+    }
+
    pub(crate) async fn replace_history(&self, items: Vec<ResponseItem>) {
        let mut state = self.state.lock().await;
        state.replace_history(items);
@@ -4435,6 +4466,7 @@ pub(crate) async fn run_turn(
    // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
    // many turns, from the perspective of the user, it is a single turn.
    let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
+    let mut server_model_warning_emitted_for_turn = false;

    // `ModelClientSession` is turn-scoped and caches WebSocket + sticky routing state, so we reuse
    // one instance across retries within this turn.
@@ -4497,6 +4529,7 @@ pub(crate) async fn run_turn(
            sampling_request_input,
            &explicitly_enabled_connectors,
            skills_outcome.as_ref(),
+            &mut server_model_warning_emitted_for_turn,
            cancellation_token.child_token(),
        )
        .await
@@ -4870,6 +4903,7 @@ async fn run_sampling_request(
    input: Vec<ResponseItem>,
    explicitly_enabled_connectors: &HashSet<String>,
    skills_outcome: Option<&SkillLoadOutcome>,
+    server_model_warning_emitted_for_turn: &mut bool,
    cancellation_token: CancellationToken,
 ) -> CodexResult<SamplingRequestResult> {
    let router = built_tools(
@@ -4906,6 +4940,7 @@ async fn run_sampling_request(
            client_session,
            turn_metadata_header,
            Arc::clone(&turn_diff_tracker),
+            server_model_warning_emitted_for_turn,
            &prompt,
            cancellation_token.child_token(),
        )
@@ -5474,6 +5509,7 @@ async fn try_run_sampling_request(
    client_session: &mut ModelClientSession,
    turn_metadata_header: Option<&str>,
    turn_diff_tracker: SharedTurnDiffTracker,
+    server_model_warning_emitted_for_turn: &mut bool,
    prompt: &Prompt,
    cancellation_token: CancellationToken,
 ) -> CodexResult<SamplingRequestResult> {
@@ -5616,6 +5652,15 @@ async fn try_run_sampling_request(
                    active_item = Some(turn_item);
                }
            }
+            ResponseEvent::ServerModel(server_model) => {
+                if !*server_model_warning_emitted_for_turn
+                    && sess
+                        .maybe_warn_on_server_model_mismatch(&turn_context, server_model)
+                        .await
+                {
+                    *server_model_warning_emitted_for_turn = true;
+                }
+            }
            ResponseEvent::ServerReasoningIncluded(included) => {
                sess.set_server_reasoning_included(included).await;
            }