add(core): safety check downgrade warning (#11964)

Add per-turn notice when a request is downgraded to a fallback model due
to cyber safety checks.

**Changes**

- codex-api: Emit a ServerModel event based on the openai-model response
header and/or response payload (SSE + WebSocket), including when the
model changes mid-stream.
- core: When the server-reported model differs from the requested model,
emit a single per-turn warning explaining the reroute to gpt-5.2 and
directing users to Trusted
    Access verification and the cyber safety explainer.
- app-server (v2): Surface these cyber model-routing warnings as
synthetic userMessage items with text prefixed by Warning: (and document
this behavior).
This commit is contained in:
Fouad Matin
2026-02-16 22:13:36 -08:00
committed by GitHub
parent 08f689843f
commit 02e9006547
12 changed files with 843 additions and 4 deletions

View File

@@ -279,6 +279,8 @@ pub struct CodexSpawnOk {
pub(crate) const INITIAL_SUBMIT_ID: &str = "";
pub(crate) const SUBMISSION_CHANNEL_CAPACITY: usize = 64;
const CYBER_VERIFY_URL: &str = "https://chatgpt.com/cyber";
const CYBER_SAFETY_URL: &str = "https://developers.openai.com/codex/concepts/cyber-safety";
impl Codex {
/// Spawn a new [`Codex`] and initialize the session.
@@ -2560,6 +2562,35 @@ impl Session {
self.record_conversation_items(ctx, &[item]).await;
}
async fn maybe_warn_on_server_model_mismatch(
self: &Arc<Self>,
turn_context: &Arc<TurnContext>,
server_model: String,
) -> bool {
let requested_model = turn_context.model_info.slug.as_str();
if server_model == requested_model {
info!("server reported model {server_model} (matches requested model)");
return false;
}
warn!("server reported model {server_model} while requested model was {requested_model}");
let warning_message = format!(
"Your account was flagged for potentially high-risk cyber activity and this request was routed to gpt-5.2 as a fallback. To regain access to gpt-5.3-codex, apply for trusted access: {CYBER_VERIFY_URL} or learn more: {CYBER_SAFETY_URL}"
);
self.send_event(
turn_context,
EventMsg::Warning(WarningEvent {
message: warning_message.clone(),
}),
)
.await;
self.record_model_warning(warning_message, turn_context)
.await;
true
}
pub(crate) async fn replace_history(&self, items: Vec<ResponseItem>) {
let mut state = self.state.lock().await;
state.replace_history(items);
@@ -4435,6 +4466,7 @@ pub(crate) async fn run_turn(
// Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
// many turns, from the perspective of the user, it is a single turn.
let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
let mut server_model_warning_emitted_for_turn = false;
// `ModelClientSession` is turn-scoped and caches WebSocket + sticky routing state, so we reuse
// one instance across retries within this turn.
@@ -4497,6 +4529,7 @@ pub(crate) async fn run_turn(
sampling_request_input,
&explicitly_enabled_connectors,
skills_outcome.as_ref(),
&mut server_model_warning_emitted_for_turn,
cancellation_token.child_token(),
)
.await
@@ -4870,6 +4903,7 @@ async fn run_sampling_request(
input: Vec<ResponseItem>,
explicitly_enabled_connectors: &HashSet<String>,
skills_outcome: Option<&SkillLoadOutcome>,
server_model_warning_emitted_for_turn: &mut bool,
cancellation_token: CancellationToken,
) -> CodexResult<SamplingRequestResult> {
let router = built_tools(
@@ -4906,6 +4940,7 @@ async fn run_sampling_request(
client_session,
turn_metadata_header,
Arc::clone(&turn_diff_tracker),
server_model_warning_emitted_for_turn,
&prompt,
cancellation_token.child_token(),
)
@@ -5474,6 +5509,7 @@ async fn try_run_sampling_request(
client_session: &mut ModelClientSession,
turn_metadata_header: Option<&str>,
turn_diff_tracker: SharedTurnDiffTracker,
server_model_warning_emitted_for_turn: &mut bool,
prompt: &Prompt,
cancellation_token: CancellationToken,
) -> CodexResult<SamplingRequestResult> {
@@ -5616,6 +5652,15 @@ async fn try_run_sampling_request(
active_item = Some(turn_item);
}
}
ResponseEvent::ServerModel(server_model) => {
if !*server_model_warning_emitted_for_turn
&& sess
.maybe_warn_on_server_model_mismatch(&turn_context, server_model)
.await
{
*server_model_warning_emitted_for_turn = true;
}
}
ResponseEvent::ServerReasoningIncluded(included) => {
sess.set_server_reasoning_included(included).await;
}