mirror of
https://github.com/openai/codex.git
synced 2026-04-30 09:26:44 +00:00
add(core): safety check downgrade warning (#11964)
Add per-turn notice when a request is downgraded to a fallback model due
to cyber safety checks.
**Changes**
- codex-api: Emit a ServerModel event based on the openai-model response
header and/or response payload (SSE + WebSocket), including when the
model changes mid-stream.
- core: When the server-reported model differs from the requested model,
emit a single per-turn warning that explains the reroute to gpt-5.2 and
directs users to Trusted Access verification and the cyber safety
explainer.
- app-server (v2): Surface these cyber model-routing warnings as
synthetic userMessage items whose text is prefixed with "Warning:", and
document this behavior.
This commit is contained in:
@@ -56,6 +56,9 @@ pub enum ResponseEvent {
|
||||
Created,
|
||||
OutputItemDone(ResponseItem),
|
||||
OutputItemAdded(ResponseItem),
|
||||
/// Emitted when the server includes `OpenAI-Model` on the stream response.
|
||||
/// This can differ from the requested model when backend safety routing applies.
|
||||
ServerModel(String),
|
||||
/// Emitted when `X-Reasoning-Included: true` is present on the response,
|
||||
/// meaning the server already accounted for past reasoning tokens and the
|
||||
/// client should not re-estimate them.
|
||||
|
||||
@@ -63,6 +63,9 @@ impl Stream for AggregatedStream {
|
||||
Poll::Ready(Some(Ok(ResponseEvent::ModelsEtag(etag)))) => {
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::ModelsEtag(etag))));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::ServerModel(model)))) => {
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::ServerModel(model))));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
|
||||
@@ -163,6 +163,7 @@ impl Drop for WsStream {
|
||||
const X_CODEX_TURN_STATE_HEADER: &str = "x-codex-turn-state";
|
||||
const X_MODELS_ETAG_HEADER: &str = "x-models-etag";
|
||||
const X_REASONING_INCLUDED_HEADER: &str = "x-reasoning-included";
|
||||
const OPENAI_MODEL_HEADER: &str = "openai-model";
|
||||
|
||||
pub struct ResponsesWebsocketConnection {
|
||||
stream: Arc<Mutex<Option<WsStream>>>,
|
||||
@@ -170,6 +171,7 @@ pub struct ResponsesWebsocketConnection {
|
||||
idle_timeout: Duration,
|
||||
server_reasoning_included: bool,
|
||||
models_etag: Option<String>,
|
||||
server_model: Option<String>,
|
||||
telemetry: Option<Arc<dyn WebsocketTelemetry>>,
|
||||
}
|
||||
|
||||
@@ -179,6 +181,7 @@ impl ResponsesWebsocketConnection {
|
||||
idle_timeout: Duration,
|
||||
server_reasoning_included: bool,
|
||||
models_etag: Option<String>,
|
||||
server_model: Option<String>,
|
||||
telemetry: Option<Arc<dyn WebsocketTelemetry>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
@@ -186,6 +189,7 @@ impl ResponsesWebsocketConnection {
|
||||
idle_timeout,
|
||||
server_reasoning_included,
|
||||
models_etag,
|
||||
server_model,
|
||||
telemetry,
|
||||
}
|
||||
}
|
||||
@@ -204,12 +208,16 @@ impl ResponsesWebsocketConnection {
|
||||
let idle_timeout = self.idle_timeout;
|
||||
let server_reasoning_included = self.server_reasoning_included;
|
||||
let models_etag = self.models_etag.clone();
|
||||
let server_model = self.server_model.clone();
|
||||
let telemetry = self.telemetry.clone();
|
||||
let request_body = serde_json::to_value(&request).map_err(|err| {
|
||||
ApiError::Stream(format!("failed to encode websocket request: {err}"))
|
||||
})?;
|
||||
|
||||
tokio::spawn(async move {
|
||||
if let Some(model) = server_model {
|
||||
let _ = tx_event.send(Ok(ResponseEvent::ServerModel(model))).await;
|
||||
}
|
||||
if let Some(etag) = models_etag {
|
||||
let _ = tx_event.send(Ok(ResponseEvent::ModelsEtag(etag))).await;
|
||||
}
|
||||
@@ -273,13 +281,14 @@ impl<A: AuthProvider> ResponsesWebsocketClient<A> {
|
||||
merge_request_headers(&self.provider.headers, extra_headers, default_headers);
|
||||
add_auth_headers_to_header_map(&self.auth, &mut headers);
|
||||
|
||||
let (stream, server_reasoning_included, models_etag) =
|
||||
let (stream, server_reasoning_included, models_etag, server_model) =
|
||||
connect_websocket(ws_url, headers, turn_state.clone()).await?;
|
||||
Ok(ResponsesWebsocketConnection::new(
|
||||
stream,
|
||||
self.provider.stream_idle_timeout,
|
||||
server_reasoning_included,
|
||||
models_etag,
|
||||
server_model,
|
||||
telemetry,
|
||||
))
|
||||
}
|
||||
@@ -304,7 +313,7 @@ async fn connect_websocket(
|
||||
url: Url,
|
||||
headers: HeaderMap,
|
||||
turn_state: Option<Arc<OnceLock<String>>>,
|
||||
) -> Result<(WsStream, bool, Option<String>), ApiError> {
|
||||
) -> Result<(WsStream, bool, Option<String>, Option<String>), ApiError> {
|
||||
ensure_rustls_crypto_provider();
|
||||
info!("connecting to websocket: {url}");
|
||||
|
||||
@@ -341,6 +350,11 @@ async fn connect_websocket(
|
||||
.get(X_MODELS_ETAG_HEADER)
|
||||
.and_then(|value| value.to_str().ok())
|
||||
.map(ToString::to_string);
|
||||
let server_model = response
|
||||
.headers()
|
||||
.get(OPENAI_MODEL_HEADER)
|
||||
.and_then(|value| value.to_str().ok())
|
||||
.map(ToString::to_string);
|
||||
if let Some(turn_state) = turn_state
|
||||
&& let Some(header_value) = response
|
||||
.headers()
|
||||
@@ -349,7 +363,12 @@ async fn connect_websocket(
|
||||
{
|
||||
let _ = turn_state.set(header_value.to_string());
|
||||
}
|
||||
Ok((WsStream::new(stream), reasoning_included, models_etag))
|
||||
Ok((
|
||||
WsStream::new(stream),
|
||||
reasoning_included,
|
||||
models_etag,
|
||||
server_model,
|
||||
))
|
||||
}
|
||||
|
||||
fn websocket_config() -> WebSocketConfig {
|
||||
@@ -469,6 +488,7 @@ async fn run_websocket_response_stream(
|
||||
idle_timeout: Duration,
|
||||
telemetry: Option<Arc<dyn WebsocketTelemetry>>,
|
||||
) -> Result<(), ApiError> {
|
||||
let mut last_server_model: Option<String> = None;
|
||||
let request_text = match serde_json::to_string(&request_body) {
|
||||
Ok(text) => text,
|
||||
Err(err) => {
|
||||
@@ -536,6 +556,14 @@ async fn run_websocket_response_stream(
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if let Some(model) = event.response_model()
|
||||
&& last_server_model.as_deref() != Some(model.as_str())
|
||||
{
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::ServerModel(model.clone())))
|
||||
.await;
|
||||
last_server_model = Some(model);
|
||||
}
|
||||
match process_responses_event(event) {
|
||||
Ok(Some(event)) => {
|
||||
let is_completed = matches!(event, ResponseEvent::Completed { .. });
|
||||
|
||||
@@ -26,6 +26,7 @@ use tracing::debug;
|
||||
use tracing::trace;
|
||||
|
||||
const X_REASONING_INCLUDED_HEADER: &str = "x-reasoning-included";
|
||||
const OPENAI_MODEL_HEADER: &str = "openai-model";
|
||||
|
||||
/// Streams SSE events from an on-disk fixture for tests.
|
||||
pub fn stream_from_fixture(
|
||||
@@ -60,6 +61,11 @@ pub fn spawn_response_stream(
|
||||
.get("X-Models-Etag")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(ToString::to_string);
|
||||
let server_model = stream_response
|
||||
.headers
|
||||
.get(OPENAI_MODEL_HEADER)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(ToString::to_string);
|
||||
let reasoning_included = stream_response
|
||||
.headers
|
||||
.get(X_REASONING_INCLUDED_HEADER)
|
||||
@@ -74,6 +80,9 @@ pub fn spawn_response_stream(
|
||||
}
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent, ApiError>>(1600);
|
||||
tokio::spawn(async move {
|
||||
if let Some(model) = server_model {
|
||||
let _ = tx_event.send(Ok(ResponseEvent::ServerModel(model))).await;
|
||||
}
|
||||
for snapshot in rate_limit_snapshots {
|
||||
let _ = tx_event.send(Ok(ResponseEvent::RateLimits(snapshot))).await;
|
||||
}
|
||||
@@ -169,6 +178,41 @@ impl ResponsesStreamEvent {
|
||||
pub fn kind(&self) -> &str {
|
||||
&self.kind
|
||||
}
|
||||
|
||||
pub fn response_model(&self) -> Option<String> {
|
||||
self.response.as_ref().and_then(extract_server_model)
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_server_model(value: &Value) -> Option<String> {
|
||||
value
|
||||
.get("model")
|
||||
.and_then(json_value_as_string)
|
||||
.or_else(|| {
|
||||
value
|
||||
.get("headers")
|
||||
.and_then(header_openai_model_value_from_json)
|
||||
})
|
||||
}
|
||||
|
||||
fn header_openai_model_value_from_json(value: &Value) -> Option<String> {
|
||||
let headers = value.as_object()?;
|
||||
headers.iter().find_map(|(name, value)| {
|
||||
if name.eq_ignore_ascii_case("openai-model") || name.eq_ignore_ascii_case("x-openai-model")
|
||||
{
|
||||
json_value_as_string(value)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn json_value_as_string(value: &Value) -> Option<String> {
|
||||
match value {
|
||||
Value::String(value) => Some(value.clone()),
|
||||
Value::Array(items) => items.first().and_then(json_value_as_string),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -339,6 +383,7 @@ pub async fn process_sse(
|
||||
) {
|
||||
let mut stream = stream.eventsource();
|
||||
let mut response_error: Option<ApiError> = None;
|
||||
let mut last_server_model: Option<String> = None;
|
||||
|
||||
loop {
|
||||
let start = Instant::now();
|
||||
@@ -378,6 +423,19 @@ pub async fn process_sse(
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(model) = event.response_model()
|
||||
&& last_server_model.as_deref() != Some(model.as_str())
|
||||
{
|
||||
if tx_event
|
||||
.send(Ok(ResponseEvent::ServerModel(model.clone())))
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
return;
|
||||
}
|
||||
last_server_model = Some(model);
|
||||
}
|
||||
|
||||
match process_responses_event(event) {
|
||||
Ok(Some(event)) => {
|
||||
let is_completed = matches!(event, ResponseEvent::Completed { .. });
|
||||
@@ -456,9 +514,13 @@ mod tests {
|
||||
use super::*;
|
||||
use assert_matches::assert_matches;
|
||||
use bytes::Bytes;
|
||||
use codex_client::StreamResponse;
|
||||
use codex_protocol::models::MessagePhase;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use futures::stream;
|
||||
use http::HeaderMap;
|
||||
use http::HeaderValue;
|
||||
use http::StatusCode;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use tokio::sync::mpsc;
|
||||
@@ -870,6 +932,149 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn spawn_response_stream_emits_server_model_header() {
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert(
|
||||
OPENAI_MODEL_HEADER,
|
||||
HeaderValue::from_static(CYBER_RESTRICTED_MODEL_FOR_TESTS),
|
||||
);
|
||||
let bytes = stream::iter(Vec::<Result<Bytes, TransportError>>::new());
|
||||
let stream_response = StreamResponse {
|
||||
status: StatusCode::OK,
|
||||
headers,
|
||||
bytes: Box::pin(bytes),
|
||||
};
|
||||
|
||||
let mut stream = spawn_response_stream(stream_response, idle_timeout(), None, None);
|
||||
let event = stream
|
||||
.rx_event
|
||||
.recv()
|
||||
.await
|
||||
.expect("expected server model event")
|
||||
.expect("expected ok event");
|
||||
|
||||
match event {
|
||||
ResponseEvent::ServerModel(model) => {
|
||||
assert_eq!(model, CYBER_RESTRICTED_MODEL_FOR_TESTS);
|
||||
}
|
||||
other => panic!("expected server model event, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn process_sse_emits_server_model_from_response_payload() {
|
||||
let events = run_sse(vec![
|
||||
json!({
|
||||
"type": "response.created",
|
||||
"response": {
|
||||
"id": "resp-1",
|
||||
"model": CYBER_RESTRICTED_MODEL_FOR_TESTS
|
||||
}
|
||||
}),
|
||||
json!({
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "resp-1",
|
||||
"model": CYBER_RESTRICTED_MODEL_FOR_TESTS
|
||||
}
|
||||
}),
|
||||
])
|
||||
.await;
|
||||
|
||||
assert_eq!(events.len(), 3);
|
||||
assert_matches!(
|
||||
&events[0],
|
||||
ResponseEvent::ServerModel(model) if model == CYBER_RESTRICTED_MODEL_FOR_TESTS
|
||||
);
|
||||
assert_matches!(&events[1], ResponseEvent::Created);
|
||||
assert_matches!(
|
||||
&events[2],
|
||||
ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage: None,
|
||||
can_append: false
|
||||
} if response_id == "resp-1"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn process_sse_emits_server_model_from_response_headers_payload() {
|
||||
let events = run_sse(vec![
|
||||
json!({
|
||||
"type": "response.created",
|
||||
"response": {
|
||||
"id": "resp-1",
|
||||
"headers": {
|
||||
"OpenAI-Model": CYBER_RESTRICTED_MODEL_FOR_TESTS
|
||||
}
|
||||
}
|
||||
}),
|
||||
json!({
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "resp-1"
|
||||
}
|
||||
}),
|
||||
])
|
||||
.await;
|
||||
|
||||
assert_eq!(events.len(), 3);
|
||||
assert_matches!(
|
||||
&events[0],
|
||||
ResponseEvent::ServerModel(model) if model == CYBER_RESTRICTED_MODEL_FOR_TESTS
|
||||
);
|
||||
assert_matches!(&events[1], ResponseEvent::Created);
|
||||
assert_matches!(
|
||||
&events[2],
|
||||
ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage: None,
|
||||
can_append: false
|
||||
} if response_id == "resp-1"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn process_sse_emits_server_model_again_when_response_model_changes() {
|
||||
let events = run_sse(vec![
|
||||
json!({
|
||||
"type": "response.created",
|
||||
"response": {
|
||||
"id": "resp-1",
|
||||
"model": "gpt-5.2-codex"
|
||||
}
|
||||
}),
|
||||
json!({
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "resp-1",
|
||||
"model": "gpt-5.3-codex"
|
||||
}
|
||||
}),
|
||||
])
|
||||
.await;
|
||||
|
||||
assert_eq!(events.len(), 4);
|
||||
assert_matches!(
|
||||
&events[0],
|
||||
ResponseEvent::ServerModel(model) if model == "gpt-5.2-codex"
|
||||
);
|
||||
assert_matches!(&events[1], ResponseEvent::Created);
|
||||
assert_matches!(
|
||||
&events[2],
|
||||
ResponseEvent::ServerModel(model) if model == "gpt-5.3-codex"
|
||||
);
|
||||
assert_matches!(
|
||||
&events[3],
|
||||
ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage: None,
|
||||
can_append: false
|
||||
} if response_id == "resp-1"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_parse_retry_after() {
|
||||
let err = Error {
|
||||
@@ -909,4 +1114,6 @@ mod tests {
|
||||
let delay = try_parse_retry_after(&err);
|
||||
assert_eq!(delay, Some(Duration::from_secs(35)));
|
||||
}
|
||||
|
||||
const CYBER_RESTRICTED_MODEL_FOR_TESTS: &str = "gpt-5.3-codex";
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user