// codex/codex-rs/core/tests/suite/client_websockets.rs

#![allow(clippy::expect_used, clippy::unwrap_used)]
use codex_core::CodexAuth;
use codex_core::ContentItem;
use codex_core::ModelClient;
use codex_core::ModelClientSession;
use codex_core::ModelProviderInfo;
use codex_core::Prompt;
use codex_core::ResponseEvent;
use codex_core::ResponseItem;
use codex_core::WireApi;
use codex_core::X_RESPONSESAPI_INCLUDE_TIMING_METRICS_HEADER;
use codex_core::features::Feature;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SessionSource;
use codex_otel::OtelManager;
use codex_otel::TelemetryAuthMode;
use codex_otel::metrics::MetricsClient;
use codex_otel::metrics::MetricsConfig;
use codex_protocol::ThreadId;
use codex_protocol::account::PlanType;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::models::BaseInstructions;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
use core_test_support::responses::WebSocketConnectionConfig;
use core_test_support::responses::WebSocketTestServer;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_done;
use core_test_support::responses::ev_done_with_id;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::start_websocket_server;
use core_test_support::responses::start_websocket_server_with_headers;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use futures::StreamExt;
use opentelemetry_sdk::metrics::InMemoryMetricExporter;
use pretty_assertions::assert_eq;
use serde_json::json;
use std::sync::Arc;
use std::time::Duration;
use tempfile::TempDir;
use tracing_test::traced_test;
const MODEL: &str = "gpt-5.2-codex";
const OPENAI_BETA_HEADER: &str = "OpenAI-Beta";
const OPENAI_BETA_RESPONSES_WEBSOCKETS: &str = "responses_websockets=2026-02-04";
const WS_V2_BETA_HEADER_VALUE: &str = "responses_websockets=2026-02-06";
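
// Everything a test needs to drive a ModelClient against the in-process
// WebSocket server: the client itself, model metadata, reasoning settings,
// and the OtelManager whose runtime metrics several tests inspect.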
struct WebsocketTestHarness {
_codex_home: TempDir,
client: ModelClient,
model_info: ModelInfo,
effort: Option<ReasoningEffortConfig>,
summary: ReasoningSummary,
otel_manager: OtelManager,
}
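
// A single turn sends exactly one `response.create` frame carrying the
// expected model, stream flag, and input, and the handshake includes the
// v1 `OpenAI-Beta: responses_websockets=...` header.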
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_streams_request() {
skip_if_no_network!();
let server = start_websocket_server(vec![vec![vec![
ev_response_created("resp-1"),
ev_completed("resp-1"),
]]])
.await;
let harness = websocket_harness(&server).await;
let mut client_session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
stream_until_complete(&mut client_session, &harness, &prompt).await;
let connection = server.single_connection();
assert_eq!(connection.len(), 1);
let body = connection.first().expect("missing request").body_json();
assert_eq!(body["type"].as_str(), Some("response.create"));
assert_eq!(body["model"].as_str(), Some(MODEL));
assert_eq!(body["stream"], serde_json::Value::Bool(true));
assert_eq!(body["input"].as_array().map(Vec::len), Some(1));
let handshake = server.single_handshake();
assert_eq!(
handshake.header(OPENAI_BETA_HEADER),
Some(OPENAI_BETA_RESPONSES_WEBSOCKETS.to_string())
);
server.shutdown().await;
}
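
// Prewarming the socket before the first turn means streaming reuses the
// existing connection: one handshake, one request.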
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_preconnect_reuses_connection() {
skip_if_no_network!();
let server = start_websocket_server(vec![vec![vec![
ev_response_created("resp-1"),
ev_completed("resp-1"),
]]])
.await;
let harness = websocket_harness(&server).await;
let mut client_session = harness.client.new_session();
client_session
.prewarm_websocket(&harness.otel_manager, &harness.model_info)
.await
.expect("websocket prewarm failed");
let prompt = prompt_with_input(vec![message_item("hello")]);
stream_until_complete(&mut client_session, &harness, &prompt).await;
assert_eq!(server.handshakes().len(), 1);
assert_eq!(server.single_connection().len(), 1);
server.shutdown().await;
}
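
// The prewarmed connection is still reused when request headers change
// between prewarm and the first stream.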
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_preconnect_is_reused_even_with_header_changes() {
skip_if_no_network!();
let server = start_websocket_server(vec![vec![vec![
ev_response_created("resp-1"),
ev_completed("resp-1"),
]]])
.await;
let harness = websocket_harness(&server).await;
let mut client_session = harness.client.new_session();
client_session
.prewarm_websocket(&harness.otel_manager, &harness.model_info)
.await
.expect("websocket prewarm failed");
let prompt = prompt_with_input(vec![message_item("hello")]);
let mut stream = client_session
.stream(
&prompt,
&harness.model_info,
&harness.otel_manager,
harness.effort,
harness.summary,
None,
)
.await
.expect("websocket stream failed");
while let Some(event) = stream.next().await {
if matches!(event, Ok(ResponseEvent::Completed { .. })) {
break;
}
}
assert_eq!(server.handshakes().len(), 1);
assert_eq!(server.single_connection().len(), 1);
server.shutdown().await;
}
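
// With the ResponsesWebsockets feature disabled, prewarm honors the model's
// `prefer_websockets` flag: the handshake happens eagerly, but no
// `response.create` is sent until the turn actually streams.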
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_prewarm_uses_model_preference_when_feature_disabled() {
skip_if_no_network!();
let server = start_websocket_server(vec![vec![vec![
ev_response_created("resp-1"),
ev_completed("resp-1"),
]]])
.await;
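    // Toggles: runtime metrics off, websockets feature off, v2 off,
    // prefer_websockets on.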
let harness = websocket_harness_with_options(&server, false, false, false, true).await;
let mut client_session = harness.client.new_session();
client_session
.prewarm_websocket(&harness.otel_manager, &harness.model_info)
.await
.expect("websocket prewarm failed");
// Prewarm should only perform the handshake, not send response.create.
assert_eq!(server.handshakes().len(), 1);
assert_eq!(server.single_connection().len(), 0);
let prompt = prompt_with_input(vec![message_item("hello")]);
stream_until_complete(&mut client_session, &harness, &prompt).await;
assert_eq!(server.handshakes().len(), 1);
assert_eq!(server.single_connection().len(), 1);
server.shutdown().await;
}
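
// WebSocket turns are counted under the websocket metrics counters rather
// than the HTTP `api_calls`/`streaming_events` counters.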
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[traced_test]
async fn responses_websocket_emits_websocket_telemetry_events() {
skip_if_no_network!();
let server = start_websocket_server(vec![vec![vec![
ev_response_created("resp-1"),
ev_completed("resp-1"),
]]])
.await;
let harness = websocket_harness(&server).await;
harness.otel_manager.reset_runtime_metrics();
let mut client_session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
stream_until_complete(&mut client_session, &harness, &prompt).await;
tokio::time::sleep(Duration::from_millis(10)).await;
let summary = harness
.otel_manager
.runtime_metrics_summary()
.expect("runtime metrics summary");
assert_eq!(summary.api_calls.count, 0);
assert_eq!(summary.streaming_events.count, 0);
assert_eq!(summary.websocket_calls.count, 1);
assert_eq!(summary.websocket_events.count, 2);
server.shutdown().await;
}
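
// With runtime metrics enabled the handshake advertises the timing-metrics
// header, and a `responsesapi.websocket_timing` frame is folded into the
// runtime metrics summary.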
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_includes_timing_metrics_header_when_runtime_metrics_enabled() {
skip_if_no_network!();
let server = start_websocket_server(vec![vec![vec![
ev_response_created("resp-1"),
json!({
"type": "responsesapi.websocket_timing",
"timing_metrics": {
"responses_duration_excl_engine_and_client_tool_time_ms": 120,
"engine_service_total_ms": 450,
"engine_iapi_ttft_total_ms": 310,
"engine_service_ttft_total_ms": 340,
"engine_iapi_tbt_across_engine_calls_ms": 220,
"engine_service_tbt_across_engine_calls_ms": 260
}
}),
ev_completed("resp-1"),
]]])
.await;
let harness = websocket_harness_with_runtime_metrics(&server, true).await;
harness.otel_manager.reset_runtime_metrics();
let mut client_session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
stream_until_complete(&mut client_session, &harness, &prompt).await;
tokio::time::sleep(Duration::from_millis(10)).await;
let handshake = server.single_handshake();
assert_eq!(
handshake.header(X_RESPONSESAPI_INCLUDE_TIMING_METRICS_HEADER),
Some("true".to_string())
);
let summary = harness
.otel_manager
.runtime_metrics_summary()
.expect("runtime metrics summary");
assert_eq!(summary.responses_api_overhead_ms, 120);
assert_eq!(summary.responses_api_inference_time_ms, 450);
assert_eq!(summary.responses_api_engine_iapi_ttft_ms, 310);
assert_eq!(summary.responses_api_engine_service_ttft_ms, 340);
assert_eq!(summary.responses_api_engine_iapi_tbt_ms, 220);
assert_eq!(summary.responses_api_engine_service_tbt_ms, 260);
server.shutdown().await;
}
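
// Without runtime metrics the timing-metrics header is omitted from the
// handshake.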
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_omits_timing_metrics_header_when_runtime_metrics_disabled() {
skip_if_no_network!();
let server = start_websocket_server(vec![vec![vec![
ev_response_created("resp-1"),
ev_completed("resp-1"),
]]])
.await;
let harness = websocket_harness_with_runtime_metrics(&server, false).await;
let mut client_session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
stream_until_complete(&mut client_session, &harness, &prompt).await;
let handshake = server.single_handshake();
assert_eq!(
handshake.header(X_RESPONSESAPI_INCLUDE_TIMING_METRICS_HEADER),
None
);
server.shutdown().await;
}
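
// An `X-Reasoning-Included: true` response header surfaces on the stream as
// `ResponseEvent::ServerReasoningIncluded(true)`.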
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_emits_reasoning_included_event() {
skip_if_no_network!();
let server = start_websocket_server_with_headers(vec![WebSocketConnectionConfig {
requests: vec![vec![ev_response_created("resp-1"), ev_completed("resp-1")]],
response_headers: vec![("X-Reasoning-Included".to_string(), "true".to_string())],
accept_delay: None,
}])
.await;
let harness = websocket_harness(&server).await;
let mut client_session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
let mut stream = client_session
.stream(
&prompt,
&harness.model_info,
&harness.otel_manager,
harness.effort,
harness.summary,
None,
)
.await
.expect("websocket stream failed");
let mut saw_reasoning_included = false;
while let Some(event) = stream.next().await {
match event.expect("event") {
ResponseEvent::ServerReasoningIncluded(true) => {
saw_reasoning_included = true;
}
ResponseEvent::Completed { .. } => break,
_ => {}
}
}
assert!(saw_reasoning_included);
server.shutdown().await;
}
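
// A `codex.rate_limits` frame and the `X-Models-Etag`/`X-Reasoning-Included`
// headers surface as RateLimits, ModelsEtag, and ServerReasoningIncluded
// events with their fields intact.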
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_emits_rate_limit_events() {
skip_if_no_network!();
let rate_limit_event = json!({
"type": "codex.rate_limits",
"plan_type": "plus",
"rate_limits": {
"allowed": true,
"limit_reached": false,
"primary": {
"used_percent": 42,
"window_minutes": 60,
"reset_at": 1700000000
},
"secondary": null
},
"code_review_rate_limits": null,
"credits": {
"has_credits": true,
"unlimited": false,
"balance": "123"
},
"promo": null
});
let server = start_websocket_server_with_headers(vec![WebSocketConnectionConfig {
requests: vec![vec![
rate_limit_event,
ev_response_created("resp-1"),
ev_completed("resp-1"),
]],
response_headers: vec![
("X-Models-Etag".to_string(), "etag-123".to_string()),
("X-Reasoning-Included".to_string(), "true".to_string()),
],
accept_delay: None,
}])
.await;
let harness = websocket_harness(&server).await;
let mut client_session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
let mut stream = client_session
.stream(
&prompt,
&harness.model_info,
&harness.otel_manager,
harness.effort,
harness.summary,
None,
)
.await
.expect("websocket stream failed");
let mut saw_rate_limits = None;
let mut saw_models_etag = None;
let mut saw_reasoning_included = false;
while let Some(event) = stream.next().await {
match event.expect("event") {
ResponseEvent::RateLimits(snapshot) => {
saw_rate_limits = Some(snapshot);
}
ResponseEvent::ModelsEtag(etag) => {
saw_models_etag = Some(etag);
}
ResponseEvent::ServerReasoningIncluded(true) => {
saw_reasoning_included = true;
}
ResponseEvent::Completed { .. } => break,
_ => {}
}
}
let rate_limits = saw_rate_limits.expect("missing rate limits");
let primary = rate_limits.primary.expect("missing primary window");
assert_eq!(primary.used_percent, 42.0);
assert_eq!(primary.window_minutes, Some(60));
assert_eq!(primary.resets_at, Some(1_700_000_000));
assert_eq!(rate_limits.plan_type, Some(PlanType::Plus));
let credits = rate_limits.credits.expect("missing credits");
assert!(credits.has_credits);
assert!(!credits.unlimited);
assert_eq!(credits.balance.as_deref(), Some("123"));
assert_eq!(saw_models_etag.as_deref(), Some("etag-123"));
assert!(saw_reasoning_included);
server.shutdown().await;
}
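
// A 429 `usage_limit_reached` error frame still produces a TokenCount event
// populated from the rate-limit headers before the turn surfaces a
// usage-limit error.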
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_usage_limit_error_emits_rate_limit_event() {
skip_if_no_network!();
let usage_limit_error = json!({
"type": "error",
"status": 429,
"error": {
"type": "usage_limit_reached",
"message": "The usage limit has been reached",
"plan_type": "pro",
"resets_at": 1704067242,
"resets_in_seconds": 1234
},
"headers": {
"x-codex-primary-used-percent": "100.0",
"x-codex-secondary-used-percent": "87.5",
"x-codex-primary-over-secondary-limit-percent": "95.0",
"x-codex-primary-window-minutes": "15",
"x-codex-secondary-window-minutes": "60"
}
});
let server = start_websocket_server(vec![vec![vec![usage_limit_error]]]).await;
let mut builder = test_codex().with_config(|config| {
config.model_provider.request_max_retries = Some(0);
config.model_provider.stream_max_retries = Some(0);
});
let test = builder
.build_with_websocket_server(&server)
.await
.expect("build websocket codex");
let submission_id = test
.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
})
.await
.expect("submission should succeed while emitting usage limit error events");
let token_event =
wait_for_event(&test.codex, |msg| matches!(msg, EventMsg::TokenCount(_))).await;
let EventMsg::TokenCount(event) = token_event else {
unreachable!();
};
let event_json = serde_json::to_value(&event).expect("serialize token count event");
assert_eq!(
event_json,
json!({
"info": null,
"rate_limits": {
"limit_id": "codex",
"limit_name": null,
"primary": {
"used_percent": 100.0,
"window_minutes": 15,
"resets_at": null
},
"secondary": {
"used_percent": 87.5,
"window_minutes": 60,
"resets_at": null
},
"credits": null,
"plan_type": null
}
})
);
let error_event = wait_for_event(&test.codex, |msg| matches!(msg, EventMsg::Error(_))).await;
let EventMsg::Error(error_event) = error_event else {
unreachable!();
};
assert!(
error_event.message.to_lowercase().contains("usage limit"),
"unexpected error message for submission {submission_id}: {}",
error_event.message
);
server.shutdown().await;
}
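
// A 400 `invalid_request_error` frame is forwarded to the user as an Error
// event with the server's message intact.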
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_invalid_request_error_with_status_is_forwarded() {
skip_if_no_network!();
let invalid_request_error = json!({
"type": "error",
"status": 400,
"error": {
"type": "invalid_request_error",
"message": "Model 'castor-raikou-0205-ev3' does not support image inputs."
}
});
let server = start_websocket_server(vec![vec![vec![invalid_request_error]]]).await;
let mut builder = test_codex().with_config(|config| {
config.model_provider.request_max_retries = Some(0);
config.model_provider.stream_max_retries = Some(0);
});
let test = builder
.build_with_websocket_server(&server)
.await
.expect("build websocket codex");
let submission_id = test
.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
})
.await
.expect("submission should succeed while emitting invalid request events");
let error_event = wait_for_event(&test.codex, |msg| matches!(msg, EventMsg::Error(_))).await;
let EventMsg::Error(error_event) = error_event else {
unreachable!();
};
assert!(
error_event
.message
.to_lowercase()
.contains("does not support image inputs"),
"unexpected error message for submission {submission_id}: {}",
error_event.message
);
server.shutdown().await;
}
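
// When the next prompt extends the previous turn's items, the client sends
// a `response.append` frame containing only the new suffix.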
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_appends_on_prefix() {
skip_if_no_network!();
let server = start_websocket_server(vec![vec![
vec![
ev_response_created("resp-1"),
ev_assistant_message("msg-1", "assistant output"),
ev_done(),
],
vec![ev_response_created("resp-2"), ev_completed("resp-2")],
]])
.await;
let harness = websocket_harness(&server).await;
let mut client_session = harness.client.new_session();
let prompt_one = prompt_with_input(vec![message_item("hello")]);
let prompt_two = prompt_with_input(vec![
message_item("hello"),
assistant_message_item("msg-1", "assistant output"),
message_item("second"),
]);
stream_until_complete(&mut client_session, &harness, &prompt_one).await;
stream_until_complete(&mut client_session, &harness, &prompt_two).await;
let connection = server.single_connection();
assert_eq!(connection.len(), 2);
let first = connection.first().expect("missing request").body_json();
let second = connection.get(1).expect("missing request").body_json();
assert_eq!(first["type"].as_str(), Some("response.create"));
assert_eq!(first["model"].as_str(), Some(MODEL));
assert_eq!(first["stream"], serde_json::Value::Bool(true));
assert_eq!(first["input"].as_array().map(Vec::len), Some(1));
let expected_append = json!({
"type": "response.append",
"input": serde_json::to_value(&prompt_two.input[2..]).expect("serialize append items"),
});
assert_eq!(second, expected_append);
server.shutdown().await;
}
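
// A previous turn that ended with `response.completed` cannot be appended
// to, so the next turn falls back to a full `response.create`.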
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_creates_on_prefix_when_previous_completion_cannot_append() {
skip_if_no_network!();
let server = start_websocket_server(vec![vec![
vec![
ev_response_created("resp-1"),
ev_assistant_message("msg-1", "assistant output"),
ev_completed("resp-1"),
],
vec![ev_response_created("resp-2"), ev_completed("resp-2")],
]])
.await;
let harness = websocket_harness(&server).await;
let mut client_session = harness.client.new_session();
let prompt_one = prompt_with_input(vec![message_item("hello")]);
let prompt_two = prompt_with_input(vec![
message_item("hello"),
assistant_message_item("msg-1", "assistant output"),
message_item("second"),
]);
stream_until_complete(&mut client_session, &harness, &prompt_one).await;
stream_until_complete(&mut client_session, &harness, &prompt_two).await;
let connection = server.single_connection();
assert_eq!(connection.len(), 2);
let second = connection.get(1).expect("missing request").body_json();
assert_eq!(second["type"].as_str(), Some("response.create"));
assert_eq!(
second["input"],
serde_json::to_value(&prompt_two.input).expect("serialize full input")
);
server.shutdown().await;
}
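
// A prompt that does not extend the previous input triggers a fresh
// `response.create` with the full input.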
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_creates_on_non_prefix() {
skip_if_no_network!();
let server = start_websocket_server(vec![vec![
vec![ev_response_created("resp-1"), ev_completed("resp-1")],
vec![ev_response_created("resp-2"), ev_completed("resp-2")],
]])
.await;
let harness = websocket_harness(&server).await;
let mut client_session = harness.client.new_session();
let prompt_one = prompt_with_input(vec![message_item("hello")]);
let prompt_two = prompt_with_input(vec![message_item("different")]);
stream_until_complete(&mut client_session, &harness, &prompt_one).await;
stream_until_complete(&mut client_session, &harness, &prompt_two).await;
let connection = server.single_connection();
assert_eq!(connection.len(), 2);
let second = connection.get(1).expect("missing request").body_json();
assert_eq!(second["type"].as_str(), Some("response.create"));
assert_eq!(second["model"].as_str(), Some(MODEL));
assert_eq!(second["stream"], serde_json::Value::Bool(true));
assert_eq!(
second["input"],
serde_json::to_value(&prompt_two.input).unwrap()
);
server.shutdown().await;
}
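
// Changing non-input request fields (here, the base instructions) also
// forces a full `response.create`, even though the input itself is a
// prefix extension.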
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_creates_when_non_input_request_fields_change() {
skip_if_no_network!();
let server = start_websocket_server(vec![vec![
vec![ev_response_created("resp-1"), ev_completed("resp-1")],
vec![ev_response_created("resp-2"), ev_completed("resp-2")],
]])
.await;
let harness = websocket_harness(&server).await;
let mut client_session = harness.client.new_session();
let prompt_one =
prompt_with_input_and_instructions(vec![message_item("hello")], "base instructions one");
let prompt_two = prompt_with_input_and_instructions(
vec![message_item("hello"), message_item("second")],
"base instructions two",
);
stream_until_complete(&mut client_session, &harness, &prompt_one).await;
stream_until_complete(&mut client_session, &harness, &prompt_two).await;
let connection = server.single_connection();
assert_eq!(connection.len(), 2);
let second = connection.get(1).expect("missing request").body_json();
assert_eq!(second["type"].as_str(), Some("response.create"));
assert_eq!(second.get("previous_response_id"), None);
assert_eq!(
second["input"],
serde_json::to_value(&prompt_two.input).expect("serialize full input")
);
server.shutdown().await;
}
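
// In WebSockets v2, a prefix extension becomes a `response.create` with
// `previous_response_id` set and only the new input suffix attached.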
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_v2_creates_with_previous_response_id_on_prefix() {
skip_if_no_network!();
let server = start_websocket_server(vec![vec![
vec![
ev_response_created("resp-1"),
ev_assistant_message("msg-1", "assistant output"),
ev_done_with_id("resp-1"),
],
vec![ev_response_created("resp-2"), ev_completed("resp-2")],
]])
.await;
let harness = websocket_harness_with_v2(&server, true).await;
let mut session = harness.client.new_session();
let prompt_one = prompt_with_input(vec![message_item("hello")]);
let prompt_two = prompt_with_input(vec![
message_item("hello"),
assistant_message_item("msg-1", "assistant output"),
message_item("second"),
]);
stream_until_complete(&mut session, &harness, &prompt_one).await;
stream_until_complete(&mut session, &harness, &prompt_two).await;
let connection = server.single_connection();
assert_eq!(connection.len(), 2);
let first = connection.first().expect("missing request").body_json();
let second = connection.get(1).expect("missing request").body_json();
assert_eq!(first["type"].as_str(), Some("response.create"));
assert_eq!(second["type"].as_str(), Some("response.create"));
assert_eq!(second["previous_response_id"].as_str(), Some("resp-1"));
assert_eq!(
second["input"],
serde_json::to_value(&prompt_two.input[2..]).unwrap()
);
server.shutdown().await;
}
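
// In v2, changed non-input fields drop `previous_response_id` and resend
// the full input.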
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_v2_creates_without_previous_response_id_when_non_input_fields_change()
{
skip_if_no_network!();
let server = start_websocket_server(vec![vec![
vec![ev_response_created("resp-1"), ev_completed("resp-1")],
vec![ev_response_created("resp-2"), ev_completed("resp-2")],
]])
.await;
let harness = websocket_harness_with_v2(&server, true).await;
let mut session = harness.client.new_session();
let prompt_one =
prompt_with_input_and_instructions(vec![message_item("hello")], "base instructions one");
let prompt_two = prompt_with_input_and_instructions(
vec![message_item("hello"), message_item("second")],
"base instructions two",
);
stream_until_complete(&mut session, &harness, &prompt_one).await;
stream_until_complete(&mut session, &harness, &prompt_two).await;
let connection = server.single_connection();
assert_eq!(connection.len(), 2);
let second = connection.get(1).expect("missing request").body_json();
assert_eq!(second["type"].as_str(), Some("response.create"));
assert_eq!(second.get("previous_response_id"), None);
assert_eq!(
second["input"],
serde_json::to_value(&prompt_two.input).expect("serialize full input")
);
server.shutdown().await;
}
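
// After a `response.failed` turn, v2 reconnects and issues a full
// `response.create` with no `previous_response_id`.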
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_v2_after_error_uses_full_create_without_previous_response_id() {
skip_if_no_network!();
let server = start_websocket_server(vec![
vec![
vec![ev_response_created("resp-1"), ev_completed("resp-1")],
vec![json!({
"type": "response.failed",
"response": {
"error": {
"code": "invalid_prompt",
"message": "synthetic websocket failure"
}
}
})],
],
vec![vec![ev_response_created("resp-3"), ev_completed("resp-3")]],
])
.await;
let harness = websocket_harness_with_v2(&server, true).await;
let mut session = harness.client.new_session();
let prompt_one = prompt_with_input(vec![message_item("hello")]);
let prompt_two = prompt_with_input(vec![message_item("hello"), message_item("second")]);
let prompt_three = prompt_with_input(vec![
message_item("hello"),
message_item("second"),
message_item("third"),
]);
stream_until_complete(&mut session, &harness, &prompt_one).await;
let mut second_stream = session
.stream(
&prompt_two,
&harness.model_info,
&harness.otel_manager,
harness.effort,
harness.summary,
None,
)
.await
.expect("websocket stream failed");
let mut saw_error = false;
while let Some(event) = second_stream.next().await {
if event.is_err() {
saw_error = true;
break;
}
}
assert!(saw_error, "expected second websocket stream to error");
stream_until_complete(&mut session, &harness, &prompt_three).await;
assert_eq!(server.handshakes().len(), 2);
let connections = server.connections();
assert_eq!(connections.len(), 2);
let first_connection = connections.first().expect("missing first connection");
assert_eq!(first_connection.len(), 2);
let first = first_connection
.first()
.expect("missing first request")
.body_json();
let second = first_connection
.get(1)
.expect("missing second request")
.body_json();
let third = connections
.get(1)
.and_then(|connection| connection.first())
.expect("missing third request")
.body_json();
assert_eq!(first["type"].as_str(), Some("response.create"));
assert_eq!(second["type"].as_str(), Some("response.create"));
assert_eq!(second["previous_response_id"].as_str(), Some("resp-1"));
assert_eq!(third["type"].as_str(), Some("response.create"));
assert_eq!(third.get("previous_response_id"), None);
assert_eq!(
third["input"],
serde_json::to_value(&prompt_three.input).unwrap()
);
server.shutdown().await;
}
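
// The v2 handshake advertises the v2 OpenAI-Beta value and must not carry
// the v1 value.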
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_v2_sets_openai_beta_header() {
skip_if_no_network!();
let server = start_websocket_server(vec![vec![vec![
ev_response_created("resp-1"),
ev_completed("resp-1"),
]]])
.await;
let harness = websocket_harness_with_v2(&server, true).await;
let mut session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
stream_until_complete(&mut session, &harness, &prompt).await;
let handshake = server.single_handshake();
let openai_beta_header = handshake
.header(OPENAI_BETA_HEADER)
.expect("missing OpenAI-Beta header");
assert!(
openai_beta_header
.split(',')
.map(str::trim)
.any(|value| value == WS_V2_BETA_HEADER_VALUE)
);
assert!(
!openai_beta_header
.split(',')
.map(str::trim)
.any(|value| value == OPENAI_BETA_RESPONSES_WEBSOCKETS)
);
server.shutdown().await;
}
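
// Helpers for building prompts, providers, and harnesses used by the tests
// above.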
fn message_item(text: &str) -> ResponseItem {
ResponseItem::Message {
id: None,
role: "user".into(),
content: vec![ContentItem::InputText { text: text.into() }],
end_turn: None,
phase: None,
}
}
fn assistant_message_item(id: &str, text: &str) -> ResponseItem {
ResponseItem::Message {
id: Some(id.to_string()),
role: "assistant".into(),
content: vec![ContentItem::OutputText { text: text.into() }],
end_turn: None,
phase: None,
}
}
fn prompt_with_input(input: Vec<ResponseItem>) -> Prompt {
let mut prompt = Prompt::default();
prompt.input = input;
prompt
}
fn prompt_with_input_and_instructions(input: Vec<ResponseItem>, instructions: &str) -> Prompt {
let mut prompt = prompt_with_input(input);
prompt.base_instructions = BaseInstructions {
text: instructions.to_string(),
};
prompt
}
fn websocket_provider(server: &WebSocketTestServer) -> ModelProviderInfo {
ModelProviderInfo {
name: "mock-ws".into(),
base_url: Some(format!("{}/v1", server.uri())),
env_key: None,
env_key_instructions: None,
experimental_bearer_token: None,
wire_api: WireApi::Responses,
query_params: None,
http_headers: None,
env_http_headers: None,
request_max_retries: Some(0),
stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(5_000),
requires_openai_auth: false,
supports_websockets: true,
}
}
async fn websocket_harness(server: &WebSocketTestServer) -> WebsocketTestHarness {
websocket_harness_with_runtime_metrics(server, false).await
}
async fn websocket_harness_with_runtime_metrics(
server: &WebSocketTestServer,
runtime_metrics_enabled: bool,
) -> WebsocketTestHarness {
websocket_harness_with_options(server, runtime_metrics_enabled, true, false, false).await
}
async fn websocket_harness_with_v2(
server: &WebSocketTestServer,
websocket_v2_enabled: bool,
) -> WebsocketTestHarness {
websocket_harness_with_options(server, false, true, websocket_v2_enabled, false).await
}
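
// Builds a harness with explicit feature toggles; the narrower
// `websocket_harness*` constructors above all delegate here.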
async fn websocket_harness_with_options(
server: &WebSocketTestServer,
runtime_metrics_enabled: bool,
websocket_enabled: bool,
websocket_v2_enabled: bool,
prefer_websockets: bool,
) -> WebsocketTestHarness {
let provider = websocket_provider(server);
let codex_home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&codex_home).await;
config.model = Some(MODEL.to_string());
if websocket_enabled {
config.features.enable(Feature::ResponsesWebsockets);
} else {
config.features.disable(Feature::ResponsesWebsockets);
}
if runtime_metrics_enabled {
config.features.enable(Feature::RuntimeMetrics);
}
if websocket_v2_enabled {
config.features.enable(Feature::ResponsesWebsocketsV2);
}
let config = Arc::new(config);
let mut model_info = codex_core::test_support::construct_model_info_offline(MODEL, &config);
model_info.prefer_websockets = prefer_websockets;
let conversation_id = ThreadId::new();
let auth_manager =
codex_core::test_support::auth_manager_from_auth(CodexAuth::from_api_key("Test API Key"));
let exporter = InMemoryMetricExporter::default();
let metrics = MetricsClient::new(
MetricsConfig::in_memory("test", "codex-core", env!("CARGO_PKG_VERSION"), exporter)
.with_runtime_reader(),
)
.expect("in-memory metrics client");
let otel_manager = OtelManager::new(
conversation_id,
MODEL,
model_info.slug.as_str(),
None,
Some("test@test.com".to_string()),
auth_manager.auth_mode().map(TelemetryAuthMode::from),
"test_originator".to_string(),
false,
"test".to_string(),
SessionSource::Exec,
)
.with_metrics(metrics);
let effort = None;
let summary = ReasoningSummary::Auto;
let client = ModelClient::new(
None,
conversation_id,
provider.clone(),
SessionSource::Exec,
config.model_verbosity,
websocket_enabled,
websocket_v2_enabled,
false,
runtime_metrics_enabled,
None,
);
WebsocketTestHarness {
_codex_home: codex_home,
client,
model_info,
effort,
summary,
otel_manager,
}
}
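
// Drives a stream until its `Completed` event, discarding every other event.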
async fn stream_until_complete(
client_session: &mut ModelClientSession,
harness: &WebsocketTestHarness,
prompt: &Prompt,
) {
let mut stream = client_session
.stream(
prompt,
&harness.model_info,
&harness.otel_manager,
harness.effort,
harness.summary,
None,
)
.await
.expect("websocket stream failed");
while let Some(event) = stream.next().await {
if matches!(event, Ok(ResponseEvent::Completed { .. })) {
break;
}
}
}