Session-level model client (#10664)

Make ModelClient a session-scoped object.
Move state that is session level onto the client, and make state that is
per-turn explicit on corresponding methods.
Stop taking a huge Config object, instead only pass in values that are
actually needed.

---------

Co-authored-by: Josh McKinney <joshka@openai.com>
This commit is contained in:
pakrym-oai
2026-02-04 16:58:48 -08:00
committed by GitHub
parent 224c9f768d
commit 0e8d359da9
12 changed files with 601 additions and 333 deletions

View File

@@ -8,7 +8,6 @@ use codex_core::ModelProviderInfo;
use codex_core::Prompt;
use codex_core::ResponseEvent;
use codex_core::ResponseItem;
use codex_core::TransportManager;
use codex_core::WireApi;
use codex_core::X_RESPONSESAPI_INCLUDE_TIMING_METRICS_HEADER;
use codex_core::features::Feature;
@@ -20,6 +19,8 @@ use codex_otel::metrics::MetricsConfig;
use codex_protocol::ThreadId;
use codex_protocol::account::PlanType;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
use core_test_support::load_default_config_for_test;
use core_test_support::responses::WebSocketConnectionConfig;
use core_test_support::responses::WebSocketTestServer;
@@ -42,6 +43,10 @@ const MODEL: &str = "gpt-5.2-codex";
struct WebsocketTestHarness {
_codex_home: TempDir,
client: ModelClient,
model_info: ModelInfo,
effort: Option<ReasoningEffortConfig>,
summary: ReasoningSummary,
web_search_eligible: bool,
otel_manager: OtelManager,
}
@@ -56,10 +61,10 @@ async fn responses_websocket_streams_request() {
.await;
let harness = websocket_harness(&server).await;
let mut session = harness.client.new_session(None);
let mut client_session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
stream_until_complete(&mut session, &prompt).await;
stream_until_complete(&mut client_session, &harness, &prompt).await;
let connection = server.single_connection();
assert_eq!(connection.len(), 1);
@@ -86,10 +91,10 @@ async fn responses_websocket_emits_websocket_telemetry_events() {
let harness = websocket_harness(&server).await;
harness.otel_manager.reset_runtime_metrics();
let mut session = harness.client.new_session(None);
let mut client_session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
stream_until_complete(&mut session, &prompt).await;
stream_until_complete(&mut client_session, &harness, &prompt).await;
tokio::time::sleep(Duration::from_millis(10)).await;
@@ -124,10 +129,10 @@ async fn responses_websocket_includes_timing_metrics_header_when_runtime_metrics
let harness = websocket_harness_with_runtime_metrics(&server, true).await;
harness.otel_manager.reset_runtime_metrics();
let mut session = harness.client.new_session(None);
let mut client_session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
stream_until_complete(&mut session, &prompt).await;
stream_until_complete(&mut client_session, &harness, &prompt).await;
tokio::time::sleep(Duration::from_millis(10)).await;
let handshake = server.single_handshake();
@@ -157,10 +162,10 @@ async fn responses_websocket_omits_timing_metrics_header_when_runtime_metrics_di
.await;
let harness = websocket_harness_with_runtime_metrics(&server, false).await;
let mut session = harness.client.new_session(None);
let mut client_session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
stream_until_complete(&mut session, &prompt).await;
stream_until_complete(&mut client_session, &harness, &prompt).await;
let handshake = server.single_handshake();
assert_eq!(
@@ -182,11 +187,19 @@ async fn responses_websocket_emits_reasoning_included_event() {
.await;
let harness = websocket_harness(&server).await;
let mut session = harness.client.new_session(None);
let mut client_session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
let mut stream = session
.stream(&prompt)
let mut stream = client_session
.stream(
&prompt,
&harness.model_info,
&harness.otel_manager,
harness.effort,
harness.summary,
harness.web_search_eligible,
None,
)
.await
.expect("websocket stream failed");
@@ -245,11 +258,19 @@ async fn responses_websocket_emits_rate_limit_events() {
.await;
let harness = websocket_harness(&server).await;
let mut session = harness.client.new_session(None);
let mut client_session = harness.client.new_session();
let prompt = prompt_with_input(vec![message_item("hello")]);
let mut stream = session
.stream(&prompt)
let mut stream = client_session
.stream(
&prompt,
&harness.model_info,
&harness.otel_manager,
harness.effort,
harness.summary,
harness.web_search_eligible,
None,
)
.await
.expect("websocket stream failed");
@@ -300,12 +321,12 @@ async fn responses_websocket_appends_on_prefix() {
.await;
let harness = websocket_harness(&server).await;
let mut session = harness.client.new_session(None);
let mut client_session = harness.client.new_session();
let prompt_one = prompt_with_input(vec![message_item("hello")]);
let prompt_two = prompt_with_input(vec![message_item("hello"), message_item("second")]);
stream_until_complete(&mut session, &prompt_one).await;
stream_until_complete(&mut session, &prompt_two).await;
stream_until_complete(&mut client_session, &harness, &prompt_one).await;
stream_until_complete(&mut client_session, &harness, &prompt_two).await;
let connection = server.single_connection();
assert_eq!(connection.len(), 2);
@@ -336,12 +357,12 @@ async fn responses_websocket_creates_on_non_prefix() {
.await;
let harness = websocket_harness(&server).await;
let mut session = harness.client.new_session(None);
let mut client_session = harness.client.new_session();
let prompt_one = prompt_with_input(vec![message_item("hello")]);
let prompt_two = prompt_with_input(vec![message_item("different")]);
stream_until_complete(&mut session, &prompt_one).await;
stream_until_complete(&mut session, &prompt_two).await;
stream_until_complete(&mut client_session, &harness, &prompt_one).await;
stream_until_complete(&mut client_session, &harness, &prompt_two).await;
let connection = server.single_connection();
assert_eq!(connection.len(), 2);
@@ -431,29 +452,47 @@ async fn websocket_harness_with_runtime_metrics(
SessionSource::Exec,
)
.with_metrics(metrics);
let effort = None;
let summary = ReasoningSummary::Auto;
let web_search_eligible = true;
let client = ModelClient::new(
Arc::clone(&config),
None,
model_info,
otel_manager.clone(),
provider.clone(),
None,
ReasoningSummary::Auto,
conversation_id,
provider.clone(),
SessionSource::Exec,
TransportManager::new(),
config.model_verbosity,
true,
false,
runtime_metrics_enabled,
None,
);
WebsocketTestHarness {
_codex_home: codex_home,
client,
model_info,
effort,
summary,
web_search_eligible,
otel_manager,
}
}
async fn stream_until_complete(session: &mut ModelClientSession, prompt: &Prompt) {
let mut stream = session
.stream(prompt)
async fn stream_until_complete(
client_session: &mut ModelClientSession,
harness: &WebsocketTestHarness,
prompt: &Prompt,
) {
let mut stream = client_session
.stream(
prompt,
&harness.model_info,
&harness.otel_manager,
harness.effort,
harness.summary,
harness.web_search_eligible,
None,
)
.await
.expect("websocket stream failed");