Files
codex/codex-rs/core/src/client.rs
2025-11-25 18:06:12 +00:00

543 lines
19 KiB
Rust

use std::sync::Arc;
use crate::api_bridge::auth_provider_from_auth;
use crate::api_bridge::map_api_error;
use codex_api::AggregateStreamExt;
use codex_api::ChatClient as ApiChatClient;
use codex_api::CompactClient as ApiCompactClient;
use codex_api::CompactionInput as ApiCompactionInput;
use codex_api::Prompt as ApiPrompt;
use codex_api::RequestTelemetry;
use codex_api::ReqwestTransport;
use codex_api::ResponseStream as ApiResponseStream;
use codex_api::ResponsesClient as ApiResponsesClient;
use codex_api::ResponsesOptions as ApiResponsesOptions;
use codex_api::SseTelemetry;
use codex_api::TransportError;
use codex_api::common::Reasoning;
use codex_api::create_text_param_for_request;
use codex_api::error::ApiError;
use codex_app_server_protocol::AuthMode;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::ConversationId;
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::SessionSource;
use eventsource_stream::Event;
use eventsource_stream::EventStreamError;
use futures::StreamExt;
use http::HeaderMap as ApiHeaderMap;
use http::HeaderValue;
use http::StatusCode as HttpStatusCode;
use reqwest::StatusCode;
use serde_json::Value;
use std::time::Duration;
use tokio::sync::mpsc;
use tracing::warn;
use crate::AuthManager;
use crate::auth::RefreshTokenError;
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
use crate::client_common::ResponseStream;
use crate::config::Config;
use crate::default_client::build_reqwest_client;
use crate::error::CodexErr;
use crate::error::Result;
use crate::flags::CODEX_RS_SSE_FIXTURE;
use crate::model_family::ModelFamily;
use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::WireApi;
use crate::openai_model_info::get_model_info;
use crate::tools::spec::create_tools_json_for_chat_completions_api;
use crate::tools::spec::create_tools_json_for_responses_api;
#[derive(Debug, Clone)]
pub struct ModelClient {
config: Arc<Config>,
auth_manager: Option<Arc<AuthManager>>,
otel_event_manager: OtelEventManager,
provider: ModelProviderInfo,
conversation_id: ConversationId,
effort: Option<ReasoningEffortConfig>,
summary: ReasoningSummaryConfig,
session_source: SessionSource,
}
#[allow(clippy::too_many_arguments)]
impl ModelClient {
pub fn new(
config: Arc<Config>,
auth_manager: Option<Arc<AuthManager>>,
otel_event_manager: OtelEventManager,
provider: ModelProviderInfo,
effort: Option<ReasoningEffortConfig>,
summary: ReasoningSummaryConfig,
conversation_id: ConversationId,
session_source: SessionSource,
) -> Self {
Self {
config,
auth_manager,
otel_event_manager,
provider,
conversation_id,
effort,
summary,
session_source,
}
}
pub fn get_model_context_window(&self) -> Option<i64> {
let pct = self.config.model_family.effective_context_window_percent;
self.config
.model_context_window
.or_else(|| get_model_info(&self.config.model_family).map(|info| info.context_window))
.map(|w| w.saturating_mul(pct) / 100)
}
pub fn get_auto_compact_token_limit(&self) -> Option<i64> {
self.config.model_auto_compact_token_limit.or_else(|| {
get_model_info(&self.config.model_family).and_then(|info| info.auto_compact_token_limit)
})
}
pub fn config(&self) -> Arc<Config> {
Arc::clone(&self.config)
}
pub fn provider(&self) -> &ModelProviderInfo {
&self.provider
}
/// Streams a single model turn using either the Responses or Chat
/// Completions wire API, depending on the configured provider.
///
/// For Chat providers, the underlying stream is optionally aggregated
/// based on the `show_raw_agent_reasoning` flag in the config.
pub async fn stream(&self, prompt: &Prompt) -> Result<ResponseStream> {
match self.provider.wire_api {
WireApi::Responses => self.stream_responses_api(prompt).await,
WireApi::Chat => {
let api_stream = self.stream_chat_completions(prompt).await?;
if self.config.show_raw_agent_reasoning {
Ok(map_response_stream(
api_stream.streaming_mode(),
self.otel_event_manager.clone(),
))
} else {
Ok(map_response_stream(
api_stream.aggregate(),
self.otel_event_manager.clone(),
))
}
}
}
}
/// Streams a turn via the OpenAI Chat Completions API.
///
/// This path is only used when the provider is configured with
/// `WireApi::Chat`; it does not support `output_schema` today.
async fn stream_chat_completions(&self, prompt: &Prompt) -> Result<ApiResponseStream> {
if prompt.output_schema.is_some() {
return Err(CodexErr::UnsupportedOperation(
"output_schema is not supported for Chat Completions API".to_string(),
));
}
let auth_manager = self.auth_manager.clone();
let instructions = prompt
.get_full_instructions(&self.config.model_family)
.into_owned();
let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?;
let api_prompt = build_api_prompt(prompt, instructions, tools_json);
let conversation_id = self.conversation_id.to_string();
let session_source = self.session_source.clone();
let mut refreshed = false;
loop {
let auth = auth_manager.as_ref().and_then(|m| m.auth());
let api_provider = self
.provider
.to_api_provider(auth.as_ref().map(|a| a.mode))?;
let api_auth = auth_provider_from_auth(auth.clone(), &self.provider).await?;
let transport = ReqwestTransport::new(build_reqwest_client());
let (request_telemetry, sse_telemetry) = self.build_streaming_telemetry();
let client = ApiChatClient::new(transport, api_provider, api_auth)
.with_telemetry(Some(request_telemetry), Some(sse_telemetry));
let stream_result = client
.stream_prompt(
&self.config.model,
&api_prompt,
Some(conversation_id.clone()),
Some(session_source.clone()),
)
.await;
match stream_result {
Ok(stream) => return Ok(stream),
Err(ApiError::Transport(TransportError::Http { status, .. }))
if status == StatusCode::UNAUTHORIZED =>
{
handle_unauthorized(status, &mut refreshed, &auth_manager, &auth).await?;
continue;
}
Err(err) => return Err(map_api_error(err)),
}
}
}
/// Streams a turn via the OpenAI Responses API.
///
/// Handles SSE fixtures, reasoning summaries, verbosity, and the
/// `text` controls used for output schemas.
async fn stream_responses_api(&self, prompt: &Prompt) -> Result<ResponseStream> {
if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
warn!(path, "Streaming from fixture");
let stream = codex_api::stream_from_fixture(path, self.provider.stream_idle_timeout())
.map_err(map_api_error)?;
return Ok(map_response_stream(stream, self.otel_event_manager.clone()));
}
let auth_manager = self.auth_manager.clone();
let instructions = prompt
.get_full_instructions(&self.config.model_family)
.into_owned();
let tools_json: Vec<Value> = create_tools_json_for_responses_api(&prompt.tools)?;
let reasoning = if self.config.model_family.supports_reasoning_summaries {
Some(Reasoning {
effort: self
.effort
.or(self.config.model_family.default_reasoning_effort),
summary: Some(self.summary),
})
} else {
None
};
let include: Vec<String> = if reasoning.is_some() {
vec!["reasoning.encrypted_content".to_string()]
} else {
vec![]
};
let verbosity = if self.config.model_family.support_verbosity {
self.config
.model_verbosity
.or(self.config.model_family.default_verbosity)
} else {
if self.config.model_verbosity.is_some() {
warn!(
"model_verbosity is set but ignored as the model does not support verbosity: {}",
self.config.model_family.family
);
}
None
};
let text = create_text_param_for_request(verbosity, &prompt.output_schema);
let api_prompt = build_api_prompt(prompt, instructions.clone(), tools_json);
let conversation_id = self.conversation_id.to_string();
let session_source = self.session_source.clone();
let mut refreshed = false;
loop {
let auth = auth_manager.as_ref().and_then(|m| m.auth());
let api_provider = self
.provider
.to_api_provider(auth.as_ref().map(|a| a.mode))?;
let api_auth = auth_provider_from_auth(auth.clone(), &self.provider).await?;
let transport = ReqwestTransport::new(build_reqwest_client());
let (request_telemetry, sse_telemetry) = self.build_streaming_telemetry();
let client = ApiResponsesClient::new(transport, api_provider, api_auth)
.with_telemetry(Some(request_telemetry), Some(sse_telemetry));
let options = ApiResponsesOptions {
reasoning: reasoning.clone(),
include: include.clone(),
prompt_cache_key: Some(conversation_id.clone()),
text: text.clone(),
store_override: None,
conversation_id: Some(conversation_id.clone()),
session_source: Some(session_source.clone()),
};
let stream_result = client
.stream_prompt(&self.config.model, &api_prompt, options)
.await;
match stream_result {
Ok(stream) => {
return Ok(map_response_stream(stream, self.otel_event_manager.clone()));
}
Err(ApiError::Transport(TransportError::Http { status, .. }))
if status == StatusCode::UNAUTHORIZED =>
{
handle_unauthorized(status, &mut refreshed, &auth_manager, &auth).await?;
continue;
}
Err(err) => return Err(map_api_error(err)),
}
}
}
pub fn get_provider(&self) -> ModelProviderInfo {
self.provider.clone()
}
pub fn get_otel_event_manager(&self) -> OtelEventManager {
self.otel_event_manager.clone()
}
pub fn get_session_source(&self) -> SessionSource {
self.session_source.clone()
}
/// Returns the currently configured model slug.
pub fn get_model(&self) -> String {
self.config.model.clone()
}
/// Returns the currently configured model family.
pub fn get_model_family(&self) -> ModelFamily {
self.config.model_family.clone()
}
/// Returns the current reasoning effort setting.
pub fn get_reasoning_effort(&self) -> Option<ReasoningEffortConfig> {
self.effort
}
/// Returns the current reasoning summary setting.
pub fn get_reasoning_summary(&self) -> ReasoningSummaryConfig {
self.summary
}
pub fn get_auth_manager(&self) -> Option<Arc<AuthManager>> {
self.auth_manager.clone()
}
/// Compacts the current conversation history using the Compact endpoint.
///
/// This is a unary call (no streaming) that returns a new list of
/// `ResponseItem`s representing the compacted transcript.
pub async fn compact_conversation_history(&self, prompt: &Prompt) -> Result<Vec<ResponseItem>> {
if prompt.input.is_empty() {
return Ok(Vec::new());
}
let auth_manager = self.auth_manager.clone();
let auth = auth_manager.as_ref().and_then(|m| m.auth());
let api_provider = self
.provider
.to_api_provider(auth.as_ref().map(|a| a.mode))?;
let api_auth = auth_provider_from_auth(auth.clone(), &self.provider).await?;
let transport = ReqwestTransport::new(build_reqwest_client());
let request_telemetry = self.build_request_telemetry();
let client = ApiCompactClient::new(transport, api_provider, api_auth)
.with_telemetry(Some(request_telemetry));
let instructions = prompt
.get_full_instructions(&self.config.model_family)
.into_owned();
let payload = ApiCompactionInput {
model: &self.config.model,
input: &prompt.input,
instructions: &instructions,
};
let mut extra_headers = ApiHeaderMap::new();
if let SessionSource::SubAgent(sub) = &self.session_source {
let subagent = if let crate::protocol::SubAgentSource::Other(label) = sub {
label.clone()
} else {
serde_json::to_value(sub)
.ok()
.and_then(|v| v.as_str().map(std::string::ToString::to_string))
.unwrap_or_else(|| "other".to_string())
};
if let Ok(val) = HeaderValue::from_str(&subagent) {
extra_headers.insert("x-openai-subagent", val);
}
}
client
.compact_input(&payload, extra_headers)
.await
.map_err(map_api_error)
}
}
impl ModelClient {
/// Builds request and SSE telemetry for streaming API calls (Chat/Responses).
fn build_streaming_telemetry(&self) -> (Arc<dyn RequestTelemetry>, Arc<dyn SseTelemetry>) {
let telemetry = Arc::new(ApiTelemetry::new(self.otel_event_manager.clone()));
let request_telemetry: Arc<dyn RequestTelemetry> = telemetry.clone();
let sse_telemetry: Arc<dyn SseTelemetry> = telemetry;
(request_telemetry, sse_telemetry)
}
/// Builds request telemetry for unary API calls (e.g., Compact endpoint).
fn build_request_telemetry(&self) -> Arc<dyn RequestTelemetry> {
let telemetry = Arc::new(ApiTelemetry::new(self.otel_event_manager.clone()));
let request_telemetry: Arc<dyn RequestTelemetry> = telemetry;
request_telemetry
}
}
/// Adapts the core `Prompt` type into the `codex-api` payload shape.
fn build_api_prompt(prompt: &Prompt, instructions: String, tools_json: Vec<Value>) -> ApiPrompt {
ApiPrompt {
instructions,
input: prompt.get_formatted_input(),
tools: tools_json,
parallel_tool_calls: prompt.parallel_tool_calls,
output_schema: prompt.output_schema.clone(),
}
}
fn map_response_stream<S>(api_stream: S, otel_event_manager: OtelEventManager) -> ResponseStream
where
S: futures::Stream<Item = std::result::Result<ResponseEvent, ApiError>>
+ Unpin
+ Send
+ 'static,
{
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
let manager = otel_event_manager;
tokio::spawn(async move {
let mut logged_error = false;
let mut api_stream = api_stream;
while let Some(event) = api_stream.next().await {
match event {
Ok(ResponseEvent::Completed {
response_id,
token_usage,
}) => {
if let Some(usage) = &token_usage {
manager.sse_event_completed(
usage.input_tokens,
usage.output_tokens,
Some(usage.cached_input_tokens),
Some(usage.reasoning_output_tokens),
usage.total_tokens,
);
}
if tx_event
.send(Ok(ResponseEvent::Completed {
response_id,
token_usage,
}))
.await
.is_err()
{
return;
}
}
Ok(event) => {
if tx_event.send(Ok(event)).await.is_err() {
return;
}
}
Err(err) => {
let mapped = map_api_error(err);
if !logged_error {
manager.see_event_completed_failed(&mapped);
logged_error = true;
}
if tx_event.send(Err(mapped)).await.is_err() {
return;
}
}
}
}
});
ResponseStream { rx_event }
}
/// Handles a 401 response by optionally refreshing ChatGPT tokens once.
///
/// When refresh succeeds, the caller should retry the API call; otherwise
/// the mapped `CodexErr` is returned to the caller.
async fn handle_unauthorized(
status: StatusCode,
refreshed: &mut bool,
auth_manager: &Option<Arc<AuthManager>>,
auth: &Option<crate::auth::CodexAuth>,
) -> Result<()> {
if *refreshed {
return Err(map_unauthorized_status(status));
}
if let Some(manager) = auth_manager.as_ref()
&& let Some(auth) = auth.as_ref()
&& auth.mode == AuthMode::ChatGPT
{
match manager.refresh_token().await {
Ok(_) => {
*refreshed = true;
Ok(())
}
Err(RefreshTokenError::Permanent(failed)) => Err(CodexErr::RefreshTokenFailed(failed)),
Err(RefreshTokenError::Transient(other)) => Err(CodexErr::Io(other)),
}
} else {
Err(map_unauthorized_status(status))
}
}
fn map_unauthorized_status(status: StatusCode) -> CodexErr {
map_api_error(ApiError::Transport(TransportError::Http {
status,
headers: None,
body: None,
}))
}
struct ApiTelemetry {
otel_event_manager: OtelEventManager,
}
impl ApiTelemetry {
fn new(otel_event_manager: OtelEventManager) -> Self {
Self { otel_event_manager }
}
}
impl RequestTelemetry for ApiTelemetry {
fn on_request(
&self,
attempt: u64,
status: Option<HttpStatusCode>,
error: Option<&TransportError>,
duration: Duration,
) {
let error_message = error.map(std::string::ToString::to_string);
self.otel_event_manager.record_api_request(
attempt,
status.map(|s| s.as_u16()),
error_message.as_deref(),
duration,
);
}
}
impl SseTelemetry for ApiTelemetry {
fn on_sse_poll(
&self,
result: &std::result::Result<
Option<std::result::Result<Event, EventStreamError<TransportError>>>,
tokio::time::error::Elapsed,
>,
duration: Duration,
) {
self.otel_event_manager.log_sse_event(result, duration);
}
}