Compare commits

...

4 Commits

Author SHA1 Message Date
Eric Traut
170f8dac53 Fixed merge conflict 2025-12-18 13:25:59 -08:00
Eric Traut
4c088dd3ff Merge branch 'main' into drifkin/ollama-oss-responses 2025-12-18 13:01:07 -06:00
Devon Rifkin
e643e363f7 also detect Ollama version for the built-in, non-`--oss` case
Previously we were only downgrading to chat for older `ollama`s when
using `--oss`, but you can also use the built-in ollama provider via
`model_provider = "ollama"` in your config
2025-12-17 20:25:48 -08:00
Devon Rifkin
502ae9ff54 ollama_oss: default WireApi to Responses, fall back to Chat
In Ollama v0.13.4 `/v1/responses` support was added. When Ollama is used
via `--oss`, it now defaults to Responses. At runtime we do a version
check and if the version is older than when Responses support was added,
we fall back to `/v1/chat/completions`.
2025-12-17 17:13:37 -08:00
10 changed files with 183 additions and 11 deletions

1
codex-rs/Cargo.lock generated
View File

@@ -1571,6 +1571,7 @@ dependencies = [
"codex-core",
"futures",
"reqwest",
"semver",
"serde_json",
"tokio",
"tracing",

View File

@@ -181,6 +181,7 @@ reqwest = "0.12"
rmcp = { version = "0.10.0", default-features = false }
schemars = "0.8.22"
seccompiler = "0.5.0"
semver = "1.0"
sentry = "0.46.0"
serde = "1"
serde_json = "1"

View File

@@ -2,6 +2,7 @@
use codex_core::LMSTUDIO_OSS_PROVIDER_ID;
use codex_core::OLLAMA_OSS_PROVIDER_ID;
use codex_core::WireApi;
use codex_core::config::Config;
/// Returns the default model for a given OSS provider.
@@ -13,10 +14,28 @@ pub fn get_default_model_for_oss_provider(provider_id: &str) -> Option<&'static
}
}
/// Detect whether the selected Ollama instance supports the responses API and, if not, downgrade
/// to the chat completions wire API. This should run whenever the Ollama provider is selected,
/// even when `--oss` is not in use, so older servers remain compatible.
pub async fn detect_ollama_wire_api_if_needed(config: &mut Config) {
    // Only relevant when the built-in Ollama provider is active and is still
    // configured with the Responses default; anything else is left untouched.
    let ollama_selected = config.model_provider_id == OLLAMA_OSS_PROVIDER_ID;
    let responses_configured = config.model_provider.wire_api == WireApi::Responses;
    if !(ollama_selected && responses_configured) {
        return;
    }
    // Best effort: when detection fails (server unreachable, version endpoint
    // missing or unparsable) the configured default is kept as-is.
    if let Ok(Some(detection)) = codex_ollama::detect_wire_api(&config.model_provider).await {
        if detection.wire_api == WireApi::Chat {
            config.model_provider.wire_api = WireApi::Chat;
        }
    }
}
/// Ensures the specified OSS provider is ready (models downloaded, service reachable).
pub async fn ensure_oss_provider_ready(
provider_id: &str,
config: &Config,
config: &mut Config,
) -> Result<(), std::io::Error> {
match provider_id {
LMSTUDIO_OSS_PROVIDER_ID => {
@@ -25,6 +44,8 @@ pub async fn ensure_oss_provider_ready(
.map_err(|e| std::io::Error::other(format!("OSS setup failed: {e}")))?;
}
OLLAMA_OSS_PROVIDER_ID => {
detect_ollama_wire_api_if_needed(config).await;
codex_ollama::ensure_oss_ready(config)
.await
.map_err(|e| std::io::Error::other(format!("OSS setup failed: {e}")))?;

View File

@@ -273,7 +273,7 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
("openai", P::create_openai_provider()),
(
OLLAMA_OSS_PROVIDER_ID,
create_oss_provider(DEFAULT_OLLAMA_PORT, WireApi::Chat),
create_oss_provider(DEFAULT_OLLAMA_PORT, WireApi::Responses),
),
(
LMSTUDIO_OSS_PROVIDER_ID,

View File

@@ -13,6 +13,7 @@ pub mod exec_events;
pub use cli::Cli;
pub use cli::Command;
pub use cli::ReviewArgs;
use codex_common::oss::detect_ollama_wire_api_if_needed;
use codex_common::oss::ensure_oss_provider_ready;
use codex_common::oss::get_default_model_for_oss_provider;
use codex_core::AuthManager;
@@ -202,9 +203,13 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
additional_writable_roots: add_dir,
};
let config =
let mut config =
Config::load_with_cli_overrides_and_harness_overrides(cli_kv_overrides, overrides).await?;
if !oss {
detect_ollama_wire_api_if_needed(&mut config).await;
}
if let Err(err) = enforce_login_restrictions(&config).await {
eprintln!("{err}");
std::process::exit(1);
@@ -252,7 +257,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
));
}
};
ensure_oss_provider_ready(provider_id, &config)
ensure_oss_provider_ready(provider_id, &mut config)
.await
.map_err(|e| anyhow::anyhow!("OSS setup failed: {e}"))?;
}

View File

@@ -27,6 +27,7 @@ tokio = { workspace = true, features = [
] }
tracing = { workspace = true, features = ["log"] }
wiremock = { workspace = true }
semver = { workspace = true }
[dev-dependencies]
assert_matches = { workspace = true }

View File

@@ -1,6 +1,7 @@
use bytes::BytesMut;
use futures::StreamExt;
use futures::stream::BoxStream;
use semver::Version;
use serde_json::Value as JsonValue;
use std::collections::VecDeque;
use std::io;
@@ -53,7 +54,7 @@ impl OllamaClient {
}
/// Build a client from a provider definition and verify the server is reachable.
async fn try_from_provider(provider: &ModelProviderInfo) -> io::Result<Self> {
pub(crate) async fn try_from_provider(provider: &ModelProviderInfo) -> io::Result<Self> {
#![expect(clippy::expect_used)]
let base_url = provider
.base_url
@@ -125,6 +126,32 @@ impl OllamaClient {
Ok(names)
}
/// Query the server for its version string, returning `None` when unavailable.
pub async fn fetch_version(&self) -> io::Result<Option<Version>> {
let version_url = format!("{}/api/version", self.host_root.trim_end_matches('/'));
let resp = self
.client
.get(version_url)
.send()
.await
.map_err(io::Error::other)?;
if !resp.status().is_success() {
return Ok(None);
}
let val = resp.json::<JsonValue>().await.map_err(io::Error::other)?;
let Some(version_str) = val.get("version").and_then(|v| v.as_str()).map(str::trim) else {
return Ok(None);
};
let normalized = version_str.trim_start_matches('v');
match Version::parse(normalized) {
Ok(version) => Ok(Some(version)),
Err(err) => {
tracing::warn!("Failed to parse Ollama version `{version_str}`: {err}");
Ok(None)
}
}
}
/// Start a model pull and emit streaming events. The returned stream ends when
/// a Success event is observed or the server closes the connection.
pub async fn pull_model_stream(
@@ -269,6 +296,42 @@ mod tests {
assert!(models.contains(&"mistral".to_string()));
}
    // Verifies `fetch_version` against a mocked Ollama server: a JSON
    // `{"version": "0.14.1"}` payload must parse into `Version::new(0, 14, 1)`.
    #[tokio::test]
    async fn test_fetch_version() {
        // Network-restricted sandboxes cannot bind the mock server; skip there.
        if std::env::var(codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
            tracing::info!(
                "{} is set; skipping test_fetch_version",
                codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR
            );
            return;
        }
        let server = wiremock::MockServer::start().await;
        // `/api/tags` is mocked so client construction succeeds — presumably
        // `try_from_provider_with_base_url` probes it for reachability; an
        // empty model list is sufficient. TODO(review): confirm against client.
        wiremock::Mock::given(wiremock::matchers::method("GET"))
            .and(wiremock::matchers::path("/api/tags"))
            .respond_with(wiremock::ResponseTemplate::new(200).set_body_raw(
                serde_json::json!({ "models": [] }).to_string(),
                "application/json",
            ))
            .mount(&server)
            .await;
        // `/api/version` serves the payload exercised by `fetch_version`.
        wiremock::Mock::given(wiremock::matchers::method("GET"))
            .and(wiremock::matchers::path("/api/version"))
            .respond_with(wiremock::ResponseTemplate::new(200).set_body_raw(
                serde_json::json!({ "version": "0.14.1" }).to_string(),
                "application/json",
            ))
            .mount(&server)
            .await;
        let client = OllamaClient::try_from_provider_with_base_url(server.uri().as_str())
            .await
            .expect("client");
        let version = client.fetch_version().await.expect("version fetch");
        assert_eq!(version, Some(Version::new(0, 14, 1)));
    }
#[tokio::test]
async fn test_probe_server_happy_path_openai_compat_and_native() {
if std::env::var(codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {

View File

@@ -4,15 +4,23 @@ mod pull;
mod url;
pub use client::OllamaClient;
use codex_core::ModelProviderInfo;
use codex_core::WireApi;
use codex_core::config::Config;
pub use pull::CliProgressReporter;
pub use pull::PullEvent;
pub use pull::PullProgressReporter;
pub use pull::TuiProgressReporter;
use semver::Version;
/// Default OSS model to use when `--oss` is passed without an explicit `-m`.
pub const DEFAULT_OSS_MODEL: &str = "gpt-oss:20b";
/// Result of probing a running Ollama server for the wire API it supports.
pub struct WireApiDetection {
    // The wire API the server should be driven with.
    pub wire_api: WireApi,
    // The server version the decision was based on, when it could be determined.
    pub version: Option<Version>,
}
/// Prepare the local OSS environment when `--oss` is selected.
///
/// - Ensures a local Ollama server is reachable.
@@ -45,3 +53,67 @@ pub async fn ensure_oss_ready(config: &Config) -> std::io::Result<()> {
Ok(())
}
/// The first Ollama release with `/v1/responses` support (added in v0.13.4);
/// older servers must fall back to `/v1/chat/completions`.
fn min_responses_version() -> Version {
    Version::new(0, 13, 4)
}
/// Choose the wire API to use for a given Ollama server version.
///
/// A reported version of exactly `0.0.0` (seen on dev builds, per the test
/// below) keeps the `Responses` default; otherwise versions older than the
/// cutoff fall back to `Chat`.
fn wire_api_for_version(version: &Version) -> WireApi {
    let is_dev_build = *version == Version::new(0, 0, 0);
    if is_dev_build || *version >= min_responses_version() {
        WireApi::Responses
    } else {
        WireApi::Chat
    }
}
/// Detect which wire API the running Ollama server supports based on its version.
/// Returns `Ok(None)` when the version endpoint is missing or unparsable; callers
/// should keep the configured default in that case.
pub async fn detect_wire_api(
    provider: &ModelProviderInfo,
) -> std::io::Result<Option<WireApiDetection>> {
    // Propagate connection errors; a reachable server with no usable version
    // string maps to `None` instead.
    let client = crate::OllamaClient::try_from_provider(provider).await?;
    let detection = client.fetch_version().await?.map(|version| WireApiDetection {
        wire_api: wire_api_for_version(&version),
        version: Some(version),
    });
    Ok(detection)
}
#[cfg(test)]
mod tests {
    use super::*;

    // A reported version of 0.0.0 must not force a downgrade away from the
    // Responses default.
    #[test]
    fn test_wire_api_for_version_dev_zero_keeps_responses() {
        assert_eq!(
            wire_api_for_version(&Version::new(0, 0, 0)),
            WireApi::Responses
        );
    }

    // Anything older than the 0.13.4 cutoff falls back to chat completions.
    #[test]
    fn test_wire_api_for_version_before_cutoff_is_chat() {
        assert_eq!(wire_api_for_version(&Version::new(0, 13, 3)), WireApi::Chat);
    }

    // The cutoff version itself and anything newer use the responses API.
    #[test]
    fn test_wire_api_for_version_at_or_after_cutoff_is_responses() {
        assert_eq!(
            wire_api_for_version(&Version::new(0, 13, 4)),
            WireApi::Responses
        );
        assert_eq!(
            wire_api_for_version(&Version::new(0, 14, 0)),
            WireApi::Responses
        );
    }
}

View File

@@ -7,6 +7,7 @@ use additional_dirs::add_dir_warning_message;
use app::App;
pub use app::AppExitInfo;
use codex_app_server_protocol::AuthMode;
use codex_common::oss::detect_ollama_wire_api_if_needed;
use codex_common::oss::ensure_oss_provider_ready;
use codex_common::oss::get_default_model_for_oss_provider;
use codex_core::AuthManager;
@@ -215,7 +216,7 @@ pub async fn run_main(
..Default::default()
};
let config = load_config_or_exit(cli_kv_overrides.clone(), overrides.clone()).await;
let mut config = load_config_or_exit(cli_kv_overrides.clone(), overrides.clone()).await;
if let Some(warning) = add_dir_warning_message(&cli.add_dir, &config.sandbox_policy) {
#[allow(clippy::print_stderr)]
@@ -288,7 +289,7 @@ pub async fn run_main(
));
}
};
ensure_oss_provider_ready(provider_id, &config).await?;
ensure_oss_provider_ready(provider_id, &mut config).await?;
}
let otel = codex_core::otel_init::build_provider(&config, env!("CARGO_PKG_VERSION"));
@@ -547,7 +548,10 @@ async fn load_config_or_exit(
) -> Config {
#[allow(clippy::print_stderr)]
match Config::load_with_cli_overrides_and_harness_overrides(cli_kv_overrides, overrides).await {
Ok(config) => config,
Ok(mut config) => {
detect_ollama_wire_api_if_needed(&mut config).await;
config
}
Err(err) => {
eprintln!("Error loading configuration: {err}");
std::process::exit(1);

View File

@@ -7,6 +7,7 @@ use additional_dirs::add_dir_warning_message;
use app::App;
pub use app::AppExitInfo;
use codex_app_server_protocol::AuthMode;
use codex_common::oss::detect_ollama_wire_api_if_needed;
use codex_common::oss::ensure_oss_provider_ready;
use codex_common::oss::get_default_model_for_oss_provider;
use codex_core::AuthManager;
@@ -221,7 +222,7 @@ pub async fn run_main(
additional_writable_roots: additional_dirs,
};
let config = load_config_or_exit(cli_kv_overrides.clone(), overrides.clone()).await;
let mut config = load_config_or_exit(cli_kv_overrides.clone(), overrides.clone()).await;
if let Some(warning) = add_dir_warning_message(&cli.add_dir, &config.sandbox_policy) {
#[allow(clippy::print_stderr)]
@@ -294,7 +295,7 @@ pub async fn run_main(
));
}
};
ensure_oss_provider_ready(provider_id, &config).await?;
ensure_oss_provider_ready(provider_id, &mut config).await?;
}
let otel = codex_core::otel_init::build_provider(&config, env!("CARGO_PKG_VERSION"));
@@ -573,7 +574,10 @@ async fn load_config_or_exit(
) -> Config {
#[allow(clippy::print_stderr)]
match Config::load_with_cli_overrides_and_harness_overrides(cli_kv_overrides, overrides).await {
Ok(config) => config,
Ok(mut config) => {
detect_ollama_wire_api_if_needed(&mut config).await;
config
}
Err(err) => {
eprintln!("Error loading configuration: {err}");
std::process::exit(1);