Add unit test for the models-catalog ETag refresh flow (412 triggers a /models refresh), and split `refresh_available_models` into a cache-aware `try_refresh_available_models` plus a forced refresh

This commit is contained in:
Ahmed Ibrahim
2025-12-18 19:37:24 -08:00
parent 09693d259b
commit e01610f762
4 changed files with 312 additions and 12 deletions

View File

@@ -249,7 +249,7 @@ impl Codex {
let config = Arc::new(config);
if config.features.enabled(Feature::RemoteModels)
&& let Err(err) = models_manager.refresh_available_models(&config).await
&& let Err(err) = models_manager.try_refresh_available_models(&config).await
{
error!("failed to refresh available models: {err:?}");
}

View File

@@ -77,7 +77,7 @@ impl ModelsManager {
}
/// Fetch the latest remote models, using the on-disk cache when still fresh.
pub async fn refresh_available_models(&self, config: &Config) -> CoreResult<()> {
pub async fn try_refresh_available_models(&self, config: &Config) -> CoreResult<()> {
if !config.features.enabled(Feature::RemoteModels)
|| self.auth_manager.get_auth_mode() == Some(AuthMode::ApiKey)
{
@@ -86,7 +86,15 @@ impl ModelsManager {
if self.try_load_cache().await {
return Ok(());
}
self.refresh_available_models(config).await
}
pub async fn refresh_available_models(&self, config: &Config) -> CoreResult<()> {
if !config.features.enabled(Feature::RemoteModels)
|| self.auth_manager.get_auth_mode() == Some(AuthMode::ApiKey)
{
return Ok(());
}
let auth = self.auth_manager.auth();
let api_provider = self.provider.to_api_provider(Some(AuthMode::ChatGPT))?;
let api_auth = auth_provider_from_auth(auth.clone(), &self.provider).await?;
@@ -108,7 +116,7 @@ impl ModelsManager {
}
pub async fn list_models(&self, config: &Config) -> Vec<ModelPreset> {
if let Err(err) = self.refresh_available_models(config).await {
if let Err(err) = self.try_refresh_available_models(config).await {
error!("failed to refresh available models: {err}");
}
let remote_models = self.remote_models(config).await;
@@ -139,7 +147,7 @@ impl ModelsManager {
if let Some(model) = model.as_ref() {
return model.to_string();
}
if let Err(err) = self.refresh_available_models(config).await {
if let Err(err) = self.try_refresh_available_models(config).await {
error!("failed to refresh available models: {err}");
}
// if codex-auto-balanced exists & signed in with chatgpt mode, return it, otherwise return the default model
@@ -410,7 +418,7 @@ mod tests {
let manager = ModelsManager::with_provider(auth_manager, provider);
manager
.refresh_available_models(&config)
.try_refresh_available_models(&config)
.await
.expect("refresh succeeds");
let cached_remote = manager.remote_models(&config).await;
@@ -469,7 +477,7 @@ mod tests {
let manager = ModelsManager::with_provider(auth_manager, provider);
manager
.refresh_available_models(&config)
.try_refresh_available_models(&config)
.await
.expect("first refresh succeeds");
assert_eq!(
@@ -480,7 +488,7 @@ mod tests {
// Second call should read from cache and avoid the network.
manager
.refresh_available_models(&config)
.try_refresh_available_models(&config)
.await
.expect("cached refresh succeeds");
assert_eq!(
@@ -523,7 +531,7 @@ mod tests {
let manager = ModelsManager::with_provider(auth_manager, provider);
manager
.refresh_available_models(&config)
.try_refresh_available_models(&config)
.await
.expect("initial refresh succeeds");
@@ -548,7 +556,7 @@ mod tests {
.await;
manager
.refresh_available_models(&config)
.try_refresh_available_models(&config)
.await
.expect("second refresh succeeds");
assert_eq!(
@@ -594,7 +602,7 @@ mod tests {
manager.cache_ttl = Duration::ZERO;
manager
.refresh_available_models(&config)
.try_refresh_available_models(&config)
.await
.expect("initial refresh succeeds");
@@ -609,7 +617,7 @@ mod tests {
.await;
manager
.refresh_available_models(&config)
.try_refresh_available_models(&config)
.await
.expect("second refresh succeeds");

View File

@@ -670,6 +670,24 @@ pub async fn mount_models_once(server: &MockServer, body: ModelsResponse) -> Mod
models_mock
}
/// Mount a one-shot `/models` mock that responds with `body` and the given
/// `etag` response header, returning the [`ModelsMock`] handle used to
/// inspect the requests the server received.
///
/// Mirrors `mount_models_once`, but lets tests pin the ETag that the client
/// is later expected to echo back.
pub async fn mount_models_once_with_etag(
    server: &MockServer,
    body: ModelsResponse,
    etag: &str,
) -> ModelsMock {
    let (mock, models_mock) = models_mock();
    mock.respond_with(
        ResponseTemplate::new(200)
            .insert_header("content-type", "application/json")
            .insert_header("etag", etag)
            // `body` is owned and not used again, so move it directly into
            // the response template (the previous `body.clone()` was a
            // redundant allocation).
            .set_body_json(body),
    )
    .up_to_n_times(1)
    .mount(server)
    .await;
    models_mock
}
pub async fn start_mock_server() -> MockServer {
let server = MockServer::builder()
.body_print_limit(BodyPrintLimit::Limited(80_000))

View File

@@ -33,8 +33,12 @@ use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::ev_shell_command_call;
use core_test_support::responses::mount_models_once;
use core_test_support::responses::mount_models_once_with_etag;
use core_test_support::responses::mount_response_once_match;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::mount_sse_once_match;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
@@ -42,6 +46,7 @@ use core_test_support::skip_if_sandbox;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use pretty_assertions::assert_eq;
use serde_json::Value;
use serde_json::json;
use tempfile::TempDir;
use tokio::time::Duration;
@@ -49,9 +54,92 @@ use tokio::time::Instant;
use tokio::time::sleep;
use wiremock::BodyPrintLimit;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
const REMOTE_MODEL_SLUG: &str = "codex-test";
// Request matcher for the mocked /responses endpoint. Each field is an
// optional constraint; `None` means "don't check".
#[derive(Clone, Default)]
struct ResponsesMatch {
    // Expected value of the `X-If-Models-Match` request header.
    etag: Option<String>,
    // User message text that must appear in the request's `input` items.
    user_text: Option<String>,
    // `call_id` of a `function_call_output` item that must appear in `input`.
    call_id: Option<String>,
}
impl ResponsesMatch {
    /// Constrain the matcher to requests whose `X-If-Models-Match` header
    /// equals `etag`.
    fn with_etag(self, etag: &str) -> Self {
        Self {
            etag: Some(etag.to_owned()),
            ..self
        }
    }

    /// Constrain the matcher to requests whose `input` contains a user
    /// message with exactly `text`.
    fn with_user_text(self, text: &str) -> Self {
        Self {
            user_text: Some(text.to_owned()),
            ..self
        }
    }

    /// Constrain the matcher to requests whose `input` contains a
    /// `function_call_output` item with the given `call_id`.
    fn with_function_call_output(self, call_id: &str) -> Self {
        Self {
            call_id: Some(call_id.to_owned()),
            ..self
        }
    }
}
impl wiremock::Match for ResponsesMatch {
    /// A request matches only if it satisfies every configured constraint:
    /// the optional ETag header, the optional user message text, and the
    /// optional `function_call_output` call id.
    fn matches(&self, request: &wiremock::Request) -> bool {
        if let Some(expected_etag) = &self.etag {
            // Header must be present, valid UTF-8, and exactly equal.
            let header = request
                .headers
                .get("X-If-Models-Match")
                .and_then(|value| value.to_str().ok());
            if header != Some(expected_etag.as_str()) {
                return false;
            }
        }
        // Requests without a JSON body, or whose body lacks an `input`
        // array, never match.
        let Ok(body): Result<Value, _> = request.body_json() else {
            return false;
        };
        let Some(items) = body.get("input").and_then(Value::as_array) else {
            return false;
        };
        if let Some(expected_text) = &self.user_text
            && !input_has_user_text(items, expected_text)
        {
            return false;
        }
        if let Some(expected_call_id) = &self.call_id
            && !input_has_function_call_output(items, expected_call_id)
        {
            return false;
        }
        true
    }
}
/// Returns true when `items` contains a user message whose `content`
/// includes an `input_text` span exactly equal to `expected`.
fn input_has_user_text(items: &[Value], expected: &str) -> bool {
    for item in items {
        let is_user_message = item.get("type").and_then(Value::as_str) == Some("message")
            && item.get("role").and_then(Value::as_str) == Some("user");
        if !is_user_message {
            continue;
        }
        let Some(content) = item.get("content").and_then(Value::as_array) else {
            continue;
        };
        let has_expected_span = content.iter().any(|span| {
            span.get("type").and_then(Value::as_str) == Some("input_text")
                && span.get("text").and_then(Value::as_str) == Some(expected)
        });
        if has_expected_span {
            return true;
        }
    }
    false
}
/// Returns true when `items` contains a `function_call_output` item whose
/// `call_id` equals the given id.
fn input_has_function_call_output(items: &[Value], call_id: &str) -> bool {
    items.iter().any(|item| {
        let item_type = item.get("type").and_then(Value::as_str);
        let item_call_id = item.get("call_id").and_then(Value::as_str);
        item_type == Some("function_call_output") && item_call_id == Some(call_id)
    })
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_remote_model_uses_unified_exec() -> Result<()> {
skip_if_no_network!(Ok(()));
@@ -297,6 +385,192 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> {
Ok(())
}
/// End-to-end check of the models-catalog ETag flow:
/// 1. the initial `/models` fetch records an ETag,
/// 2. a 412 response to a tool-output turn triggers a `/models` refresh,
/// 3. the retried turn and the following user turn carry the refreshed ETag
///    in the `X-If-Models-Match` request header.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_refresh_etag_after_outdated_models() -> Result<()> {
    skip_if_no_network!(Ok(()));
    skip_if_sandbox!(Ok(()));

    let server = MockServer::builder()
        .body_print_limit(BodyPrintLimit::Limited(80_000))
        .start()
        .await;
    let remote_model = test_remote_model("remote-etag", ModelVisibility::List, 1);
    let initial_etag = "models-etag-initial";
    let refreshed_etag = "models-etag-refreshed";

    // Phase 1: start Codex with remote models and capture the initial ETag.
    let models_mock = mount_models_once_with_etag(
        &server,
        ModelsResponse {
            models: vec![remote_model.clone()],
        },
        initial_etag,
    )
    .await;
    let harness = build_remote_models_harness(&server, |config| {
        config.features.enable(Feature::RemoteModels);
        config.model = Some("gpt-5.1".to_string());
    })
    .await?;
    let RemoteModelsHarness {
        codex,
        cwd,
        config,
        conversation_manager,
        ..
    } = harness;
    let models_manager = conversation_manager.get_models_manager();
    wait_for_model_available(&models_manager, "remote-etag", &config).await;
    // The manager should have stored the ETag from the initial fetch.
    assert_eq!(
        models_manager.get_models_etag().await.as_deref(),
        Some(initial_etag),
    );
    assert_eq!(
        models_mock.requests().len(),
        1,
        "expected an initial /models request",
    );
    assert_eq!(models_mock.requests()[0].url.path(), "/v1/models");

    // Phase 2: the tool output turn hits a 412 and triggers a models refresh.
    // Reset the server so the one-shot mocks below fully define its behavior.
    server.reset().await;
    let refreshed_models_mock = mount_models_once_with_etag(
        &server,
        ModelsResponse {
            models: vec![remote_model],
        },
        refreshed_etag,
    )
    .await;
    let call_id = "shell-command-call";
    let first_prompt = "run a shell command";
    let followup_prompt = "send another message";
    // First turn, matched by the initial ETag: replies with a shell call.
    let first_response = mount_sse_once_match(
        &server,
        ResponsesMatch::default()
            .with_etag(initial_etag)
            .with_user_text(first_prompt),
        sse(vec![
            ev_response_created("resp-1"),
            ev_shell_command_call(call_id, "echo refreshed"),
            ev_completed("resp-1"),
        ]),
    )
    .await;
    // Tool-output turn still carrying the stale ETag: rejected with 412.
    let stale_response = mount_response_once_match(
        &server,
        ResponsesMatch::default()
            .with_etag(initial_etag)
            .with_function_call_output(call_id),
        ResponseTemplate::new(412)
            .set_body_string("Models catalog has changed. Please refresh your models list."),
    )
    .await;
    // Retry of the tool-output turn, now carrying the refreshed ETag.
    let refreshed_response = mount_sse_once_match(
        &server,
        ResponsesMatch::default()
            .with_etag(refreshed_etag)
            .with_function_call_output(call_id),
        sse(vec![
            ev_response_created("resp-2"),
            ev_assistant_message("msg-1", "done"),
            ev_completed("resp-2"),
        ]),
    )
    .await;

    // Phase 3: the next user turn should send the refreshed ETag.
    let next_turn_response = mount_sse_once_match(
        &server,
        ResponsesMatch::default()
            .with_etag(refreshed_etag)
            .with_user_text(followup_prompt),
        sse(vec![
            ev_response_created("resp-3"),
            ev_assistant_message("msg-2", "ok"),
            ev_completed("resp-3"),
        ]),
    )
    .await;

    // Drive the first turn (shell call -> 412 -> refresh -> retry).
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: first_prompt.into(),
            }],
            final_output_json_schema: None,
            cwd: cwd.path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: "gpt-5.1".to_string(),
            effort: None,
            summary: ReasoningSummary::Auto,
        })
        .await?;
    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;

    // The 412 must have caused exactly one /models refresh and an updated
    // stored ETag.
    assert_eq!(
        refreshed_models_mock.requests().len(),
        1,
        "expected a refreshed /models request",
    );
    assert_eq!(
        models_manager.get_models_etag().await.as_deref(),
        Some(refreshed_etag),
    );
    // Verify which ETag each /responses request actually carried.
    assert_eq!(
        first_response.single_request().header("X-If-Models-Match"),
        Some(initial_etag.to_string()),
    );
    assert_eq!(
        stale_response.single_request().header("X-If-Models-Match"),
        Some(initial_etag.to_string()),
    );
    assert_eq!(
        refreshed_response
            .single_request()
            .header("X-If-Models-Match"),
        Some(refreshed_etag.to_string()),
    );

    // Drive the follow-up turn and confirm it also uses the refreshed ETag.
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: followup_prompt.into(),
            }],
            final_output_json_schema: None,
            cwd: cwd.path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: "gpt-5.1".to_string(),
            effort: None,
            summary: ReasoningSummary::Auto,
        })
        .await?;
    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
    assert_eq!(
        next_turn_response
            .single_request()
            .header("X-If-Models-Match"),
        Some(refreshed_etag.to_string()),
    );
    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_preserve_builtin_presets() -> Result<()> {
skip_if_no_network!(Ok(()));
@@ -327,7 +601,7 @@ async fn remote_models_preserve_builtin_presets() -> Result<()> {
);
manager
.refresh_available_models(&config)
.try_refresh_available_models(&config)
.await
.expect("refresh succeeds");