Merge branch 'main' into nornagon/fix-openpty-test

This commit is contained in:
Jeremy Rose
2025-12-11 15:12:10 -08:00
committed by GitHub
1107 changed files with 73769 additions and 5440 deletions

View File

@@ -1,3 +1,5 @@
#![allow(clippy::expect_used)]
use std::sync::Arc;
use codex_app_server_protocol::AuthMode;
@@ -10,6 +12,7 @@ use codex_core::ModelProviderInfo;
use codex_core::Prompt;
use codex_core::ResponseItem;
use codex_core::WireApi;
use codex_core::openai_models::models_manager::ModelsManager;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::ConversationId;
use codex_protocol::models::ReasoningItemContent;
@@ -70,14 +73,15 @@ async fn run_request(input: Vec<ResponseItem>) -> Value {
let config = Arc::new(config);
let conversation_id = ConversationId::new();
let model = ModelsManager::get_model_offline(config.model.as_deref());
let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config);
let otel_event_manager = OtelEventManager::new(
conversation_id,
config.model.as_str(),
config.model_family.slug.as_str(),
model.as_str(),
model_family.slug.as_str(),
None,
Some("test@test.com".to_string()),
Some(AuthMode::ChatGPT),
Some(AuthMode::ApiKey),
false,
"test".to_string(),
);
@@ -85,6 +89,7 @@ async fn run_request(input: Vec<ResponseItem>) -> Value {
let client = ModelClient::new(
Arc::clone(&config),
None,
model_family,
otel_event_manager,
provider,
effort,
@@ -106,11 +111,15 @@ async fn run_request(input: Vec<ResponseItem>) -> Value {
}
}
let requests = match server.received_requests().await {
Some(reqs) => reqs,
None => panic!("request not made"),
};
match requests[0].body_json() {
let all_requests = server.received_requests().await.expect("received requests");
let requests: Vec<_> = all_requests
.iter()
.filter(|req| req.method == "POST" && req.url.path().ends_with("/chat/completions"))
.collect();
let request = requests
.first()
.unwrap_or_else(|| panic!("expected POST request to /chat/completions"));
match request.body_json() {
Ok(v) => v,
Err(e) => panic!("invalid json body: {e}"),
}

View File

@@ -1,8 +1,9 @@
use assert_matches::assert_matches;
use codex_core::AuthManager;
use std::sync::Arc;
use tracing_test::traced_test;
use codex_app_server_protocol::AuthMode;
use codex_core::CodexAuth;
use codex_core::ContentItem;
use codex_core::ModelClient;
use codex_core::ModelProviderInfo;
@@ -10,6 +11,7 @@ use codex_core::Prompt;
use codex_core::ResponseEvent;
use codex_core::ResponseItem;
use codex_core::WireApi;
use codex_core::openai_models::models_manager::ModelsManager;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::ConversationId;
use codex_protocol::models::ReasoningItemContent;
@@ -70,14 +72,17 @@ async fn run_stream_with_bytes(sse_body: &[u8]) -> Vec<ResponseEvent> {
let config = Arc::new(config);
let conversation_id = ConversationId::new();
let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
let auth_mode = auth_manager.get_auth_mode();
let model = ModelsManager::get_model_offline(config.model.as_deref());
let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config);
let otel_event_manager = OtelEventManager::new(
conversation_id,
config.model.as_str(),
config.model_family.slug.as_str(),
model.as_str(),
model_family.slug.as_str(),
None,
Some("test@test.com".to_string()),
Some(AuthMode::ChatGPT),
auth_mode,
false,
"test".to_string(),
);
@@ -85,6 +90,7 @@ async fn run_stream_with_bytes(sse_body: &[u8]) -> Vec<ResponseEvent> {
let client = ModelClient::new(
Arc::clone(&config),
None,
model_family,
otel_event_manager,
provider,
effort,

View File

@@ -11,7 +11,7 @@ path = "lib.rs"
anyhow = { workspace = true }
assert_cmd = { workspace = true }
base64 = { workspace = true }
codex-core = { workspace = true }
codex-core = { workspace = true, features = ["test-support"] }
codex-protocol = { workspace = true }
notify = { workspace = true }
regex-lite = { workspace = true }

View File

@@ -181,6 +181,16 @@ pub fn format_with_current_shell_display(command: &str) -> String {
shlex::try_join(args.iter().map(String::as_str)).expect("serialize current shell command")
}
pub fn format_with_current_shell_non_login(command: &str) -> Vec<String> {
codex_core::shell::default_user_shell().derive_exec_args(command, false)
}
pub fn format_with_current_shell_display_non_login(command: &str) -> String {
let args = format_with_current_shell_non_login(command);
shlex::try_join(args.iter().map(String::as_str))
.expect("serialize current shell command without login")
}
pub mod fs_wait {
use anyhow::Result;
use anyhow::anyhow;
@@ -369,3 +379,13 @@ macro_rules! skip_if_no_network {
}
}};
}
#[macro_export]
macro_rules! skip_if_windows {
($return_value:expr $(,)?) => {{
if cfg!(target_os = "windows") {
println!("Skipping test because it cannot execute on Windows.");
return $return_value;
}
}};
}

View File

@@ -3,6 +3,7 @@ use std::sync::Mutex;
use anyhow::Result;
use base64::Engine;
use codex_protocol::openai_models::ModelsResponse;
use serde_json::Value;
use wiremock::BodyPrintLimit;
use wiremock::Match;
@@ -193,6 +194,38 @@ impl ResponsesRequest {
}
}
#[derive(Debug, Clone)]
pub struct ModelsMock {
requests: Arc<Mutex<Vec<wiremock::Request>>>,
}
impl ModelsMock {
fn new() -> Self {
Self {
requests: Arc::new(Mutex::new(Vec::new())),
}
}
pub fn requests(&self) -> Vec<wiremock::Request> {
self.requests.lock().unwrap().clone()
}
pub fn single_request_path(&self) -> String {
let requests = self.requests.lock().unwrap();
if requests.len() != 1 {
panic!("expected 1 request, got {}", requests.len());
}
requests.first().unwrap().url.path().to_string()
}
}
impl Match for ModelsMock {
fn matches(&self, request: &wiremock::Request) -> bool {
self.requests.lock().unwrap().push(request.clone());
true
}
}
impl Match for ResponseMock {
fn matches(&self, request: &wiremock::Request) -> bool {
self.requests
@@ -560,6 +593,14 @@ fn compact_mock() -> (MockBuilder, ResponseMock) {
(mock, response_mock)
}
fn models_mock() -> (MockBuilder, ModelsMock) {
let models_mock = ModelsMock::new();
let mock = Mock::given(method("GET"))
.and(path_regex(".*/models$"))
.and(models_mock.clone());
(mock, models_mock)
}
pub async fn mount_sse_once_match<M>(server: &MockServer, matcher: M, body: String) -> ResponseMock
where
M: wiremock::Match + Send + Sync + 'static,
@@ -616,11 +657,63 @@ pub async fn mount_compact_json_once(server: &MockServer, body: serde_json::Valu
response_mock
}
pub async fn mount_models_once(server: &MockServer, body: ModelsResponse) -> ModelsMock {
let (mock, models_mock) = models_mock();
mock.respond_with(
ResponseTemplate::new(200)
.insert_header("content-type", "application/json")
.set_body_json(body.clone()),
)
.up_to_n_times(1)
.mount(server)
.await;
models_mock
}
pub async fn start_mock_server() -> MockServer {
MockServer::builder()
let server = MockServer::builder()
.body_print_limit(BodyPrintLimit::Limited(80_000))
.start()
.await;
// Provide a default `/models` response so tests remain hermetic when the client queries it.
let _ = mount_models_once(
&server,
ModelsResponse {
models: Vec::new(),
etag: String::new(),
},
)
.await;
server
}
// todo(aibrahim): remove this and use our search matching patterns directly
/// Get all POST requests to `/responses` endpoints from the mock server.
/// Filters out GET requests (e.g., `/models`) .
pub async fn get_responses_requests(server: &MockServer) -> Vec<wiremock::Request> {
server
.received_requests()
.await
.expect("mock server should not fail")
.into_iter()
.filter(|req| req.method == "POST" && req.url.path().ends_with("/responses"))
.collect()
}
// todo(aibrahim): remove this and use our search matching patterns directly
/// Get request bodies as JSON values from POST requests to `/responses` endpoints.
/// Filters out GET requests (e.g., `/models`) .
pub async fn get_responses_request_bodies(server: &MockServer) -> Vec<Value> {
get_responses_requests(server)
.await
.into_iter()
.map(|req| {
req.body_json::<Value>()
.expect("request body to be valid JSON")
})
.collect()
}
#[derive(Clone)]
@@ -703,6 +796,10 @@ pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) -> Res
/// - Additionally, enforce symmetry: every `function_call`/`custom_tool_call`
/// in the `input` must have a matching output entry.
fn validate_request_body_invariants(request: &wiremock::Request) {
// Skip GET requests (e.g., /models)
if request.method != "POST" || !request.url.path().ends_with("/responses") {
return;
}
let Ok(body): Result<Value, _> = request.body_json() else {
return;
};

View File

@@ -11,7 +11,6 @@ use codex_core::ModelProviderInfo;
use codex_core::built_in_model_providers;
use codex_core::config::Config;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
@@ -24,10 +23,12 @@ use tempfile::TempDir;
use wiremock::MockServer;
use crate::load_default_config_for_test;
use crate::responses::get_responses_request_bodies;
use crate::responses::start_mock_server;
use crate::wait_for_event;
type ConfigMutator = dyn FnOnce(&mut Config) + Send;
type PreBuildHook = dyn FnOnce(&Path) + Send + 'static;
/// A collection of different ways the model can output an apply_patch call
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
@@ -51,6 +52,7 @@ pub enum ShellModelOutput {
pub struct TestCodexBuilder {
config_mutators: Vec<Box<ConfigMutator>>,
auth: CodexAuth,
pre_build_hooks: Vec<Box<PreBuildHook>>,
}
impl TestCodexBuilder {
@@ -70,11 +72,18 @@ impl TestCodexBuilder {
pub fn with_model(self, model: &str) -> Self {
let new_model = model.to_string();
self.with_config(move |config| {
config.model = new_model.clone();
config.model_family = find_family_for_model(&new_model).expect("model family");
config.model = Some(new_model.clone());
})
}
pub fn with_pre_build_hook<F>(mut self, hook: F) -> Self
where
F: FnOnce(&Path) + Send + 'static,
{
self.pre_build_hooks.push(Box::new(hook));
self
}
pub async fn build(&mut self, server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
let home = Arc::new(TempDir::new()?);
self.build_with_home(server, home, None).await
@@ -98,7 +107,8 @@ impl TestCodexBuilder {
let (config, cwd) = self.prepare_config(server, &home).await?;
let auth = self.auth.clone();
let conversation_manager = ConversationManager::with_auth(auth.clone());
let conversation_manager =
ConversationManager::with_models_provider(auth.clone(), config.model_provider.clone());
let new_conversation = match resume_from {
Some(path) => {
@@ -120,6 +130,7 @@ impl TestCodexBuilder {
config,
codex: new_conversation.conversation,
session_configured: new_conversation.session_configured,
conversation_manager: Arc::new(conversation_manager),
})
}
@@ -136,6 +147,9 @@ impl TestCodexBuilder {
let mut config = load_default_config_for_test(home);
config.cwd = cwd.path().to_path_buf();
config.model_provider = model_provider;
for hook in self.pre_build_hooks.drain(..) {
hook(home.path());
}
if let Ok(cmd) = assert_cmd::Command::cargo_bin("codex") {
config.codex_linux_sandbox_exe = Some(PathBuf::from(cmd.get_program().to_os_string()));
}
@@ -162,6 +176,7 @@ pub struct TestCodex {
pub codex: Arc<CodexConversation>,
pub session_configured: SessionConfiguredEvent,
pub config: Config,
pub conversation_manager: Arc<ConversationManager>,
}
impl TestCodex {
@@ -169,6 +184,10 @@ impl TestCodex {
self.cwd.path()
}
pub fn codex_home_path(&self) -> &Path {
self.config.codex_home.as_path()
}
pub fn workspace_path(&self, rel: impl AsRef<Path>) -> PathBuf {
self.cwd_path().join(rel)
}
@@ -272,13 +291,7 @@ impl TestCodexHarness {
}
pub async fn request_bodies(&self) -> Vec<Value> {
self.server
.received_requests()
.await
.expect("requests")
.into_iter()
.map(|req| serde_json::from_slice(&req.body).expect("request body json"))
.collect()
get_responses_request_bodies(&self.server).await
}
pub async fn function_call_output_value(&self, call_id: &str) -> Value {
@@ -355,5 +368,6 @@ pub fn test_codex() -> TestCodexBuilder {
TestCodexBuilder {
config_mutators: vec![],
auth: CodexAuth::from_api_key("dummy"),
pre_build_hooks: vec![],
}
}

View File

@@ -1,6 +1,8 @@
use std::sync::Arc;
use codex_app_server_protocol::AuthMode;
use codex_core::AuthManager;
use codex_core::CodexAuth;
use codex_core::ContentItem;
use codex_core::ModelClient;
use codex_core::ModelProviderInfo;
@@ -8,8 +10,11 @@ use codex_core::Prompt;
use codex_core::ResponseEvent;
use codex_core::ResponseItem;
use codex_core::WireApi;
use codex_core::openai_models::models_manager::ModelsManager;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::ConversationId;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::openai_models::ReasoningSummaryFormat;
use codex_protocol::protocol::SessionSource;
use core_test_support::load_default_config_for_test;
use core_test_support::responses;
@@ -56,17 +61,20 @@ async fn responses_stream_includes_subagent_header_on_review() {
config.model_provider = provider.clone();
let effort = config.model_reasoning_effort;
let summary = config.model_reasoning_summary;
let model = ModelsManager::get_model_offline(config.model.as_deref());
config.model = Some(model.clone());
let config = Arc::new(config);
let conversation_id = ConversationId::new();
let auth_mode = AuthMode::ChatGPT;
let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config);
let otel_event_manager = OtelEventManager::new(
conversation_id,
config.model.as_str(),
config.model_family.slug.as_str(),
model.as_str(),
model_family.slug.as_str(),
None,
Some("test@test.com".to_string()),
Some(AuthMode::ChatGPT),
Some(auth_mode),
false,
"test".to_string(),
);
@@ -74,6 +82,7 @@ async fn responses_stream_includes_subagent_header_on_review() {
let client = ModelClient::new(
Arc::clone(&config),
None,
model_family,
otel_event_manager,
provider,
effort,
@@ -144,17 +153,21 @@ async fn responses_stream_includes_subagent_header_on_other() {
config.model_provider = provider.clone();
let effort = config.model_reasoning_effort;
let summary = config.model_reasoning_summary;
let model = ModelsManager::get_model_offline(config.model.as_deref());
config.model = Some(model.clone());
let config = Arc::new(config);
let conversation_id = ConversationId::new();
let auth_mode = AuthMode::ChatGPT;
let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config);
let otel_event_manager = OtelEventManager::new(
conversation_id,
config.model.as_str(),
config.model_family.slug.as_str(),
model.as_str(),
model_family.slug.as_str(),
None,
Some("test@test.com".to_string()),
Some(AuthMode::ChatGPT),
Some(auth_mode),
false,
"test".to_string(),
);
@@ -162,6 +175,7 @@ async fn responses_stream_includes_subagent_header_on_other() {
let client = ModelClient::new(
Arc::clone(&config),
None,
model_family,
otel_event_manager,
provider,
effort,
@@ -194,3 +208,110 @@ async fn responses_stream_includes_subagent_header_on_other() {
Some("my-task")
);
}
#[tokio::test]
async fn responses_respects_model_family_overrides_from_config() {
core_test_support::skip_if_no_network!();
let server = responses::start_mock_server().await;
let response_body = responses::sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_completed("resp-1"),
]);
let request_recorder = responses::mount_sse_once(&server, response_body).await;
let provider = ModelProviderInfo {
name: "mock".into(),
base_url: Some(format!("{}/v1", server.uri())),
env_key: None,
env_key_instructions: None,
experimental_bearer_token: None,
wire_api: WireApi::Responses,
query_params: None,
http_headers: None,
env_http_headers: None,
request_max_retries: Some(0),
stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(5_000),
requires_openai_auth: false,
};
let codex_home = TempDir::new().expect("failed to create TempDir");
let mut config = load_default_config_for_test(&codex_home);
config.model = Some("gpt-3.5-turbo".to_string());
config.model_provider_id = provider.name.clone();
config.model_provider = provider.clone();
config.model_supports_reasoning_summaries = Some(true);
config.model_reasoning_summary_format = Some(ReasoningSummaryFormat::Experimental);
config.model_reasoning_summary = ReasoningSummary::Detailed;
let effort = config.model_reasoning_effort;
let summary = config.model_reasoning_summary;
let model = config.model.clone().expect("model configured");
let config = Arc::new(config);
let conversation_id = ConversationId::new();
let auth_mode =
AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")).get_auth_mode();
let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config);
let otel_event_manager = OtelEventManager::new(
conversation_id,
model.as_str(),
model_family.slug.as_str(),
None,
Some("test@test.com".to_string()),
auth_mode,
false,
"test".to_string(),
);
let client = ModelClient::new(
Arc::clone(&config),
None,
model_family,
otel_event_manager,
provider,
effort,
summary,
conversation_id,
SessionSource::SubAgent(codex_protocol::protocol::SubAgentSource::Other(
"override-check".to_string(),
)),
);
let mut prompt = Prompt::default();
prompt.input = vec![ResponseItem::Message {
id: None,
role: "user".into(),
content: vec![ContentItem::InputText {
text: "hello".into(),
}],
}];
let mut stream = client.stream(&prompt).await.expect("stream failed");
while let Some(event) = stream.next().await {
if matches!(event, Ok(ResponseEvent::Completed { .. })) {
break;
}
}
let request = request_recorder.single_request();
let body = request.body_json();
let reasoning = body
.get("reasoning")
.and_then(|value| value.as_object())
.cloned();
assert!(
reasoning.is_some(),
"reasoning should be present when config enables summaries"
);
assert_eq!(
reasoning
.as_ref()
.and_then(|value| value.get("summary"))
.and_then(|value| value.as_str()),
Some("detailed")
);
}

View File

@@ -1250,3 +1250,94 @@ async fn apply_patch_change_context_disambiguates_target(
assert_eq!(contents, "fn a\nx=10\ny=2\nfn b\nx=11\ny=20\n");
Ok(())
}
/// Ensure that applying a patch can update a CRLF file with unicode characters.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::Function)]
#[test_case(ApplyPatchModelOutput::Shell)]
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_updates_unicode_characters(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let target = harness.path("unicode.txt");
fs::write(&target, "first ⚠️\nsecond ❌\nthird 🔥\n")?;
let patch = format!(
r#"*** Begin Patch
*** Update File: {}
@@
first ⚠️
-second ❌
+SECOND ✅
@@
third 🔥
+FOURTH
*** End of File
*** End Patch"#,
target.display()
);
let call_id = "apply-unicode-update";
mount_apply_patch(&harness, call_id, patch.as_str(), "ok", model_output).await;
harness
.submit("update unicode characters via apply_patch CLI")
.await?;
let file_contents = fs::read(&target)?;
let content = String::from_utf8_lossy(&file_contents);
assert_eq!(content, "first ⚠️\nSECOND ✅\nthird 🔥\nFOURTH\n");
Ok(())
}
/// Ensure that applying a patch via the CLI preserves CRLF line endings for
/// Windows-style inputs even when updating the file contents.
#[cfg(windows)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::Function)]
#[test_case(ApplyPatchModelOutput::Shell)]
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_updates_crlf_file_preserves_line_endings(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let target = harness.path("crlf.txt");
fs::write(&target, b"first\r\nsecond\r\nthird\r\n")?;
let patch = format!(
r#"*** Begin Patch
*** Update File: {}
@@
first
-second
+SECOND
@@
third
+FOURTH
*** End of File
*** End Patch"#,
target.display()
);
let call_id = "apply-crlf-update";
mount_apply_patch(&harness, call_id, patch.as_str(), "ok", model_output).await;
harness
.submit("update crlf file via apply_patch CLI")
.await?;
let file_contents = fs::read(&target)?;
let content = String::from_utf8_lossy(&file_contents);
assert!(content.contains("\r\n"));
assert_eq!(content, "first\r\nSECOND\r\nthird\r\nFOURTH\r\n");
Ok(())
}

View File

@@ -6,8 +6,10 @@ use codex_core::protocol::ApplyPatchApprovalRequestEvent;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecApprovalRequestEvent;
use codex_core::protocol::ExecPolicyAmendment;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_core::sandboxing::SandboxPermissions;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::user_input::UserInput;
@@ -95,14 +97,14 @@ impl ActionKind {
test: &TestCodex,
server: &MockServer,
call_id: &str,
with_escalated_permissions: bool,
sandbox_permissions: SandboxPermissions,
) -> Result<(Value, Option<String>)> {
match self {
ActionKind::WriteFile { target, content } => {
let (path, _) = target.resolve_for_patch(test);
let _ = fs::remove_file(&path);
let command = format!("printf {content:?} > {path:?} && cat {path:?}");
let event = shell_event(call_id, &command, 1_000, with_escalated_permissions)?;
let event = shell_event(call_id, &command, 1_000, sandbox_permissions)?;
Ok((event, Some(command)))
}
ActionKind::FetchUrl {
@@ -124,11 +126,11 @@ impl ActionKind {
);
let command = format!("python3 -c \"{script}\"");
let event = shell_event(call_id, &command, 1_000, with_escalated_permissions)?;
let event = shell_event(call_id, &command, 3_000, sandbox_permissions)?;
Ok((event, Some(command)))
}
ActionKind::RunCommand { command } => {
let event = shell_event(call_id, command, 1_000, with_escalated_permissions)?;
let event = shell_event(call_id, command, 1_000, sandbox_permissions)?;
Ok((event, Some(command.to_string())))
}
ActionKind::RunUnifiedExecCommand {
@@ -139,7 +141,7 @@ impl ActionKind {
call_id,
command,
Some(1000),
with_escalated_permissions,
sandbox_permissions,
*justification,
)?;
Ok((event, Some(command.to_string())))
@@ -155,7 +157,7 @@ impl ActionKind {
let _ = fs::remove_file(&path);
let patch = build_add_file_patch(&patch_path, content);
let command = shell_apply_patch_command(&patch);
let event = shell_event(call_id, &command, 5_000, with_escalated_permissions)?;
let event = shell_event(call_id, &command, 5_000, sandbox_permissions)?;
Ok((event, Some(command)))
}
}
@@ -180,14 +182,14 @@ fn shell_event(
call_id: &str,
command: &str,
timeout_ms: u64,
with_escalated_permissions: bool,
sandbox_permissions: SandboxPermissions,
) -> Result<Value> {
let mut args = json!({
"command": command,
"timeout_ms": timeout_ms,
});
if with_escalated_permissions {
args["with_escalated_permissions"] = json!(true);
if sandbox_permissions.requires_escalated_permissions() {
args["sandbox_permissions"] = json!(sandbox_permissions);
}
let args_str = serde_json::to_string(&args)?;
Ok(ev_function_call(call_id, "shell_command", &args_str))
@@ -197,7 +199,7 @@ fn exec_command_event(
call_id: &str,
cmd: &str,
yield_time_ms: Option<u64>,
with_escalated_permissions: bool,
sandbox_permissions: SandboxPermissions,
justification: Option<&str>,
) -> Result<Value> {
let mut args = json!({
@@ -206,8 +208,8 @@ fn exec_command_event(
if let Some(yield_time_ms) = yield_time_ms {
args["yield_time_ms"] = json!(yield_time_ms);
}
if with_escalated_permissions {
args["with_escalated_permissions"] = json!(true);
if sandbox_permissions.requires_escalated_permissions() {
args["sandbox_permissions"] = json!(sandbox_permissions);
let reason = justification.unwrap_or(DEFAULT_UNIFIED_EXEC_JUSTIFICATION);
args["justification"] = json!(reason);
}
@@ -465,7 +467,7 @@ struct ScenarioSpec {
approval_policy: AskForApproval,
sandbox_policy: SandboxPolicy,
action: ActionKind,
with_escalated_permissions: bool,
sandbox_permissions: SandboxPermissions,
features: Vec<Feature>,
model_override: Option<&'static str>,
outcome: Outcome,
@@ -636,7 +638,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
content: "danger-on-request",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -653,7 +655,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
content: "danger-on-request",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
@@ -670,7 +672,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/dfa/network",
response_body: "danger-network-ok",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -686,7 +688,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/dfa/network",
response_body: "danger-network-ok",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
@@ -701,7 +703,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
action: ActionKind::RunCommand {
command: "echo trusted-unless",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -716,7 +718,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
action: ActionKind::RunCommand {
command: "echo trusted-unless",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
@@ -732,7 +734,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
content: "danger-on-failure",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -749,7 +751,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
content: "danger-on-failure",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
@@ -766,7 +768,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_unless_trusted.txt"),
content: "danger-unless-trusted",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -786,7 +788,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
content: "danger-unless-trusted",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
@@ -806,7 +808,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_never.txt"),
content: "danger-never",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -823,7 +825,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
content: "danger-never",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
@@ -840,7 +842,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_on_request.txt"),
content: "read-only-approval",
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -860,7 +862,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
content: "read-only-approval",
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
@@ -879,7 +881,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
action: ActionKind::RunCommand {
command: "echo trusted-read-only",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -894,7 +896,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
action: ActionKind::RunCommand {
command: "echo trusted-read-only",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
@@ -910,7 +912,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/ro/network-blocked",
response_body: "should-not-see",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
@@ -924,7 +926,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_on_request_denied.txt"),
content: "should-not-write",
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: None,
outcome: Outcome::ExecApproval {
@@ -945,7 +947,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_on_failure.txt"),
content: "read-only-on-failure",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -966,7 +968,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
content: "read-only-on-failure",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
@@ -986,7 +988,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/ro/network-approved",
response_body: "read-only-network-ok",
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -1005,7 +1007,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/ro/network-approved",
response_body: "read-only-network-ok",
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
@@ -1024,7 +1026,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("apply_patch_shell.txt"),
content: "shell-apply-patch",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: None,
outcome: Outcome::PatchApproval {
@@ -1044,7 +1046,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("apply_patch_function.txt"),
content: "function-apply-patch",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
@@ -1061,7 +1063,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
content: "function-patch-danger",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![Feature::ApplyPatchFreeform],
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
@@ -1078,7 +1080,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("apply_patch_function_outside.txt"),
content: "function-patch-outside",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
@@ -1098,7 +1100,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("apply_patch_function_outside_denied.txt"),
content: "function-patch-outside-denied",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
@@ -1118,7 +1120,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("apply_patch_shell_outside.txt"),
content: "shell-patch-outside",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: None,
outcome: Outcome::PatchApproval {
@@ -1138,7 +1140,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("apply_patch_function_unless_trusted.txt"),
content: "function-patch-unless-trusted",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
@@ -1158,7 +1160,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
content: "function-patch-never",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
@@ -1177,7 +1179,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_unless_trusted.txt"),
content: "read-only-unless-trusted",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -1197,7 +1199,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
content: "read-only-unless-trusted",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
@@ -1217,7 +1219,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_never.txt"),
content: "read-only-never",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
@@ -1240,7 +1242,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
action: ActionKind::RunCommand {
command: "echo trusted-never",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -1256,7 +1258,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ww_on_request.txt"),
content: "workspace-on-request",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -1273,7 +1275,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/ww/network-blocked",
response_body: "workspace-network-blocked",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
@@ -1287,7 +1289,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("ww_on_request_outside.txt"),
content: "workspace-on-request-outside",
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -1307,7 +1309,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/ww/network-ok",
response_body: "workspace-network-ok",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -1324,7 +1326,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("ww_on_failure.txt"),
content: "workspace-on-failure",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -1344,7 +1346,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("ww_unless_trusted.txt"),
content: "workspace-unless-trusted",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -1364,7 +1366,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("ww_never.txt"),
content: "workspace-never",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
@@ -1388,7 +1390,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
command: "echo \"hello unified exec\"",
justification: None,
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![Feature::UnifiedExec],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -1406,7 +1408,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
command: "python3 -c 'print('\"'\"'escalated unified exec'\"'\"')'",
justification: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![Feature::UnifiedExec],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -1425,7 +1427,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
command: "git reset --hard",
justification: None,
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![Feature::UnifiedExec],
model_override: None,
outcome: Outcome::ExecApproval {
@@ -1471,7 +1473,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
let call_id = scenario.name;
let (event, expected_command) = scenario
.action
.prepare(&test, &server, call_id, scenario.with_escalated_permissions)
.prepare(&test, &server, call_id, scenario.sandbox_permissions)
.await?;
let _ = mount_sse_once(
@@ -1523,7 +1525,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
test.codex
.submit(Op::ExecApproval {
id: "0".into(),
decision: *decision,
decision: decision.clone(),
})
.await?;
wait_for_completion(&test).await;
@@ -1544,7 +1546,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
test.codex
.submit(Op::PatchApproval {
id: "0".into(),
decision: *decision,
decision: decision.clone(),
})
.await?;
wait_for_completion(&test).await;
@@ -1557,3 +1559,162 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
Ok(())
}
#[tokio::test(flavor = "current_thread")]
#[cfg(unix)]
/// End-to-end check that approving a proposed execpolicy amendment both
/// persists a `prefix_rule` to `$CODEX_HOME/rules/default.rules` and causes a
/// second, identical command to run without any further approval prompt.
///
/// Flow: (1) run `touch allow-prefix.txt` under `UnlessTrusted` + `ReadOnly`,
/// (2) approve via `ReviewDecision::ApprovedExecpolicyAmendment`, (3) assert
/// the rule file contents, (4) re-run the same command and assert it completes
/// with no approval request.
async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts() -> Result<()> {
    let server = start_mock_server().await;

    // UnlessTrusted + ReadOnly guarantees the first `touch` triggers an
    // exec-approval prompt (it is neither trusted nor permitted read-only).
    let approval_policy = AskForApproval::UnlessTrusted;
    let sandbox_policy = SandboxPolicy::ReadOnly;
    let sandbox_policy_for_config = sandbox_policy.clone();
    let mut builder = test_codex().with_config(move |config| {
        config.approval_policy = approval_policy;
        config.sandbox_policy = sandbox_policy_for_config;
    });
    let test = builder.build(&server).await?;

    // Best-effort cleanup; ignore the error if the file does not exist yet.
    let allow_prefix_path = test.cwd.path().join("allow-prefix.txt");
    let _ = fs::remove_file(&allow_prefix_path);

    let call_id_first = "allow-prefix-first";
    let (first_event, expected_command) = ActionKind::RunCommand {
        command: "touch allow-prefix.txt",
    }
    .prepare(
        &test,
        &server,
        call_id_first,
        SandboxPermissions::UseDefault,
    )
    .await?;
    let expected_command =
        expected_command.expect("execpolicy amendment scenario should produce a shell command");
    // The amendment we expect the model-side prompt to propose: allow any
    // command with this exact prefix.
    let expected_execpolicy_amendment =
        ExecPolicyAmendment::new(vec!["touch".to_string(), "allow-prefix.txt".to_string()]);

    // First SSE response delivers the tool call; the second delivers the
    // follow-up assistant message after the tool output is posted back.
    let _ = mount_sse_once(
        &server,
        sse(vec![
            ev_response_created("resp-allow-prefix-1"),
            first_event,
            ev_completed("resp-allow-prefix-1"),
        ]),
    )
    .await;
    let first_results = mount_sse_once(
        &server,
        sse(vec![
            ev_assistant_message("msg-allow-prefix-1", "done"),
            ev_completed("resp-allow-prefix-2"),
        ]),
    )
    .await;

    submit_turn(
        &test,
        "allow-prefix-first",
        approval_policy,
        sandbox_policy.clone(),
    )
    .await?;

    // The approval request must carry the proposed amendment we computed above.
    let approval = expect_exec_approval(&test, expected_command.as_str()).await;
    assert_eq!(
        approval.proposed_execpolicy_amendment,
        Some(expected_execpolicy_amendment.clone())
    );

    // Approve *with* the amendment, which should persist a policy rule.
    test.codex
        .submit(Op::ExecApproval {
            id: "0".into(),
            decision: ReviewDecision::ApprovedExecpolicyAmendment {
                proposed_execpolicy_amendment: expected_execpolicy_amendment.clone(),
            },
        })
        .await?;
    wait_for_completion(&test).await;

    // The amendment is written as a prefix_rule in the default rules file.
    let policy_path = test.home.path().join("rules").join("default.rules");
    let policy_contents = fs::read_to_string(&policy_path)?;
    assert!(
        policy_contents
            .contains(r#"prefix_rule(pattern=["touch", "allow-prefix.txt"], decision="allow")"#),
        "unexpected policy contents: {policy_contents}"
    );

    // First run succeeded: exit 0, no stdout, and `touch` created an empty file.
    let first_output = parse_result(
        &first_results
            .single_request()
            .function_call_output(call_id_first),
    );
    assert_eq!(first_output.exit_code.unwrap_or(0), 0);
    assert!(
        first_output.stdout.is_empty(),
        "unexpected stdout: {}",
        first_output.stdout
    );
    assert_eq!(
        fs::read_to_string(&allow_prefix_path)?,
        "",
        "unexpected file contents after first run"
    );

    // Second turn: same command, which should now be covered by the persisted
    // rule and therefore require no approval prompt at all.
    let call_id_second = "allow-prefix-second";
    let (second_event, second_command) = ActionKind::RunCommand {
        command: "touch allow-prefix.txt",
    }
    .prepare(
        &test,
        &server,
        call_id_second,
        SandboxPermissions::UseDefault,
    )
    .await?;
    // Sanity check: both turns must resolve to the identical shell command.
    assert_eq!(second_command.as_deref(), Some(expected_command.as_str()));
    let _ = mount_sse_once(
        &server,
        sse(vec![
            ev_response_created("resp-allow-prefix-3"),
            second_event,
            ev_completed("resp-allow-prefix-3"),
        ]),
    )
    .await;
    let second_results = mount_sse_once(
        &server,
        sse(vec![
            ev_assistant_message("msg-allow-prefix-2", "done"),
            ev_completed("resp-allow-prefix-4"),
        ]),
    )
    .await;
    submit_turn(
        &test,
        "allow-prefix-second",
        approval_policy,
        sandbox_policy.clone(),
    )
    .await?;

    // Key assertion of the test: the turn completes with no approval event.
    wait_for_completion_without_approval(&test).await;

    let second_output = parse_result(
        &second_results
            .single_request()
            .function_call_output(call_id_second),
    );
    assert_eq!(second_output.exit_code.unwrap_or(0), 0);
    assert!(
        second_output.stdout.is_empty(),
        "unexpected stdout: {}",
        second_output.stdout
    );
    assert_eq!(
        fs::read_to_string(&allow_prefix_path)?,
        "",
        "unexpected file contents after second run"
    );
    Ok(())
}

View File

@@ -1,4 +1,4 @@
use codex_app_server_protocol::AuthMode;
use codex_core::AuthManager;
use codex_core::CodexAuth;
use codex_core::ContentItem;
use codex_core::ConversationManager;
@@ -16,21 +16,27 @@ use codex_core::auth::AuthCredentialsStoreMode;
use codex_core::built_in_model_providers;
use codex_core::error::CodexErr;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::openai_models::models_manager::ModelsManager;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SessionSource;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::ConversationId;
use codex_protocol::config_types::ReasoningEffort;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::config_types::Verbosity;
use codex_protocol::models::ReasoningItemContent;
use codex_protocol::models::ReasoningItemReasoningSummary;
use codex_protocol::models::WebSearchAction;
use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
use core_test_support::load_sse_fixture_with_id;
use core_test_support::responses;
use core_test_support::responses::ev_completed_with_tokens;
use core_test_support::responses::get_responses_requests;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::mount_sse_once_match;
use core_test_support::responses::sse;
use core_test_support::responses::sse_failed;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
@@ -240,7 +246,7 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
// Mock server that will receive the resumed request
let server = MockServer::start().await;
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
// Configure Codex to resume from our file
let model_provider = ModelProviderInfo {
@@ -253,8 +259,10 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
// Also configure user instructions to ensure they are NOT delivered on resume.
config.user_instructions = Some("be nice".to_string());
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("Test API Key"),
config.model_provider.clone(),
);
let auth_manager =
codex_core::AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
let NewConversation {
@@ -337,8 +345,10 @@ async fn includes_conversation_id_and_model_headers_in_request() {
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = model_provider;
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("Test API Key"),
config.model_provider.clone(),
);
let NewConversation {
conversation: codex,
conversation_id,
@@ -360,7 +370,10 @@ async fn includes_conversation_id_and_model_headers_in_request() {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// get request from the server
let request = &server.received_requests().await.unwrap()[0];
let requests = get_responses_requests(&server).await;
let request = requests
.first()
.expect("expected POST request to /responses");
let request_conversation_id = request.headers.get("conversation_id").unwrap();
let request_authorization = request.headers.get("authorization").unwrap();
let request_originator = request.headers.get("originator").unwrap();
@@ -381,7 +394,7 @@ async fn includes_base_instructions_override_in_request() {
skip_if_no_network!();
// Mock server
let server = MockServer::start().await;
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
@@ -393,8 +406,10 @@ async fn includes_base_instructions_override_in_request() {
config.base_instructions = Some("test instructions".to_string());
config.model_provider = model_provider;
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("Test API Key"),
config.model_provider.clone(),
);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -451,7 +466,10 @@ async fn chatgpt_auth_sends_correct_request() {
let codex_home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = model_provider;
let conversation_manager = ConversationManager::with_auth(create_dummy_codex_auth());
let conversation_manager = ConversationManager::with_models_provider(
create_dummy_codex_auth(),
config.model_provider.clone(),
);
let NewConversation {
conversation: codex,
conversation_id,
@@ -473,7 +491,10 @@ async fn chatgpt_auth_sends_correct_request() {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// get request from the server
let request = &server.received_requests().await.unwrap()[0];
let requests = get_responses_requests(&server).await;
let request = requests
.first()
.expect("expected POST request to /responses");
let request_conversation_id = request.headers.get("conversation_id").unwrap();
let request_authorization = request.headers.get("authorization").unwrap();
let request_originator = request.headers.get("originator").unwrap();
@@ -569,7 +590,7 @@ async fn includes_user_instructions_message_in_request() {
skip_if_no_network!();
let server = MockServer::start().await;
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
@@ -581,8 +602,10 @@ async fn includes_user_instructions_message_in_request() {
config.model_provider = model_provider;
config.user_instructions = Some("be nice".to_string());
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("Test API Key"),
config.model_provider.clone(),
);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -627,7 +650,7 @@ async fn skills_append_to_instructions_when_feature_enabled() {
skip_if_no_network!();
let server = MockServer::start().await;
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
@@ -648,8 +671,10 @@ async fn skills_append_to_instructions_when_feature_enabled() {
config.features.enable(Feature::Skills);
config.cwd = codex_home.path().to_path_buf();
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("Test API Key"),
config.model_provider.clone(),
);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -695,7 +720,7 @@ async fn includes_configured_effort_in_request() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5.1-codex")
.with_config(|config| {
@@ -734,7 +759,7 @@ async fn includes_no_effort_in_request() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5.1-codex")
.build(&server)
@@ -771,7 +796,7 @@ async fn includes_default_reasoning_effort_in_request_when_defined_by_model_fami
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex().with_model("gpt-5.1").build(&server).await?;
codex
@@ -799,12 +824,87 @@ async fn includes_default_reasoning_effort_in_request_when_defined_by_model_fami
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
/// When the config sets `model_reasoning_summary = Concise`, the outgoing
/// request body must carry `reasoning.summary == "concise"`.
async fn configured_reasoning_summary_is_sent() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    // Mock model endpoint that immediately completes the response stream.
    let server = MockServer::start().await;
    let mock = mount_sse_once(&server, sse_completed("resp1")).await;

    // Build a session with an explicit `concise` reasoning summary setting.
    let TestCodex { codex, .. } = test_codex()
        .with_config(|config| {
            config.model_reasoning_summary = ReasoningSummary::Concise;
        })
        .build(&server)
        .await?;

    // Drive one user turn through to completion.
    let user_turn = Op::UserInput {
        items: vec![UserInput::Text {
            text: "hello".into(),
        }],
    };
    codex.submit(user_turn).await.unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    // Inspect the captured request body for the reasoning summary field.
    let request_body = mock.single_request().body_json();
    let summary = request_body
        .get("reasoning")
        .and_then(|reasoning| reasoning.get("summary"))
        .and_then(|value| value.as_str());
    pretty_assertions::assert_eq!(summary, Some("concise"));
    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
/// When the config sets `model_reasoning_summary = None`, the outgoing
/// request body must omit `reasoning.summary` entirely.
async fn reasoning_summary_is_omitted_when_disabled() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    // Mock model endpoint that immediately completes the response stream.
    let server = MockServer::start().await;
    let mock = mount_sse_once(&server, sse_completed("resp1")).await;

    // Build a session with the reasoning summary explicitly disabled.
    let TestCodex { codex, .. } = test_codex()
        .with_config(|config| {
            config.model_reasoning_summary = ReasoningSummary::None;
        })
        .build(&server)
        .await?;

    // Drive one user turn through to completion.
    let user_turn = Op::UserInput {
        items: vec![UserInput::Text {
            text: "hello".into(),
        }],
    };
    codex.submit(user_turn).await.unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    // The captured request must not contain a `reasoning.summary` value.
    let request_body = mock.single_request().body_json();
    let summary = request_body
        .get("reasoning")
        .and_then(|reasoning| reasoning.get("summary"));
    pretty_assertions::assert_eq!(summary, None);
    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn includes_default_verbosity_in_request() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex().with_model("gpt-5.1").build(&server).await?;
codex
@@ -837,7 +937,7 @@ async fn configured_verbosity_not_sent_for_models_without_support() -> anyhow::R
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5.1-codex")
.with_config(|config| {
@@ -875,7 +975,7 @@ async fn configured_verbosity_is_sent() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5.1")
.with_config(|config| {
@@ -914,7 +1014,7 @@ async fn includes_developer_instructions_message_in_request() {
skip_if_no_network!();
let server = MockServer::start().await;
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
@@ -927,8 +1027,10 @@ async fn includes_developer_instructions_message_in_request() {
config.user_instructions = Some("be nice".to_string());
config.developer_instructions = Some("be useful".to_string());
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("Test API Key"),
config.model_provider.clone(),
);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -1014,17 +1116,19 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
config.model_provider = provider.clone();
let effort = config.model_reasoning_effort;
let summary = config.model_reasoning_summary;
let model = ModelsManager::get_model_offline(config.model.as_deref());
config.model = Some(model.clone());
let config = Arc::new(config);
let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config);
let conversation_id = ConversationId::new();
let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
let otel_event_manager = OtelEventManager::new(
conversation_id,
config.model.as_str(),
config.model_family.slug.as_str(),
model.as_str(),
model_family.slug.as_str(),
None,
Some("test@test.com".to_string()),
Some(AuthMode::ChatGPT),
auth_manager.get_auth_mode(),
false,
"test".to_string(),
);
@@ -1032,6 +1136,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
let client = ModelClient::new(
Arc::clone(&config),
None,
model_family,
otel_event_manager,
provider,
effort,
@@ -1102,11 +1207,8 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
}
}
let requests = server
.received_requests()
.await
.expect("mock server collected requests");
assert_eq!(requests.len(), 1, "expected a single request");
let requests = get_responses_requests(&server).await;
assert_eq!(requests.len(), 1, "expected a single POST request");
let body: serde_json::Value = requests[0]
.body_json()
.expect("request body to be valid JSON");
@@ -1127,7 +1229,7 @@ async fn token_count_includes_rate_limits_snapshot() {
skip_if_no_network!();
let server = MockServer::start().await;
let sse_body = responses::sse(vec![responses::ev_completed_with_tokens("resp_rate", 123)]);
let sse_body = sse(vec![ev_completed_with_tokens("resp_rate", 123)]);
let response = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
@@ -1153,7 +1255,10 @@ async fn token_count_includes_rate_limits_snapshot() {
let mut config = load_default_config_for_test(&home);
config.model_provider = provider;
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("test"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("test"),
config.model_provider.clone(),
);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -1192,7 +1297,8 @@ async fn token_count_includes_rate_limits_snapshot() {
"window_minutes": 60,
"resets_at": 1704074400
},
"credits": null
"credits": null,
"plan_type": null
}
})
);
@@ -1240,7 +1346,8 @@ async fn token_count_includes_rate_limits_snapshot() {
"window_minutes": 60,
"resets_at": 1704074400
},
"credits": null
"credits": null,
"plan_type": null
}
})
);
@@ -1311,7 +1418,8 @@ async fn usage_limit_error_emits_rate_limit_event() -> anyhow::Result<()> {
"window_minutes": 60,
"resets_at": null
},
"credits": null
"credits": null,
"plan_type": null
});
let submission_id = codex
@@ -1357,10 +1465,10 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
const EFFECTIVE_CONTEXT_WINDOW: i64 = (272_000 * 95) / 100;
responses::mount_sse_once_match(
mount_sse_once_match(
&server,
body_string_contains("trigger context window"),
responses::sse_failed(
sse_failed(
"resp_context_window",
"context_length_exceeded",
"Your input exceeds the context window of this model. Please adjust your input and try again.",
@@ -1368,7 +1476,7 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
)
.await;
responses::mount_sse_once_match(
mount_sse_once_match(
&server,
body_string_contains("seed turn"),
sse_completed("resp_seed"),
@@ -1377,9 +1485,7 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
let TestCodex { codex, .. } = test_codex()
.with_config(|config| {
config.model = "gpt-5.1".to_string();
config.model_family =
find_family_for_model("gpt-5.1").expect("known gpt-5.1 model family");
config.model = Some("gpt-5.1".to_string());
config.model_context_window = Some(272_000);
})
.build(&server)
@@ -1503,7 +1609,10 @@ async fn azure_overrides_assign_properties_used_for_responses_url() {
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = provider;
let conversation_manager = ConversationManager::with_auth(create_dummy_codex_auth());
let conversation_manager = ConversationManager::with_models_provider(
create_dummy_codex_auth(),
config.model_provider.clone(),
);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -1581,7 +1690,10 @@ async fn env_var_overrides_loaded_auth() {
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = provider;
let conversation_manager = ConversationManager::with_auth(create_dummy_codex_auth());
let conversation_manager = ConversationManager::with_models_provider(
create_dummy_codex_auth(),
config.model_provider.clone(),
);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -1659,8 +1771,10 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = model_provider;
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("Test API Key"),
config.model_provider.clone(),
);
let NewConversation {
conversation: codex,
..
@@ -1697,7 +1811,7 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// Inspect the three captured requests.
let requests = server.received_requests().await.unwrap();
let requests = get_responses_requests(&server).await;
assert_eq!(requests.len(), 3, "expected 3 requests (one per turn)");
// Replace full-array compare with tail-only raw JSON compare using a single hard-coded value.

View File

@@ -5,6 +5,7 @@ use codex_core::protocol::ReviewDecision;
use codex_core::protocol::ReviewRequest;
use codex_core::protocol::ReviewTarget;
use codex_core::protocol::SandboxPolicy;
use codex_core::sandboxing::SandboxPermissions;
use core_test_support::responses::ev_apply_patch_function_call;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
@@ -31,7 +32,7 @@ async fn codex_delegate_forwards_exec_approval_and_proceeds_on_approval() {
let args = serde_json::json!({
"command": "rm -rf delegated",
"timeout_ms": 1000,
"with_escalated_permissions": true,
"sandbox_permissions": SandboxPermissions::RequireEscalated,
})
.to_string();
let sse1 = sse(vec![

View File

@@ -28,6 +28,7 @@ use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_completed_with_tokens;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::get_responses_requests;
use core_test_support::responses::mount_compact_json_once;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::mount_sse_once_match;
@@ -135,7 +136,10 @@ async fn summarize_context_three_requests_and_instructions() {
config.model_provider = model_provider;
set_test_compact_prompt(&mut config);
config.model_auto_compact_token_limit = Some(200_000);
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
);
let NewConversation {
conversation: codex,
session_configured,
@@ -329,7 +333,10 @@ async fn manual_compact_uses_custom_prompt() {
config.model_provider = model_provider;
config.compact_prompt = Some(custom_prompt.to_string());
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -344,7 +351,7 @@ async fn manual_compact_uses_custom_prompt() {
assert_eq!(message, COMPACT_WARNING_MESSAGE);
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let requests = server.received_requests().await.expect("collect requests");
let requests = get_responses_requests(&server).await;
let body = requests
.iter()
.find_map(|req| req.body_json::<serde_json::Value>().ok())
@@ -409,7 +416,10 @@ async fn manual_compact_emits_api_and_local_token_usage_events() {
config.model_provider = model_provider;
set_test_compact_prompt(&mut config);
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
);
let NewConversation {
conversation: codex,
..
@@ -570,7 +580,7 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// collect the requests payloads from the model
let requests_payloads = server.received_requests().await.unwrap();
let requests_payloads = get_responses_requests(&server).await;
let body = requests_payloads[0]
.body_json::<serde_json::Value>()
@@ -1050,7 +1060,10 @@ async fn auto_compact_runs_after_token_limit_hit() {
config.model_provider = model_provider;
set_test_compact_prompt(&mut config);
config.model_auto_compact_token_limit = Some(200_000);
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -1090,7 +1103,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let requests = server.received_requests().await.unwrap();
let requests = get_responses_requests(&server).await;
assert_eq!(
requests.len(),
5,
@@ -1295,7 +1308,10 @@ async fn auto_compact_persists_rollout_entries() {
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
set_test_compact_prompt(&mut config);
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
);
let NewConversation {
conversation: codex,
session_configured,
@@ -1397,11 +1413,14 @@ async fn manual_compact_retries_after_context_window_error() {
config.model_provider = model_provider;
set_test_compact_prompt(&mut config);
config.model_auto_compact_token_limit = Some(200_000);
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
.new_conversation(config)
.await
.unwrap()
.conversation;
let codex = ConversationManager::with_models_provider(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
)
.new_conversation(config)
.await
.unwrap()
.conversation;
codex
.submit(Op::UserInput {
@@ -1529,11 +1548,14 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
set_test_compact_prompt(&mut config);
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
.new_conversation(config)
.await
.unwrap()
.conversation;
let codex = ConversationManager::with_models_provider(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
)
.new_conversation(config)
.await
.unwrap()
.conversation;
codex
.submit(Op::UserInput {
@@ -1731,7 +1753,10 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
config.model_provider = model_provider;
set_test_compact_prompt(&mut config);
config.model_auto_compact_token_limit = Some(200);
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -1771,10 +1796,8 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
"auto compact should not emit task lifecycle events"
);
let request_bodies: Vec<String> = server
.received_requests()
.await
.unwrap()
let requests = get_responses_requests(&server).await;
let request_bodies: Vec<String> = requests
.into_iter()
.map(|request| String::from_utf8(request.body).unwrap_or_default())
.collect();
@@ -1845,11 +1868,14 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
config.model_context_window = Some(context_window);
config.model_auto_compact_token_limit = Some(limit);
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
.new_conversation(config)
.await
.unwrap()
.conversation;
let codex = ConversationManager::with_models_provider(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
)
.new_conversation(config)
.await
.unwrap()
.conversation;
codex
.submit(Op::UserInput {

View File

@@ -26,6 +26,7 @@ use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::get_responses_request_bodies;
use core_test_support::responses::mount_sse_once_match;
use core_test_support::responses::sse;
use core_test_support::wait_for_event;
@@ -771,17 +772,11 @@ fn normalize_line_endings(value: &mut Value) {
}
async fn gather_request_bodies(server: &MockServer) -> Vec<Value> {
server
.received_requests()
.await
.expect("mock server should not fail")
.into_iter()
.map(|req| {
let mut value = req.body_json::<Value>().expect("valid JSON body");
normalize_line_endings(&mut value);
value
})
.collect()
let mut bodies = get_responses_request_bodies(server).await;
for body in &mut bodies {
normalize_line_endings(body);
}
bodies
}
async fn mount_initial_flow(server: &MockServer) {
@@ -870,9 +865,12 @@ async fn start_test_conversation(
config.model_provider = model_provider;
config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());
if let Some(model) = model {
config.model = model.to_string();
config.model = Some(model.to_string());
}
let manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
let manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
);
let NewConversation { conversation, .. } = manager
.new_conversation(config.clone())
.await

View File

@@ -36,7 +36,7 @@ async fn emits_deprecation_notice_for_legacy_feature_flag() -> anyhow::Result<()
let DeprecationNoticeEvent { summary, details } = notice;
assert_eq!(
summary,
"`use_experimental_unified_exec_tool` is deprecated. Use `unified_exec` instead."
"`use_experimental_unified_exec_tool` is deprecated. Use `[features].unified_exec` instead."
.to_string(),
);
assert_eq!(

View File

@@ -8,6 +8,7 @@ use codex_core::exec::ExecToolCallOutput;
use codex_core::exec::SandboxType;
use codex_core::exec::process_exec_tool_call;
use codex_core::protocol::SandboxPolicy;
use codex_core::sandboxing::SandboxPermissions;
use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
use tempfile::TempDir;
@@ -34,7 +35,7 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput
cwd: tmp.path().to_path_buf(),
expiration: 1000.into(),
env: HashMap::new(),
with_escalated_permissions: None,
sandbox_permissions: SandboxPermissions::UseDefault,
justification: None,
arg0: None,
};

View File

@@ -27,7 +27,7 @@ async fn execpolicy_blocks_shell_invocation() -> Result<()> {
}
let mut builder = test_codex().with_config(|config| {
let policy_path = config.codex_home.join("policy").join("policy.codexpolicy");
let policy_path = config.codex_home.join("rules").join("policy.rules");
fs::create_dir_all(
policy_path
.parent()

View File

@@ -55,7 +55,10 @@ async fn fork_conversation_twice_drops_to_first_message() {
config.model_provider = model_provider.clone();
let config_for_fork = config.clone();
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
);
let NewConversation {
conversation: codex,
..

View File

@@ -0,0 +1,218 @@
use anyhow::Result;
use codex_core::CodexAuth;
use codex_core::ConversationManager;
use codex_core::built_in_model_providers;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::openai_models::ReasoningEffortPreset;
use core_test_support::load_default_config_for_test;
use pretty_assertions::assert_eq;
use tempfile::tempdir;
/// API-key auth should surface exactly the API-key preset list.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn list_models_returns_api_key_models() -> Result<()> {
    let home = tempdir()?;
    let config = load_default_config_for_test(&home);

    // Build a manager backed by the built-in OpenAI provider and API-key auth.
    let auth = CodexAuth::from_api_key("sk-test");
    let provider = built_in_model_providers()["openai"].clone();
    let manager = ConversationManager::with_models_provider(auth, provider);

    let listed = manager.list_models(&config).await;
    assert_eq!(expected_models_for_api_key(), listed);
    Ok(())
}
/// ChatGPT auth should surface exactly the ChatGPT preset list.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn list_models_returns_chatgpt_models() -> Result<()> {
    let home = tempdir()?;
    let config = load_default_config_for_test(&home);

    // Build a manager backed by the built-in OpenAI provider and dummy ChatGPT auth.
    let auth = CodexAuth::create_dummy_chatgpt_auth_for_testing();
    let provider = built_in_model_providers()["openai"].clone();
    let manager = ConversationManager::with_models_provider(auth, provider);

    let listed = manager.list_models(&config).await;
    assert_eq!(expected_models_for_chatgpt(), listed);
    Ok(())
}
/// Preset list expected when authenticating with an API key,
/// in picker order (flagship first).
fn expected_models_for_api_key() -> Vec<ModelPreset> {
    Vec::from([
        gpt_5_1_codex_max(),
        gpt_5_1_codex(),
        gpt_5_1_codex_mini(),
        gpt_5_2(),
        gpt_5_1(),
    ])
}
fn expected_models_for_chatgpt() -> Vec<ModelPreset> {
vec![
gpt_5_1_codex_max(),
gpt_5_1_codex(),
gpt_5_1_codex_mini(),
gpt_5_2(),
gpt_5_1(),
]
}
/// Preset for `gpt-5.1-codex-max`, the default flagship pick.
fn gpt_5_1_codex_max() -> ModelPreset {
    let slug = "gpt-5.1-codex-max";
    // Supported efforts, lowest to highest.
    let supported_reasoning_efforts = vec![
        effort(
            ReasoningEffort::Low,
            "Fast responses with lighter reasoning",
        ),
        effort(
            ReasoningEffort::Medium,
            "Balances speed and reasoning depth for everyday tasks",
        ),
        effort(
            ReasoningEffort::High,
            "Maximizes reasoning depth for complex problems",
        ),
        effort(
            ReasoningEffort::XHigh,
            "Extra high reasoning depth for complex problems",
        ),
    ];
    ModelPreset {
        id: slug.to_string(),
        model: slug.to_string(),
        display_name: slug.to_string(),
        description: "Latest Codex-optimized flagship for deep and fast reasoning.".to_string(),
        default_reasoning_effort: ReasoningEffort::Medium,
        supported_reasoning_efforts,
        // The flagship is the default and has no upgrade target.
        is_default: true,
        upgrade: None,
        show_in_picker: true,
    }
}
/// Preset for `gpt-5.1-codex`; upgradeable to `gpt-5.1-codex-max`.
fn gpt_5_1_codex() -> ModelPreset {
    let slug = "gpt-5.1-codex";
    let supported_reasoning_efforts = vec![
        effort(
            ReasoningEffort::Low,
            "Fastest responses with limited reasoning",
        ),
        effort(
            ReasoningEffort::Medium,
            "Dynamically adjusts reasoning based on the task",
        ),
        effort(
            ReasoningEffort::High,
            "Maximizes reasoning depth for complex or ambiguous problems",
        ),
    ];
    ModelPreset {
        id: slug.to_string(),
        model: slug.to_string(),
        display_name: slug.to_string(),
        description: "Optimized for codex.".to_string(),
        default_reasoning_effort: ReasoningEffort::Medium,
        supported_reasoning_efforts,
        is_default: false,
        upgrade: Some(gpt_5_1_codex_max_upgrade()),
        show_in_picker: true,
    }
}
/// Preset for `gpt-5.1-codex-mini`; upgradeable to `gpt-5.1-codex-max`.
fn gpt_5_1_codex_mini() -> ModelPreset {
    let slug = "gpt-5.1-codex-mini";
    // Mini does not offer a Low effort tier.
    let supported_reasoning_efforts = vec![
        effort(
            ReasoningEffort::Medium,
            "Dynamically adjusts reasoning based on the task",
        ),
        effort(
            ReasoningEffort::High,
            "Maximizes reasoning depth for complex or ambiguous problems",
        ),
    ];
    ModelPreset {
        id: slug.to_string(),
        model: slug.to_string(),
        display_name: slug.to_string(),
        description: "Optimized for codex. Cheaper, faster, but less capable.".to_string(),
        default_reasoning_effort: ReasoningEffort::Medium,
        supported_reasoning_efforts,
        is_default: false,
        upgrade: Some(gpt_5_1_codex_max_upgrade()),
        show_in_picker: true,
    }
}
/// Preset for the general-purpose `gpt-5.2` model (no upgrade target).
fn gpt_5_2() -> ModelPreset {
    let slug = "gpt-5.2";
    let supported_reasoning_efforts = vec![
        effort(
            ReasoningEffort::Low,
            "Balances speed with some reasoning; useful for straightforward queries and short explanations",
        ),
        effort(
            ReasoningEffort::Medium,
            "Provides a solid balance of reasoning depth and latency for general-purpose tasks",
        ),
        effort(
            ReasoningEffort::High,
            "Maximizes reasoning depth for complex or ambiguous problems",
        ),
        effort(
            ReasoningEffort::XHigh,
            "Extra high reasoning for complex problems",
        ),
    ];
    ModelPreset {
        id: slug.to_string(),
        model: slug.to_string(),
        display_name: slug.to_string(),
        description: "Latest frontier model with improvements across knowledge, reasoning and coding"
            .to_string(),
        default_reasoning_effort: ReasoningEffort::Medium,
        supported_reasoning_efforts,
        is_default: false,
        upgrade: None,
        show_in_picker: true,
    }
}
/// Preset for the general-purpose `gpt-5.1` model; upgradeable to
/// `gpt-5.1-codex-max`.
fn gpt_5_1() -> ModelPreset {
    let slug = "gpt-5.1";
    let supported_reasoning_efforts = vec![
        effort(
            ReasoningEffort::Low,
            "Balances speed with some reasoning; useful for straightforward queries and short explanations",
        ),
        effort(
            ReasoningEffort::Medium,
            "Provides a solid balance of reasoning depth and latency for general-purpose tasks",
        ),
        effort(
            ReasoningEffort::High,
            "Maximizes reasoning depth for complex or ambiguous problems",
        ),
    ];
    ModelPreset {
        id: slug.to_string(),
        model: slug.to_string(),
        display_name: slug.to_string(),
        description: "Broad world knowledge with strong general reasoning.".to_string(),
        default_reasoning_effort: ReasoningEffort::Medium,
        supported_reasoning_efforts,
        is_default: false,
        upgrade: Some(gpt_5_1_codex_max_upgrade()),
        show_in_picker: true,
    }
}
/// Shared upgrade descriptor pointing presets at `gpt-5.1-codex-max`.
fn gpt_5_1_codex_max_upgrade() -> codex_protocol::openai_models::ModelUpgrade {
    use codex_protocol::openai_models::ModelUpgrade;

    ModelUpgrade {
        id: "gpt-5.1-codex-max".to_string(),
        reasoning_effort_mapping: None,
        migration_config_key: "hide_gpt-5.1-codex-max_migration_prompt".to_string(),
    }
}
/// Pair a reasoning effort level with its human-readable description.
fn effort(reasoning_effort: ReasoningEffort, description: &str) -> ReasoningEffortPreset {
    ReasoningEffortPreset {
        effort: reasoning_effort,
        description: description.to_owned(),
    }
}

View File

@@ -15,7 +15,6 @@ pub static CODEX_ALIASES_TEMP_DIR: TempDir = unsafe {
#[cfg(not(target_os = "windows"))]
mod abort_tasks;
#[cfg(not(target_os = "windows"))]
mod apply_patch_cli;
#[cfg(not(target_os = "windows"))]
mod approvals;
@@ -34,6 +33,7 @@ mod grep_files;
mod items;
mod json_result;
mod list_dir;
mod list_models;
mod live_cli;
mod model_overrides;
mod model_tools;
@@ -41,12 +41,16 @@ mod otel;
mod prompt_caching;
mod quota_exceeded;
mod read_file;
mod remote_models;
mod resume;
mod review;
mod rmcp_client;
mod rollout_list_find;
mod seatbelt;
mod shell_command;
mod shell_serialization;
mod shell_snapshot;
mod skills;
mod stream_error_allows_next_turn;
mod stream_no_completed;
mod text_encoding_fix;

View File

@@ -2,7 +2,7 @@ use codex_core::CodexAuth;
use codex_core::ConversationManager;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol_config_types::ReasoningEffort;
use codex_protocol::openai_models::ReasoningEffort;
use core_test_support::load_default_config_for_test;
use core_test_support::wait_for_event;
use pretty_assertions::assert_eq;
@@ -20,10 +20,12 @@ async fn override_turn_context_does_not_persist_when_config_exists() {
.expect("seed config.toml");
let mut config = load_default_config_for_test(&codex_home);
config.model = "gpt-4o".to_string();
config.model = Some("gpt-4o".to_string());
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("Test API Key"),
config.model_provider.clone(),
);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -62,8 +64,10 @@ async fn override_turn_context_does_not_create_config_file() {
let config = load_default_config_for_test(&codex_home);
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("Test API Key"),
config.model_provider.clone(),
);
let codex = conversation_manager
.new_conversation(config)
.await

View File

@@ -1,16 +1,15 @@
#![allow(clippy::unwrap_used)]
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::ENVIRONMENT_CONTEXT_OPEN_TAG;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_core::protocol_config_types::ReasoningEffort;
use codex_core::protocol_config_types::ReasoningSummary;
use codex_core::shell::Shell;
use codex_core::shell::default_user_shell;
use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::user_input::UserInput;
use core_test_support::load_sse_fixture_with_id;
use core_test_support::responses::mount_sse_once;
@@ -72,9 +71,7 @@ async fn codex_mini_latest_tools() -> anyhow::Result<()> {
.with_config(|config| {
config.user_instructions = Some("be consistent and helpful".to_string());
config.features.disable(Feature::ApplyPatchFreeform);
config.model = "codex-mini-latest".to_string();
config.model_family = find_family_for_model("codex-mini-latest")
.expect("model family for codex-mini-latest");
config.model = Some("codex-mini-latest".to_string());
})
.build(&server)
.await?;
@@ -126,13 +123,30 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
let req1 = mount_sse_once(&server, sse_completed("resp-1")).await;
let req2 = mount_sse_once(&server, sse_completed("resp-2")).await;
let TestCodex { codex, config, .. } = test_codex()
let TestCodex {
codex,
config,
conversation_manager,
..
} = test_codex()
.with_config(|config| {
config.user_instructions = Some("be consistent and helpful".to_string());
config.model = Some("gpt-5.1-codex-max".to_string());
})
.build(&server)
.await?;
let base_instructions = config.model_family.base_instructions.clone();
let base_instructions = conversation_manager
.get_models_manager()
.construct_model_family(
config
.model
.as_deref()
.expect("test config should have a model"),
&config,
)
.await
.base_instructions
.clone();
codex
.submit(Op::UserInput {
@@ -565,7 +579,12 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a
let req1 = mount_sse_once(&server, sse_completed("resp-1")).await;
let req2 = mount_sse_once(&server, sse_completed("resp-2")).await;
let TestCodex { codex, config, .. } = test_codex()
let TestCodex {
codex,
config,
session_configured,
..
} = test_codex()
.with_config(|config| {
config.user_instructions = Some("be consistent and helpful".to_string());
})
@@ -575,7 +594,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a
let default_cwd = config.cwd.clone();
let default_approval_policy = config.approval_policy;
let default_sandbox_policy = config.sandbox_policy.clone();
let default_model = config.model.clone();
let default_model = session_configured.model;
let default_effort = config.model_reasoning_effort;
let default_summary = config.model_reasoning_summary;
@@ -652,7 +671,12 @@ async fn send_user_turn_with_changes_sends_environment_context() -> anyhow::Resu
let req1 = mount_sse_once(&server, sse_completed("resp-1")).await;
let req2 = mount_sse_once(&server, sse_completed("resp-2")).await;
let TestCodex { codex, config, .. } = test_codex()
let TestCodex {
codex,
config,
session_configured,
..
} = test_codex()
.with_config(|config| {
config.user_instructions = Some("be consistent and helpful".to_string());
})
@@ -662,7 +686,7 @@ async fn send_user_turn_with_changes_sends_environment_context() -> anyhow::Resu
let default_cwd = config.cwd.clone();
let default_approval_policy = config.approval_policy;
let default_sandbox_policy = config.sandbox_policy.clone();
let default_model = config.model.clone();
let default_model = session_configured.model;
let default_effort = config.model_reasoning_effort;
let default_summary = config.model_reasoning_summary;

View File

@@ -0,0 +1,364 @@
#![cfg(not(target_os = "windows"))]
// unified exec is not supported on Windows OS
use std::sync::Arc;
use anyhow::Result;
use codex_core::CodexAuth;
use codex_core::CodexConversation;
use codex_core::ConversationManager;
use codex_core::ModelProviderInfo;
use codex_core::built_in_model_providers;
use codex_core::config::Config;
use codex_core::features::Feature;
use codex_core::openai_models::models_manager::ModelsManager;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecCommandSource;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::openai_models::ClientVersion;
use codex_protocol::openai_models::ConfigShellToolType;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::openai_models::ModelVisibility;
use codex_protocol::openai_models::ModelsResponse;
use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::openai_models::ReasoningEffortPreset;
use codex_protocol::openai_models::ReasoningSummaryFormat;
use codex_protocol::openai_models::TruncationPolicyConfig;
use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_models_once;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::skip_if_sandbox;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use serde_json::json;
use tempfile::TempDir;
use tokio::time::Duration;
use tokio::time::Instant;
use tokio::time::sleep;
use wiremock::BodyPrintLimit;
use wiremock::MockServer;
const REMOTE_MODEL_SLUG: &str = "codex-test";
/// End-to-end: a remotely-advertised model whose `shell_type` is
/// `UnifiedExec` should make the session run exec calls through the
/// unified-exec tool (observed via `ExecCommandSource::UnifiedExecStartup`).
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_remote_model_uses_unified_exec() -> Result<()> {
    skip_if_no_network!(Ok(()));
    skip_if_sandbox!(Ok(()));
    // Raise the body print limit so assertion failures show full payloads.
    let server = MockServer::builder()
        .body_print_limit(BodyPrintLimit::Limited(80_000))
        .start()
        .await;
    // Advertise a single remote model that requires the unified-exec shell.
    let remote_model = ModelInfo {
        slug: REMOTE_MODEL_SLUG.to_string(),
        display_name: "Remote Test".to_string(),
        description: Some("A remote model that requires the test shell".to_string()),
        default_reasoning_level: ReasoningEffort::Medium,
        supported_reasoning_levels: vec![ReasoningEffortPreset {
            effort: ReasoningEffort::Medium,
            description: ReasoningEffort::Medium.to_string(),
        }],
        // The property under test: this model demands the unified-exec tool.
        shell_type: ConfigShellToolType::UnifiedExec,
        visibility: ModelVisibility::List,
        minimal_client_version: ClientVersion(0, 1, 0),
        supported_in_api: true,
        priority: 1,
        upgrade: None,
        base_instructions: None,
        supports_reasoning_summaries: false,
        support_verbosity: false,
        default_verbosity: None,
        apply_patch_tool_type: None,
        truncation_policy: TruncationPolicyConfig::bytes(10_000),
        supports_parallel_tool_calls: false,
        context_window: None,
        reasoning_summary_format: ReasoningSummaryFormat::None,
        experimental_supported_tools: Vec::new(),
    };
    // Serve the remote model from the mock /models endpoint (mounted before
    // the conversation is built so the refresh can find it).
    let models_mock = mount_models_once(
        &server,
        ModelsResponse {
            models: vec![remote_model],
            etag: String::new(),
        },
    )
    .await;
    // Start with a built-in model; we switch to the remote one below.
    let harness = build_remote_models_harness(&server, |config| {
        config.features.enable(Feature::RemoteModels);
        config.model = Some("gpt-5.1".to_string());
    })
    .await?;
    let RemoteModelsHarness {
        codex,
        cwd,
        config,
        conversation_manager,
        ..
    } = harness;
    // Wait for the background /models refresh to surface the remote model.
    let models_manager = conversation_manager.get_models_manager();
    let available_model =
        wait_for_model_available(&models_manager, REMOTE_MODEL_SLUG, &config).await;
    assert_eq!(available_model.model, REMOTE_MODEL_SLUG);
    // Exactly one refresh request should have hit the /v1/models endpoint.
    let requests = models_mock.requests();
    assert_eq!(
        requests.len(),
        1,
        "expected a single /models refresh request for the remote models feature"
    );
    assert_eq!(requests[0].url.path(), "/v1/models");
    // The constructed model family must carry the remote shell type through.
    let family = models_manager
        .construct_model_family(REMOTE_MODEL_SLUG, &config)
        .await;
    assert_eq!(family.shell_type, ConfigShellToolType::UnifiedExec);
    // Switch the live session over to the remote model.
    codex
        .submit(Op::OverrideTurnContext {
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
            model: Some(REMOTE_MODEL_SLUG.to_string()),
            effort: None,
            summary: None,
        })
        .await?;
    // Script two model turns: one issuing an exec_command call, then a
    // plain assistant message to finish the task.
    let call_id = "call";
    let args = json!({
        "cmd": "/bin/echo call",
        "yield_time_ms": 250,
    });
    let responses = vec![
        sse(vec![
            ev_response_created("resp-1"),
            ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
            ev_completed("resp-1"),
        ]),
        sse(vec![
            ev_response_created("resp-2"),
            ev_assistant_message("msg-1", "done"),
            ev_completed("resp-2"),
        ]),
    ];
    mount_sse_sequence(&server, responses).await;
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "run call".into(),
            }],
            final_output_json_schema: None,
            cwd: cwd.path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: REMOTE_MODEL_SLUG.to_string(),
            effort: None,
            summary: ReasoningSummary::Auto,
        })
        .await?;
    // The exec begin event for our call must come from unified-exec startup.
    let begin_event = wait_for_event_match(&codex, |msg| match msg {
        EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()),
        _ => None,
    })
    .await;
    assert_eq!(begin_event.source, ExecCommandSource::UnifiedExecStartup);
    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
    Ok(())
}
/// End-to-end: a remote model that ships its own `base_instructions` should
/// have those instructions sent verbatim in the request `instructions` field.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_apply_remote_base_instructions() -> Result<()> {
    skip_if_no_network!(Ok(()));
    skip_if_sandbox!(Ok(()));
    // Raise the body print limit so assertion failures show full payloads.
    let server = MockServer::builder()
        .body_print_limit(BodyPrintLimit::Limited(80_000))
        .start()
        .await;
    let model = "test-gpt-5-remote";
    let remote_base = "Use the remote base instructions only.";
    // Advertise a remote model carrying custom base instructions.
    let remote_model = ModelInfo {
        slug: model.to_string(),
        display_name: "Parallel Remote".to_string(),
        description: Some("A remote model with custom instructions".to_string()),
        default_reasoning_level: ReasoningEffort::Medium,
        supported_reasoning_levels: vec![ReasoningEffortPreset {
            effort: ReasoningEffort::Medium,
            description: ReasoningEffort::Medium.to_string(),
        }],
        shell_type: ConfigShellToolType::ShellCommand,
        visibility: ModelVisibility::List,
        minimal_client_version: ClientVersion(0, 1, 0),
        supported_in_api: true,
        priority: 1,
        upgrade: None,
        // The property under test: remote-supplied base instructions.
        base_instructions: Some(remote_base.to_string()),
        supports_reasoning_summaries: false,
        support_verbosity: false,
        default_verbosity: None,
        apply_patch_tool_type: None,
        truncation_policy: TruncationPolicyConfig::bytes(10_000),
        supports_parallel_tool_calls: false,
        context_window: None,
        reasoning_summary_format: ReasoningSummaryFormat::None,
        experimental_supported_tools: Vec::new(),
    };
    mount_models_once(
        &server,
        ModelsResponse {
            models: vec![remote_model],
            etag: String::new(),
        },
    )
    .await;
    // One scripted model turn; we only need to capture the request body.
    let response_mock = mount_sse_once(
        &server,
        sse(vec![
            ev_response_created("resp-1"),
            ev_assistant_message("msg-1", "done"),
            ev_completed("resp-1"),
        ]),
    )
    .await;
    // Start with a built-in model; we switch to the remote one below.
    let harness = build_remote_models_harness(&server, |config| {
        config.features.enable(Feature::RemoteModels);
        config.model = Some("gpt-5.1".to_string());
    })
    .await?;
    let RemoteModelsHarness {
        codex,
        cwd,
        config,
        conversation_manager,
        ..
    } = harness;
    // Wait for the background /models refresh to surface the remote model.
    let models_manager = conversation_manager.get_models_manager();
    wait_for_model_available(&models_manager, model, &config).await;
    // Switch the live session over to the remote model.
    codex
        .submit(Op::OverrideTurnContext {
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
            model: Some(model.to_string()),
            effort: None,
            summary: None,
        })
        .await?;
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "hello remote".into(),
            }],
            final_output_json_schema: None,
            cwd: cwd.path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: model.to_string(),
            effort: None,
            summary: ReasoningSummary::Auto,
        })
        .await?;
    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
    // The single captured request must carry the remote instructions verbatim.
    let body = response_mock.single_request().body_json();
    let instructions = body["instructions"].as_str().unwrap();
    assert_eq!(instructions, remote_base);
    Ok(())
}
/// Poll the models manager until a preset whose `model` equals `slug`
/// appears in the listed models, returning that preset.
///
/// # Panics
/// Panics if the model has not appeared within two seconds.
async fn wait_for_model_available(
    manager: &Arc<ModelsManager>,
    slug: &str,
    config: &Config,
) -> ModelPreset {
    let give_up_at = Instant::now() + Duration::from_secs(2);
    loop {
        // Re-list on every iteration; the refresh runs in the background.
        let listed = manager.list_models(config).await;
        if let Some(found) = listed.iter().find(|preset| preset.model == slug) {
            return found.clone();
        }
        if Instant::now() >= give_up_at {
            panic!("timed out waiting for the remote model {slug} to appear");
        }
        sleep(Duration::from_millis(25)).await;
    }
}
/// Handles shared by the remote-models tests, produced by
/// `build_remote_models_harness`.
struct RemoteModelsHarness {
    // The live conversation under test.
    codex: Arc<CodexConversation>,
    // Temp working directory the conversation runs in (kept alive here).
    cwd: Arc<TempDir>,
    // The config the conversation was built with.
    config: Config,
    // Manager owning the conversation; also exposes the models manager.
    conversation_manager: Arc<ConversationManager>,
}
// todo(aibrahim): move this to with_model_provider in test_codex
/// Build a conversation wired to the mock `server` with the RemoteModels
/// feature enabled, applying `mutate_config` before the conversation is
/// created. Returns the harness handles the tests need.
async fn build_remote_models_harness<F>(
    server: &MockServer,
    mutate_config: F,
) -> Result<RemoteModelsHarness>
where
    F: FnOnce(&mut Config),
{
    let auth = CodexAuth::from_api_key("dummy");
    // NOTE(review): `home` (the codex_home temp dir) is not stored in the
    // returned harness, so it is deleted when this function returns —
    // presumably the config is only read at construction time; confirm.
    let home = Arc::new(TempDir::new()?);
    let cwd = Arc::new(TempDir::new()?);
    let mut config = load_default_config_for_test(&home);
    config.cwd = cwd.path().to_path_buf();
    config.features.enable(Feature::RemoteModels);
    // Point the built-in OpenAI provider at the mock server.
    let provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers()["openai"].clone()
    };
    config.model_provider = provider.clone();
    // Let the caller tweak the config last, so it can override anything above.
    mutate_config(&mut config);
    let conversation_manager = Arc::new(ConversationManager::with_models_provider(auth, provider));
    let new_conversation = conversation_manager
        .new_conversation(config.clone())
        .await?;
    Ok(RemoteModelsHarness {
        codex: new_conversation.conversation,
        cwd,
        config,
        conversation_manager,
    })
}

View File

@@ -4,6 +4,7 @@ use codex_core::AuthManager;
use codex_core::CodexAuth;
use codex_core::ConversationManager;
use codex_core::NewConversation;
use codex_core::built_in_model_providers;
use codex_core::protocol::EventMsg;
use codex_core::protocol::InitialHistory;
use codex_core::protocol::ResumedHistory;
@@ -16,7 +17,11 @@ use core_test_support::load_default_config_for_test;
use core_test_support::wait_for_event;
use tempfile::TempDir;
fn resume_history(config: &codex_core::config::Config, previous_model: &str, rollout_path: &std::path::Path) -> InitialHistory {
fn resume_history(
config: &codex_core::config::Config,
previous_model: &str,
rollout_path: &std::path::Path,
) -> InitialHistory {
let turn_ctx = TurnContextItem {
cwd: config.cwd.clone(),
approval_policy: config.approval_policy,
@@ -38,7 +43,7 @@ async fn emits_warning_when_resumed_model_differs() {
// Arrange a config with a current model and a prior rollout recorded under a different model.
let home = TempDir::new().expect("tempdir");
let mut config = load_default_config_for_test(&home);
config.model = "current-model".to_string();
config.model = Some("current-model".to_string());
// Ensure cwd is absolute (the helper sets it to the temp dir already).
assert!(config.cwd.is_absolute());
@@ -47,7 +52,10 @@ async fn emits_warning_when_resumed_model_differs() {
let initial_history = resume_history(&config, "previous-model", &rollout_path);
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("test"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("test"),
config.model_provider.clone(),
);
let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("test"));
// Act: resume the conversation.

View File

@@ -23,6 +23,7 @@ use codex_core::review_format::render_review_output_text;
use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
use core_test_support::load_sse_fixture_with_id_from_str;
use core_test_support::responses::get_responses_requests;
use core_test_support::skip_if_no_network;
use core_test_support::wait_for_event;
use pretty_assertions::assert_eq;
@@ -394,7 +395,7 @@ async fn review_uses_custom_review_model_from_config() {
let codex_home = TempDir::new().unwrap();
// Choose a review model different from the main model; ensure it is used.
let codex = new_conversation_for_server(&server, &codex_home, |cfg| {
cfg.model = "gpt-4.1".to_string();
cfg.model = Some("gpt-4.1".to_string());
cfg.review_model = "gpt-5.1".to_string();
})
.await;
@@ -425,7 +426,10 @@ async fn review_uses_custom_review_model_from_config() {
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// Assert the request body model equals the configured review model
let request = &server.received_requests().await.unwrap()[0];
let requests = get_responses_requests(&server).await;
let request = requests
.first()
.expect("expected POST request to /responses");
let body = request.body_json::<serde_json::Value>().unwrap();
assert_eq!(body["model"].as_str().unwrap(), "gpt-5.1");
@@ -543,7 +547,10 @@ async fn review_input_isolated_from_parent_history() {
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// Assert the request `input` contains the environment context followed by the user review prompt.
let request = &server.received_requests().await.unwrap()[0];
let requests = get_responses_requests(&server).await;
let request = requests
.first()
.expect("expected POST request to /responses");
let body = request.body_json::<serde_json::Value>().unwrap();
let input = body["input"].as_array().expect("input array");
assert_eq!(
@@ -573,6 +580,10 @@ async fn review_input_isolated_from_parent_history() {
review_prompt,
"user message should only contain the raw review prompt"
);
assert!(
env_text.contains("<sandbox_mode>read-only</sandbox_mode>"),
"review environment context must run with read-only sandbox"
);
// Ensure the REVIEW_PROMPT rubric is sent via instructions.
let instructions = body["instructions"].as_str().expect("instructions string");
@@ -669,7 +680,7 @@ async fn review_history_surfaces_in_parent_session() {
// Inspect the second request (parent turn) input contents.
// Parent turns include session initial messages (user_instructions, environment_context).
// Critically, no messages from the review thread should appear.
let requests = server.received_requests().await.unwrap();
let requests = get_responses_requests(&server).await;
assert_eq!(requests.len(), 2);
let body = requests[1].body_json::<serde_json::Value>().unwrap();
let input = body["input"].as_array().expect("input array");
@@ -739,8 +750,10 @@ where
let mut config = load_default_config_for_test(codex_home);
config.model_provider = model_provider;
mutator(&mut config);
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("Test API Key"),
config.model_provider.clone(),
);
conversation_manager
.new_conversation(config)
.await
@@ -766,8 +779,10 @@ where
let mut config = load_default_config_for_test(codex_home);
config.model_provider = model_provider;
mutator(&mut config);
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let conversation_manager = ConversationManager::with_models_provider(
CodexAuth::from_api_key("Test API Key"),
config.model_provider.clone(),
);
let auth_manager =
codex_core::AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
conversation_manager

View File

@@ -487,9 +487,13 @@ async fn stdio_image_completions_round_trip() -> anyhow::Result<()> {
// Chat Completions assertion: the second POST should include a tool role message
// with an array `content` containing an item with the expected data URL.
let requests = server.received_requests().await.expect("requests captured");
let all_requests = server.received_requests().await.expect("requests captured");
let requests: Vec<_> = all_requests
.iter()
.filter(|req| req.method == "POST" && req.url.path().ends_with("/chat/completions"))
.collect();
assert!(requests.len() >= 2, "expected two chat completion calls");
let second = &requests[1];
let second = requests[1];
let body: Value = serde_json::from_slice(&second.body)?;
let messages = body
.get("messages")

View File

@@ -0,0 +1,174 @@
use anyhow::Result;
use core_test_support::assert_regex_match;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::skip_if_windows;
use core_test_support::test_codex::TestCodexBuilder;
use core_test_support::test_codex::TestCodexHarness;
use core_test_support::test_codex::test_codex;
use serde_json::json;
/// Build the two canned SSE turns for a mocked `shell_command` tool call:
/// the first turn issues the call with the given `command`/`login` arguments,
/// the second closes the conversation with an assistant "done" message.
fn shell_responses(call_id: &str, command: &str, login: Option<bool>) -> Vec<String> {
    let tool_args = json!({
        "command": command,
        "timeout_ms": 2_000,
        "login": login,
    });
    #[allow(clippy::expect_used)]
    let arguments = serde_json::to_string(&tool_args).expect("serialize shell command arguments");
    let call_turn = sse(vec![
        ev_response_created("resp-1"),
        ev_function_call(call_id, "shell_command", &arguments),
        ev_completed("resp-1"),
    ]);
    let closing_turn = sse(vec![
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-2"),
    ]);
    vec![call_turn, closing_turn]
}
/// Build a test harness, letting the caller customize the builder first, then
/// forcing the apply_patch tool on so every harness in this file shares the
/// same tool surface.
async fn shell_command_harness_with(
    configure: impl FnOnce(TestCodexBuilder) -> TestCodexBuilder,
) -> Result<TestCodexHarness> {
    let customized = configure(test_codex());
    let builder = customized.with_config(|config| {
        config.include_apply_patch_tool = true;
    });
    TestCodexHarness::with_builder(builder).await
}
/// Stage the canned SSE turns for `command` on the harness's mock server.
async fn mount_shell_responses(
    harness: &TestCodexHarness,
    call_id: &str,
    command: &str,
    login: Option<bool>,
) {
    let turns = shell_responses(call_id, command, login);
    mount_sse_sequence(harness.server(), turns).await;
}
/// Assert that a shell_command tool output matches the canonical success
/// layout (`Exit code: 0`, a wall-time line, then `expected` as the output).
/// Line endings are normalized (CRLF and bare CR become LF) and trailing
/// newlines dropped before matching; wall time is matched loosely because it
/// varies per run.
fn assert_shell_command_output(output: &str, expected: &str) -> Result<()> {
    let mut normalized = output.replace("\r\n", "\n");
    normalized = normalized.replace('\r', "\n");
    let normalized = normalized.trim_end_matches('\n');
    let expected_pattern = format!(
        r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nOutput:\n{expected}\n?$"
    );
    assert_regex_match(&expected_pattern, normalized);
    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_command_works() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));
    // A gpt-5.1 harness routes shell tool calls through `shell_command`;
    // no `login` argument is supplied.
    let harness = shell_command_harness_with(|b| b.with_model("gpt-5.1")).await?;
    let call_id = "shell-command-call";
    mount_shell_responses(&harness, call_id, "echo 'hello, world'", None).await;
    harness.submit("run the echo command").await?;
    let stdout = harness.function_call_stdout(call_id).await;
    assert_shell_command_output(&stdout, "hello, world")?;
    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn output_with_login() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));
    // Same echo as `shell_command_works`, but the tool call sets `login: true`.
    let harness = shell_command_harness_with(|b| b.with_model("gpt-5.1")).await?;
    let call_id = "shell-command-call-login-true";
    mount_shell_responses(&harness, call_id, "echo 'hello, world'", Some(true)).await;
    harness.submit("run the echo command with login").await?;
    let stdout = harness.function_call_stdout(call_id).await;
    assert_shell_command_output(&stdout, "hello, world")?;
    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn output_without_login() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));
    // Explicitly pass `login: false` and expect identical output handling.
    let harness = shell_command_harness_with(|b| b.with_model("gpt-5.1")).await?;
    let call_id = "shell-command-call-login-false";
    mount_shell_responses(&harness, call_id, "echo 'hello, world'", Some(false)).await;
    harness.submit("run the echo command without login").await?;
    let stdout = harness.function_call_stdout(call_id).await;
    assert_shell_command_output(&stdout, "hello, world")?;
    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn multi_line_output_with_login() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));
    // Multi-line stdout must survive the tool-output formatting intact.
    let harness = shell_command_harness_with(|b| b.with_model("gpt-5.1")).await?;
    let call_id = "shell-command-call-first-extra-login";
    let command = "echo 'first line\nsecond line'";
    mount_shell_responses(&harness, call_id, command, Some(true)).await;
    harness.submit("run the command with login").await?;
    let stdout = harness.function_call_stdout(call_id).await;
    assert_shell_command_output(&stdout, "first line\nsecond line")?;
    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn pipe_output_with_login() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));
    skip_if_windows!(Ok(()));
    // A pipeline (POSIX-only, hence the Windows skip) with no login argument.
    let harness = shell_command_harness_with(|b| b.with_model("gpt-5.1")).await?;
    let call_id = "shell-command-call-second-extra-no-login";
    mount_shell_responses(&harness, call_id, "echo 'hello, world' | cat", None).await;
    harness.submit("run the command without login").await?;
    let stdout = harness.function_call_stdout(call_id).await;
    assert_shell_command_output(&stdout, "hello, world")?;
    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn pipe_output_without_login() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));
    skip_if_windows!(Ok(()));
    // Same pipeline as above but with `login: false` supplied explicitly.
    let harness = shell_command_harness_with(|b| b.with_model("gpt-5.1")).await?;
    let call_id = "shell-command-call-third-extra-login-false";
    mount_shell_responses(&harness, call_id, "echo 'hello, world' | cat", Some(false)).await;
    harness.submit("run the command without login").await?;
    let stdout = harness.function_call_stdout(call_id).await;
    assert_shell_command_output(&stdout, "hello, world")?;
    Ok(())
}

View File

@@ -2,9 +2,6 @@
#![allow(clippy::expect_used)]
use anyhow::Result;
use codex_core::config::Config;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::SandboxPolicy;
use core_test_support::assert_regex_match;
use core_test_support::responses::ev_assistant_message;
@@ -18,6 +15,7 @@ use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::ApplyPatchModelOutput;
use core_test_support::test_codex::ShellModelOutput;
use core_test_support::test_codex::TestCodexBuilder;
use core_test_support::test_codex::test_codex;
use pretty_assertions::assert_eq;
use regex_lite::Regex;
@@ -41,20 +39,6 @@ const FIXTURE_JSON: &str = r#"{
}
"#;
fn configure_shell_command_model(output_type: ShellModelOutput, config: &mut Config) {
if !matches!(output_type, ShellModelOutput::ShellCommand) {
return;
}
if let Some(shell_command_family) = find_family_for_model("test-gpt-5-codex") {
if config.model_family.shell_type == shell_command_family.shell_type {
return;
}
config.model = shell_command_family.slug.clone();
config.model_family = shell_command_family;
}
}
fn shell_responses(
call_id: &str,
command: Vec<&str>,
@@ -114,6 +98,24 @@ fn shell_responses(
}
}
fn configure_shell_model(
builder: TestCodexBuilder,
output_type: ShellModelOutput,
include_apply_patch_tool: bool,
) -> TestCodexBuilder {
let builder = match (output_type, include_apply_patch_tool) {
(ShellModelOutput::ShellCommand, _) => builder.with_model("test-gpt-5-codex"),
(ShellModelOutput::LocalShell, true) => builder.with_model("gpt-5.1-codex"),
(ShellModelOutput::Shell, true) => builder.with_model("gpt-5.1-codex"),
(ShellModelOutput::LocalShell, false) => builder.with_model("codex-mini-latest"),
(ShellModelOutput::Shell, false) => builder.with_model("gpt-5"),
};
builder.with_config(move |config| {
config.include_apply_patch_tool = include_apply_patch_tool;
})
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ShellModelOutput::Shell)]
#[test_case(ShellModelOutput::LocalShell)]
@@ -123,10 +125,7 @@ async fn shell_output_stays_json_without_freeform_apply_patch(
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_model("gpt-5").with_config(move |config| {
config.features.disable(Feature::ApplyPatchFreeform);
configure_shell_command_model(output_type, config);
});
let mut builder = configure_shell_model(test_codex(), output_type, false);
let test = builder.build(&server).await?;
let call_id = "shell-json";
@@ -178,10 +177,7 @@ async fn shell_output_is_structured_with_freeform_apply_patch(
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
config.features.enable(Feature::ApplyPatchFreeform);
configure_shell_command_model(output_type, config);
});
let mut builder = configure_shell_model(test_codex(), output_type, true);
let test = builder.build(&server).await?;
let call_id = "shell-structured";
@@ -226,10 +222,7 @@ async fn shell_output_preserves_fixture_json_without_serialization(
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_model("gpt-5").with_config(move |config| {
config.features.disable(Feature::ApplyPatchFreeform);
configure_shell_command_model(output_type, config);
});
let mut builder = configure_shell_model(test_codex(), output_type, false);
let test = builder.build(&server).await?;
let fixture_path = test.cwd.path().join("fixture.json");
@@ -293,10 +286,7 @@ async fn shell_output_structures_fixture_with_serialization(
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
config.features.enable(Feature::ApplyPatchFreeform);
configure_shell_command_model(output_type, config);
});
let mut builder = configure_shell_model(test_codex(), output_type, true);
let test = builder.build(&server).await?;
let fixture_path = test.cwd.path().join("fixture.json");
@@ -355,10 +345,7 @@ async fn shell_output_for_freeform_tool_records_duration(
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
config.include_apply_patch_tool = true;
configure_shell_command_model(output_type, config);
});
let mut builder = configure_shell_model(test_codex(), output_type, true);
let test = builder.build(&server).await?;
let call_id = "shell-structured";
@@ -408,11 +395,9 @@ async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutp
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex()
.with_model("gpt-5.1-codex")
.with_config(move |config| {
let mut builder =
configure_shell_model(test_codex(), output_type, true).with_config(move |config| {
config.tool_output_token_limit = Some(200);
configure_shell_command_model(output_type, config);
});
let test = builder.build(&server).await?;
@@ -713,7 +698,6 @@ async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutp
.with_model("gpt-5.1-codex")
.with_config(move |config| {
config.include_apply_patch_tool = true;
configure_shell_command_model(output_type, config);
});
let test = builder.build(&server).await?;
@@ -749,7 +733,7 @@ async fn shell_command_output_is_freeform() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
configure_shell_command_model(ShellModelOutput::ShellCommand, config);
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;

View File

@@ -0,0 +1,379 @@
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecCommandBeginEvent;
use codex_core::protocol::ExecCommandEndEvent;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::test_codex::TestCodexHarness;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use pretty_assertions::assert_eq;
use serde_json::json;
use std::path::PathBuf;
use tokio::fs;
/// Everything captured from one snapshot-enabled exec run; the
/// platform-specific tests below assert on these fields.
#[derive(Debug)]
struct SnapshotRun {
    // Begin event matching the mocked tool call; `begin.command` carries the
    // spawned argv that the tests inspect.
    begin: ExecCommandBeginEvent,
    // End event; carries the exit code and captured stdout.
    end: ExecCommandEndEvent,
    // Path of the snapshot file found under `<codex_home>/shell_snapshots`.
    snapshot_path: PathBuf,
    // Full text of that snapshot file.
    snapshot_content: String,
    // CODEX_HOME of the harness, for `starts_with` checks on `snapshot_path`.
    codex_home: PathBuf,
}
/// Drive a single unified-exec (`exec_command`) tool call with the
/// shell-snapshot feature enabled and gather the artifacts the
/// platform-specific tests assert on.
///
/// Sequence: build a harness with UnifiedExec + ShellSnapshot, mount two SSE
/// turns (the tool call, then a closing assistant message), submit a user
/// turn, capture ExecCommandBegin, read the snapshot file that appears under
/// `<codex_home>/shell_snapshots`, capture ExecCommandEnd, then wait for
/// TaskComplete.
#[allow(clippy::expect_used)]
async fn run_snapshot_command(command: &str) -> Result<SnapshotRun> {
    let builder = test_codex().with_config(|config| {
        config.use_experimental_unified_exec_tool = true;
        config.features.enable(Feature::UnifiedExec);
        config.features.enable(Feature::ShellSnapshot);
    });
    let harness = TestCodexHarness::with_builder(builder).await?;
    // Arguments of the mocked `exec_command` tool call.
    let args = json!({
        "cmd": command,
        "yield_time_ms": 1000,
    });
    let call_id = "shell-snapshot-exec";
    // Turn 1: the model issues the tool call. Turn 2: the model finishes.
    let responses = vec![
        sse(vec![
            ev_response_created("resp-1"),
            ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
            ev_completed("resp-1"),
        ]),
        sse(vec![
            ev_response_created("resp-2"),
            ev_assistant_message("msg-1", "done"),
            ev_completed("resp-2"),
        ]),
    ];
    mount_sse_sequence(harness.server(), responses).await;
    let test = harness.test();
    let codex = test.codex.clone();
    let codex_home = test.home.path().to_path_buf();
    let session_model = test.session_configured.model.clone();
    let cwd = test.cwd_path().to_path_buf();
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "run unified exec with shell snapshot".into(),
            }],
            final_output_json_schema: None,
            cwd,
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: session_model,
            effort: None,
            summary: ReasoningSummary::Auto,
        })
        .await?;
    let begin = wait_for_event_match(&codex, |ev| match ev {
        EventMsg::ExecCommandBegin(ev) if ev.call_id == call_id => Some(ev.clone()),
        _ => None,
    })
    .await;
    // The snapshot directory is read after the begin event fires; the first
    // (assumed only — TODO confirm) entry is taken as the snapshot.
    let mut entries = fs::read_dir(codex_home.join("shell_snapshots")).await?;
    let snapshot_path = entries
        .next_entry()
        .await?
        .map(|entry| entry.path())
        .expect("shell snapshot created");
    let snapshot_content = fs::read_to_string(&snapshot_path).await?;
    let end = wait_for_event_match(&codex, |ev| match ev {
        EventMsg::ExecCommandEnd(ev) if ev.call_id == call_id => Some(ev.clone()),
        _ => None,
    })
    .await;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
    Ok(SnapshotRun {
        begin,
        end,
        snapshot_path,
        snapshot_content,
        codex_home,
    })
}
/// Same flow as [`run_snapshot_command`] but driving the `shell_command`
/// tool instead of unified exec: only ShellSnapshot is enabled, and the
/// tool arguments use `command`/`timeout_ms` rather than
/// `cmd`/`yield_time_ms`.
#[allow(clippy::expect_used)]
async fn run_shell_command_snapshot(command: &str) -> Result<SnapshotRun> {
    let builder = test_codex().with_config(|config| {
        config.features.enable(Feature::ShellSnapshot);
    });
    let harness = TestCodexHarness::with_builder(builder).await?;
    // Arguments of the mocked `shell_command` tool call.
    let args = json!({
        "command": command,
        "timeout_ms": 1000,
    });
    let call_id = "shell-snapshot-command";
    // Turn 1: the model issues the tool call. Turn 2: the model finishes.
    let responses = vec![
        sse(vec![
            ev_response_created("resp-1"),
            ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
            ev_completed("resp-1"),
        ]),
        sse(vec![
            ev_response_created("resp-2"),
            ev_assistant_message("msg-1", "done"),
            ev_completed("resp-2"),
        ]),
    ];
    mount_sse_sequence(harness.server(), responses).await;
    let test = harness.test();
    let codex = test.codex.clone();
    let codex_home = test.home.path().to_path_buf();
    let session_model = test.session_configured.model.clone();
    let cwd = test.cwd_path().to_path_buf();
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "run shell_command with shell snapshot".into(),
            }],
            final_output_json_schema: None,
            cwd,
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: session_model,
            effort: None,
            summary: ReasoningSummary::Auto,
        })
        .await?;
    let begin = wait_for_event_match(&codex, |ev| match ev {
        EventMsg::ExecCommandBegin(ev) if ev.call_id == call_id => Some(ev.clone()),
        _ => None,
    })
    .await;
    // Read the snapshot after the begin event; first (assumed only — TODO
    // confirm) directory entry is taken as the snapshot file.
    let mut entries = fs::read_dir(codex_home.join("shell_snapshots")).await?;
    let snapshot_path = entries
        .next_entry()
        .await?
        .map(|entry| entry.path())
        .expect("shell snapshot created");
    let snapshot_content = fs::read_to_string(&snapshot_path).await?;
    let end = wait_for_event_match(&codex, |ev| match ev {
        EventMsg::ExecCommandEnd(ev) if ev.call_id == call_id => Some(ev.clone()),
        _ => None,
    })
    .await;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
    Ok(SnapshotRun {
        begin,
        end,
        snapshot_path,
        snapshot_content,
        codex_home,
    })
}
/// Convert CRLF sequences to LF so output assertions are
/// platform-independent. Lone `\r` characters are left untouched.
fn normalize_newlines(text: &str) -> String {
    let mut normalized = String::with_capacity(text.len());
    let mut chars = text.chars().peekable();
    while let Some(c) = chars.next() {
        // Drop the `\r` of a `\r\n` pair; the `\n` is pushed next iteration.
        if c == '\r' && chars.peek() == Some(&'\n') {
            continue;
        }
        normalized.push(c);
    }
    normalized
}
/// Sanity-check that a captured POSIX shell snapshot contains every section
/// the tests rely on: the header comment, the aliases/exports/setopts
/// sections, and at least one PATH export.
///
/// Panics with a message that includes the offending snapshot text, so a
/// failing run shows what was actually captured (the original version only
/// did this for the PATH check, leaving the other four asserts undiagnosable).
fn assert_posix_snapshot_sections(snapshot: &str) {
    for marker in ["# Snapshot file", "aliases ", "exports ", "setopts "] {
        assert!(
            snapshot.contains(marker),
            "snapshot missing section {marker:?}; snapshot={snapshot:?}"
        );
    }
    assert!(
        snapshot.contains("PATH"),
        "snapshot should include PATH exports; snapshot={snapshot:?}"
    );
}
#[cfg_attr(not(target_os = "linux"), ignore)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn linux_unified_exec_uses_shell_snapshot() -> Result<()> {
    let command = "echo snapshot-linux";
    let run = run_snapshot_command(command).await?;
    let stdout = normalize_newlines(&run.end.stdout);
    // On Linux the wrapped invocation is exactly `<shell> -lc <command>`.
    assert_eq!(run.begin.command.len(), 3);
    assert_eq!(run.begin.command.get(1).map(String::as_str), Some("-lc"));
    assert_eq!(run.begin.command.get(2).map(String::as_str), Some(command));
    // The snapshot lives under CODEX_HOME and has all expected sections.
    assert!(run.snapshot_path.starts_with(&run.codex_home));
    assert_posix_snapshot_sections(&run.snapshot_content);
    // The command itself succeeded and produced the marker on stdout.
    assert_eq!(run.end.exit_code, 0);
    assert!(
        stdout.contains("snapshot-linux"),
        "stdout should contain snapshot marker; stdout={stdout:?}"
    );
    Ok(())
}
// NOTE(review): despite the `linux_` prefix, the cfg only skips Windows, so
// this also runs on macOS and other non-Windows targets — confirm intent.
#[cfg_attr(target_os = "windows", ignore)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn linux_shell_command_uses_shell_snapshot() -> Result<()> {
    let marker_command = "echo shell-command-snapshot-linux";
    let run = run_shell_command_snapshot(marker_command).await?;
    // shell_command wraps the script as `<shell> -lc <command>`.
    assert_eq!(run.begin.command.len(), 3);
    assert_eq!(run.begin.command.get(1).map(String::as_str), Some("-lc"));
    assert_eq!(
        run.begin.command.get(2).map(String::as_str),
        Some(marker_command)
    );
    assert!(run.snapshot_path.starts_with(&run.codex_home));
    assert_posix_snapshot_sections(&run.snapshot_content);
    assert_eq!(run.end.exit_code, 0);
    assert_eq!(
        normalize_newlines(&run.end.stdout).trim(),
        "shell-command-snapshot-linux"
    );
    Ok(())
}
#[cfg_attr(target_os = "windows", ignore)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
    // With shell snapshots enabled, a `shell_command` whose script is an
    // `apply_patch` heredoc must still be applied as a patch (the target file
    // appears with the patched content) while a snapshot is still produced.
    let builder = test_codex().with_config(|config| {
        config.features.enable(Feature::ShellSnapshot);
        config.include_apply_patch_tool = true;
    });
    let harness = TestCodexHarness::with_builder(builder).await?;
    let test = harness.test();
    let codex = test.codex.clone();
    let cwd = test.cwd_path().to_path_buf();
    let codex_home = test.home.path().to_path_buf();
    let target = cwd.join("snapshot-apply.txt");
    // apply_patch heredoc that adds `snapshot-apply.txt`.
    let script = "apply_patch <<'EOF'\n*** Begin Patch\n*** Add File: snapshot-apply.txt\n+hello from snapshot\n*** End Patch\nEOF\n";
    let args = json!({
        "command": script,
        "timeout_ms": 1_000,
    });
    let call_id = "shell-snapshot-apply-patch";
    // Turn 1: the model issues the tool call. Turn 2: the model finishes.
    let responses = vec![
        sse(vec![
            ev_response_created("resp-1"),
            ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
            ev_completed("resp-1"),
        ]),
        sse(vec![
            ev_response_created("resp-2"),
            ev_assistant_message("msg-1", "done"),
            ev_completed("resp-2"),
        ]),
    ];
    mount_sse_sequence(harness.server(), responses).await;
    let model = test.session_configured.model.clone();
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "apply patch via shell_command with snapshot".into(),
            }],
            final_output_json_schema: None,
            cwd: cwd.clone(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model,
            effort: None,
            summary: ReasoningSummary::Auto,
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
    // The patch was applied: the target file exists with the patched content.
    assert_eq!(fs::read_to_string(&target).await?, "hello from snapshot\n");
    // A snapshot was still written and looks like a POSIX snapshot.
    let mut entries = fs::read_dir(codex_home.join("shell_snapshots")).await?;
    let snapshot_path = entries
        .next_entry()
        .await?
        .map(|entry| entry.path())
        .expect("shell snapshot created");
    let snapshot_content = fs::read_to_string(&snapshot_path).await?;
    assert_posix_snapshot_sections(&snapshot_content);
    Ok(())
}
// NOTE(review): the two cfg_attr lines below combine to ignore this test on
// EVERY platform — `not(macos)` ignores it off-macOS, and the second
// attribute ignores it on macOS itself. Presumably the second is a temporary
// disable until unrestricted networking is available; confirm and remove it
// when that is resolved.
#[cfg_attr(not(target_os = "macos"), ignore)]
#[cfg_attr(
    target_os = "macos",
    ignore = "requires unrestricted networking on macOS"
)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn macos_unified_exec_uses_shell_snapshot() -> Result<()> {
    let command = "echo snapshot-macos";
    let run = run_snapshot_command(command).await?;
    let shell_path = run
        .begin
        .command
        .first()
        .expect("shell path recorded")
        .clone();
    // Asserted argv shape: `<shell> -c '. "$0" && exec "$@"' … <shell> -c
    // <command>` — the bootstrap dot-sources its first argument (index 3,
    // unasserted; presumably the snapshot path — confirm) before running the
    // inner command with the same shell binary.
    assert_eq!(run.begin.command.get(1).map(String::as_str), Some("-c"));
    assert_eq!(
        run.begin.command.get(2).map(String::as_str),
        Some(". \"$0\" && exec \"$@\"")
    );
    assert_eq!(run.begin.command.get(4), Some(&shell_path));
    assert_eq!(run.begin.command.get(5).map(String::as_str), Some("-c"));
    assert_eq!(run.begin.command.last(), Some(&command.to_string()));
    assert!(run.snapshot_path.starts_with(&run.codex_home));
    assert_posix_snapshot_sections(&run.snapshot_content);
    assert_eq!(normalize_newlines(&run.end.stdout).trim(), "snapshot-macos");
    assert_eq!(run.end.exit_code, 0);
    Ok(())
}
// #[cfg_attr(not(target_os = "windows"), ignore)]
// NOTE(review): unconditionally ignored for now; the commented-out cfg_attr
// above suggests the intent is to re-enable this as a Windows-only test.
#[ignore]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn windows_unified_exec_uses_shell_snapshot() -> Result<()> {
    let command = "Write-Output snapshot-windows";
    let run = run_snapshot_command(command).await?;
    // Locate the argv element carrying the snapshot path.
    let snapshot_index = run
        .begin
        .command
        .iter()
        .position(|arg| arg.contains("shell_snapshots"))
        .expect("snapshot argument exists");
    // The invocation runs without profiles and uses a bootstrap that
    // dot-sources the snapshot before running the command.
    assert!(run.begin.command.iter().any(|arg| arg == "-NoProfile"));
    assert!(
        run.begin
            .command
            .iter()
            .any(|arg| arg == "param($snapshot) . $snapshot; & @args")
    );
    assert!(snapshot_index > 0);
    assert_eq!(run.begin.command.last(), Some(&command.to_string()));
    assert!(run.snapshot_path.starts_with(&run.codex_home));
    // Windows snapshots use `#`-commented section headers.
    assert!(run.snapshot_content.contains("# Snapshot file"));
    assert!(run.snapshot_content.contains("# aliases "));
    assert!(run.snapshot_content.contains("# exports "));
    assert_eq!(
        normalize_newlines(&run.end.stdout).trim(),
        "snapshot-windows"
    );
    assert_eq!(run.end.exit_code, 0);
    Ok(())
}

View File

@@ -0,0 +1,136 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_core::protocol::SkillLoadOutcomeInfo;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::test_codex;
use std::fs;
use std::path::Path;
/// Create `<home>/skills/<name>/SKILL.md` containing YAML front matter
/// (`name`, `description`) followed by `body`, and return the path of the
/// written file. Panics on I/O failure (test-only helper).
fn write_skill(home: &Path, name: &str, description: &str, body: &str) -> std::path::PathBuf {
    let skill_file = home.join("skills").join(name).join("SKILL.md");
    let parent = skill_file.parent().expect("skill file has a parent dir");
    fs::create_dir_all(parent).unwrap();
    let contents = format!("---\nname: {name}\ndescription: {description}\n---\n\n{body}\n");
    fs::write(&skill_file, contents).unwrap();
    skill_file
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn user_turn_includes_skill_instructions() -> Result<()> {
    // A user turn that references a skill must carry the rendered skill block
    // (name, path, body) in the outgoing request's user input.
    skip_if_no_network!(Ok(()));
    let server = start_mock_server().await;
    let skill_body = "skill body";
    // Register a "demo" skill on disk before the conversation is built so the
    // session can discover it.
    let mut builder = test_codex()
        .with_config(|cfg| {
            cfg.features.enable(Feature::Skills);
        })
        .with_pre_build_hook(|home| {
            write_skill(home, "demo", "demo skill", skill_body);
        });
    let test = builder.build(&server).await?;
    let skill_path = test.codex_home_path().join("skills/demo/SKILL.md");
    // Canonicalize so the path asserted below matches what ends up in the
    // request (presumably to tolerate symlinked temp dirs — confirm).
    let skill_path = std::fs::canonicalize(skill_path)?;
    let mock = mount_sse_once(
        &server,
        sse(vec![
            ev_response_created("resp-1"),
            ev_assistant_message("msg-1", "done"),
            ev_completed("resp-1"),
        ]),
    )
    .await;
    let session_model = test.session_configured.model.clone();
    // Submit a turn containing both a text item mentioning the skill and an
    // explicit Skill input item.
    test.codex
        .submit(Op::UserTurn {
            items: vec![
                UserInput::Text {
                    text: "please use $demo".to_string(),
                },
                UserInput::Skill {
                    name: "demo".to_string(),
                    path: skill_path.clone(),
                },
            ],
            final_output_json_schema: None,
            cwd: test.cwd_path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: session_model,
            effort: None,
            summary: codex_protocol::config_types::ReasoningSummary::Auto,
        })
        .await?;
    core_test_support::wait_for_event(test.codex.as_ref(), |event| {
        matches!(event, codex_core::protocol::EventMsg::TaskComplete(_))
    })
    .await;
    // Inspect the single captured request: some user text must contain the
    // skill markup with the name, a path element, the body, and the path.
    let request = mock.single_request();
    let user_texts = request.message_input_texts("user");
    let skill_path_str = skill_path.to_string_lossy();
    assert!(
        user_texts.iter().any(|text| {
            text.contains("<skill>\n<name>demo</name>")
                && text.contains("<path>")
                && text.contains(skill_body)
                && text.contains(skill_path_str.as_ref())
        }),
        "expected skill instructions in user input, got {user_texts:?}"
    );
    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn skill_load_errors_surface_in_session_configured() -> Result<()> {
    skip_if_no_network!(Ok(()));
    let server = start_mock_server().await;
    // Lay down a SKILL.md whose front matter cannot be parsed.
    let mut builder = test_codex()
        .with_config(|cfg| {
            cfg.features.enable(Feature::Skills);
        })
        .with_pre_build_hook(|home| {
            let skill_dir = home.join("skills").join("broken");
            fs::create_dir_all(&skill_dir).unwrap();
            fs::write(skill_dir.join("SKILL.md"), "not yaml").unwrap();
        });
    let test = builder.build(&server).await?;
    // The load outcome reported at session setup should carry the failure.
    let outcome = test
        .session_configured
        .skill_load_outcome
        .as_ref()
        .expect("skill outcome present");
    let SkillLoadOutcomeInfo { skills, errors } = outcome;
    assert!(
        skills.is_empty(),
        "expected no skills loaded, got {skills:?}"
    );
    assert_eq!(errors.len(), 1, "expected one load error");
    let error_path = errors[0].path.to_string_lossy();
    assert!(
        error_path.ends_with("skills/broken/SKILL.md"),
        "unexpected error path: {error_path}"
    );
    Ok(())
}

View File

@@ -10,6 +10,7 @@ use anyhow::Result;
use codex_core::features::Feature;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::SandboxPolicy;
use codex_core::sandboxing::SandboxPermissions;
use core_test_support::assert_regex_match;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
@@ -105,7 +106,7 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
let first_args = json!({
"command": command,
"timeout_ms": 1_000,
"with_escalated_permissions": true,
"sandbox_permissions": SandboxPermissions::RequireEscalated,
});
let second_args = json!({
"command": command,

View File

@@ -1,4 +1,3 @@
#![cfg(not(target_os = "windows"))]
use std::collections::HashMap;
use std::ffi::OsStr;
use std::fs;
@@ -19,11 +18,13 @@ use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::get_responses_request_bodies;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::skip_if_sandbox;
use core_test_support::skip_if_windows;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::TestCodexHarness;
use core_test_support::test_codex::test_codex;
@@ -155,6 +156,7 @@ fn collect_tool_outputs(bodies: &[Value]) -> Result<HashMap<String, ParsedUnifie
async fn unified_exec_intercepts_apply_patch_exec_command() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let builder = test_codex().with_config(|config| {
config.include_apply_patch_tool = true;
@@ -227,6 +229,7 @@ async fn unified_exec_intercepts_apply_patch_exec_command() -> Result<()> {
false
}
EventMsg::ExecCommandBegin(event) if event.call_id == call_id => {
println!("Saw it");
saw_exec_begin = true;
false
}
@@ -279,6 +282,7 @@ async fn unified_exec_intercepts_apply_patch_exec_command() -> Result<()> {
async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -350,6 +354,7 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
async fn unified_exec_resolves_relative_workdir() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -427,6 +432,7 @@ async fn unified_exec_resolves_relative_workdir() -> Result<()> {
async fn unified_exec_respects_workdir_override() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -505,6 +511,7 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
async fn unified_exec_emits_exec_command_end_event() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -591,6 +598,7 @@ async fn unified_exec_emits_exec_command_end_event() -> Result<()> {
async fn unified_exec_emits_output_delta_for_exec_command() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -642,16 +650,16 @@ async fn unified_exec_emits_output_delta_for_exec_command() -> Result<()> {
})
.await?;
let delta = wait_for_event_match(&codex, |msg| match msg {
EventMsg::ExecCommandOutputDelta(ev) if ev.call_id == call_id => Some(ev.clone()),
let event = wait_for_event_match(&codex, |msg| match msg {
EventMsg::ExecCommandEnd(ev) if ev.call_id == call_id => Some(ev.clone()),
_ => None,
})
.await;
let text = String::from_utf8_lossy(&delta.chunk).to_string();
let text = event.stdout;
assert!(
text.contains("HELLO-UEXEC"),
"delta chunk missing expected text: {text:?}"
"delta chunk missing expected text: {text:?}",
);
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
@@ -659,9 +667,119 @@ async fn unified_exec_emits_output_delta_for_exec_command() -> Result<()> {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_emits_output_delta_for_write_stdin() -> Result<()> {
// Exercises the whole unified-exec lifecycle for a short-lived command:
// Begin -> (no streamed deltas for an early-exit command) -> a single End
// event emitted by the background watcher, carrying the aggregated output.
async fn unified_exec_full_lifecycle_with_background_end_event() -> Result<()> {
// Environment guards: this test needs network, must not itself be sandboxed,
// and is not expected to work on Windows.
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
// Enable the experimental unified exec tool both via the legacy flag and the
// feature gate.
let mut builder = test_codex().with_config(|config| {
config.use_experimental_unified_exec_tool = true;
config.features.enable(Feature::UnifiedExec);
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let call_id = "uexec-full-lifecycle";
// A command that exits quickly; yield_time_ms lets the tool return before
// the process is reaped, so the End event comes from the background watcher.
let args = json!({
"cmd": "printf 'HELLO-FULL-LIFECYCLE'",
"yield_time_ms": 250,
});
// First SSE turn triggers the exec_command tool call; second turn ends the task.
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_response_created("resp-2"),
ev_assistant_message("msg-1", "finished"),
ev_completed("resp-2"),
]),
];
mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "exercise full unified exec lifecycle".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
// Drain every event for this turn, recording the Begin/End events for our
// call id and counting any streamed deltas that contain the marker.
let mut begin_event = None;
let mut end_event = None;
let mut saw_delta_with_marker = 0;
loop {
let msg = wait_for_event(&codex, |_| true).await;
match msg {
EventMsg::ExecCommandBegin(ev) if ev.call_id == call_id => begin_event = Some(ev),
EventMsg::ExecCommandOutputDelta(ev) if ev.call_id == call_id => {
let text = String::from_utf8_lossy(&ev.chunk);
if text.contains("HELLO-FULL-LIFECYCLE") {
saw_delta_with_marker += 1;
}
}
EventMsg::ExecCommandEnd(ev) if ev.call_id == call_id => {
// Guard against duplicate End events for the same call id.
assert!(
end_event.is_none(),
"expected a single ExecCommandEnd event for this call id"
);
end_event = Some(ev);
}
EventMsg::TaskComplete(_) => break,
_ => {}
}
}
let begin_event = begin_event.expect("expected ExecCommandBegin event");
assert_eq!(begin_event.call_id, call_id);
assert!(
begin_event.process_id.is_some(),
"begin event should include a process_id for a long-lived session"
);
// Early-exit commands deliver their output via the End event, not deltas.
assert_eq!(
saw_delta_with_marker, 0,
"no ExecCommandOutputDelta should be sent for early exit commands"
);
let end_event = end_event.expect("expected ExecCommandEnd event");
assert_eq!(end_event.call_id, call_id);
assert_eq!(end_event.exit_code, 0);
assert!(
end_event.process_id.is_some(),
"end event should include process_id emitted by background watcher"
);
// The watcher aggregates the whole PTY transcript into the End event.
assert!(
end_event.aggregated_output.contains("HELLO-FULL-LIFECYCLE"),
"aggregated_output should contain the full PTY transcript; got {:?}",
end_event.aggregated_output
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_emits_terminal_interaction_for_write_stdin() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -733,34 +851,34 @@ async fn unified_exec_emits_output_delta_for_write_stdin() -> Result<()> {
})
.await?;
// Expect a delta event corresponding to the write_stdin call.
let delta = wait_for_event_match(&codex, |msg| match msg {
EventMsg::ExecCommandOutputDelta(ev) if ev.call_id == open_call_id => {
let text = String::from_utf8_lossy(&ev.chunk);
if text.contains("WSTDIN-MARK") {
Some(ev.clone())
} else {
None
let mut terminal_interaction = None;
loop {
let msg = wait_for_event(&codex, |_| true).await;
match msg {
EventMsg::TerminalInteraction(ev) if ev.call_id == open_call_id => {
terminal_interaction = Some(ev);
}
EventMsg::TaskComplete(_) => break,
_ => {}
}
_ => None,
})
.await;
}
let text = String::from_utf8_lossy(&delta.chunk).to_string();
assert!(
text.contains("WSTDIN-MARK"),
"stdin delta chunk missing expected text: {text:?}"
);
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
let delta = terminal_interaction.expect("expected TerminalInteraction event");
assert_eq!(delta.process_id, "1000");
let expected_stdin = stdin_args
.get("chars")
.and_then(Value::as_str)
.expect("stdin chars");
assert_eq!(delta.stdin, expected_stdin);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_emits_begin_for_write_stdin() -> Result<()> {
async fn unified_exec_terminal_interaction_captures_delayed_output() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -775,18 +893,33 @@ async fn unified_exec_emits_begin_for_write_stdin() -> Result<()> {
..
} = builder.build(&server).await?;
let open_call_id = "uexec-open-for-begin";
let open_call_id = "uexec-delayed-open";
let open_args = json!({
"shell": "bash".to_string(),
"cmd": "bash -i".to_string(),
"yield_time_ms": 200,
"cmd": "sleep 3 && echo MARKER1 && sleep 3 && echo MARKER2",
"yield_time_ms": 10,
});
let stdin_call_id = "uexec-stdin-begin";
let stdin_args = json!({
"chars": "echo hello",
// Poll stdin three times: first for no output, second after the first marker,
// and a final long poll to capture the second marker.
let first_poll_call_id = "uexec-delayed-poll-1";
let first_poll_args = json!({
"chars": "",
"session_id": 1000,
"yield_time_ms": 400,
"yield_time_ms": 10,
});
let second_poll_call_id = "uexec-delayed-poll-2";
let second_poll_args = json!({
"chars": "",
"session_id": 1000,
"yield_time_ms": 4000,
});
let third_poll_call_id = "uexec-delayed-poll-3";
let third_poll_args = json!({
"chars": "",
"session_id": 1000,
"yield_time_ms": 6000,
});
let responses = vec![
@@ -802,17 +935,35 @@ async fn unified_exec_emits_begin_for_write_stdin() -> Result<()> {
sse(vec![
ev_response_created("resp-2"),
ev_function_call(
stdin_call_id,
first_poll_call_id,
"write_stdin",
&serde_json::to_string(&stdin_args)?,
&serde_json::to_string(&first_poll_args)?,
),
ev_completed("resp-2"),
]),
sse(vec![
ev_response_created("resp-3"),
ev_assistant_message("msg-1", "done"),
ev_function_call(
second_poll_call_id,
"write_stdin",
&serde_json::to_string(&second_poll_args)?,
),
ev_completed("resp-3"),
]),
sse(vec![
ev_response_created("resp-4"),
ev_function_call(
third_poll_call_id,
"write_stdin",
&serde_json::to_string(&third_poll_args)?,
),
ev_completed("resp-4"),
]),
sse(vec![
ev_response_created("resp-5"),
ev_assistant_message("msg-1", "complete"),
ev_completed("resp-5"),
]),
];
mount_sse_sequence(&server, responses).await;
@@ -821,7 +972,7 @@ async fn unified_exec_emits_begin_for_write_stdin() -> Result<()> {
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "begin events for stdin".into(),
text: "delayed terminal interaction output".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
@@ -833,30 +984,91 @@ async fn unified_exec_emits_begin_for_write_stdin() -> Result<()> {
})
.await?;
let begin_event = wait_for_event_match(&codex, |msg| match msg {
EventMsg::ExecCommandBegin(ev) if ev.call_id == stdin_call_id => Some(ev.clone()),
_ => None,
})
.await;
let mut begin_event = None;
let mut end_event = None;
let mut task_completed = false;
let mut terminal_events = Vec::new();
let mut delta_text = String::new();
assert_command(&begin_event.command, "-lc", "bash -i");
assert_eq!(
begin_event.interaction_input,
Some("echo hello".to_string())
);
assert_eq!(
begin_event.source,
ExecCommandSource::UnifiedExecInteraction
// Consume all events for this turn so we can assert on each stage.
loop {
let msg = wait_for_event(&codex, |_| true).await;
match msg {
EventMsg::ExecCommandBegin(ev) if ev.call_id == open_call_id => {
begin_event = Some(ev);
}
EventMsg::ExecCommandOutputDelta(ev) if ev.call_id == open_call_id => {
delta_text.push_str(&String::from_utf8_lossy(&ev.chunk));
}
EventMsg::TerminalInteraction(ev) if ev.call_id == open_call_id => {
terminal_events.push(ev);
}
EventMsg::ExecCommandEnd(ev) if ev.call_id == open_call_id => {
end_event = Some(ev);
}
EventMsg::TaskComplete(_) => {
task_completed = true;
}
_ => {}
};
if task_completed && end_event.is_some() {
break;
}
}
let begin_event = begin_event.expect("expected ExecCommandBegin event");
assert!(
begin_event.process_id.is_some(),
"begin event should include process_id for a live session"
);
// We expect three terminal interactions matching the three write_stdin calls.
assert_eq!(
terminal_events.len(),
3,
"expected three terminal interactions; got {terminal_events:?}"
);
for event in &terminal_events {
assert_eq!(event.call_id, open_call_id);
assert_eq!(event.process_id, "1000");
}
assert_eq!(
terminal_events
.iter()
.map(|ev| ev.stdin.as_str())
.collect::<Vec<_>>(),
vec!["", "", ""],
"terminal interactions should reflect the three stdin polls"
);
assert!(
delta_text.contains("MARKER1") && delta_text.contains("MARKER2"),
"streamed deltas should contain both markers; got {delta_text:?}"
);
let end_event = end_event.expect("expected ExecCommandEnd event");
assert_eq!(end_event.call_id, open_call_id);
assert_eq!(end_event.exit_code, 0);
assert!(
end_event.process_id.is_some(),
"end event should include the process_id"
);
assert!(
end_event.aggregated_output.contains("MARKER1")
&& end_event.aggregated_output.contains("MARKER2"),
"aggregated output should include both markers in order; got {:?}",
end_event.aggregated_output
);
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_emits_begin_event_for_write_stdin_requests() -> Result<()> {
async fn unified_exec_emits_one_begin_and_one_end_event() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -874,8 +1086,8 @@ async fn unified_exec_emits_begin_event_for_write_stdin_requests() -> Result<()>
let open_call_id = "uexec-open-session";
let open_args = json!({
"shell": "bash".to_string(),
"cmd": "bash -i".to_string(),
"yield_time_ms": 250,
"cmd": "sleep 0.1".to_string(),
"yield_time_ms": 10,
});
let poll_call_id = "uexec-poll-empty";
@@ -930,10 +1142,12 @@ async fn unified_exec_emits_begin_event_for_write_stdin_requests() -> Result<()>
.await?;
let mut begin_events = Vec::new();
let mut end_events = Vec::new();
loop {
let event_msg = wait_for_event(&codex, |_| true).await;
match event_msg {
EventMsg::ExecCommandBegin(event) => begin_events.push(event),
EventMsg::ExecCommandEnd(event) => end_events.push(event),
EventMsg::TaskComplete(_) => break,
_ => {}
}
@@ -941,16 +1155,19 @@ async fn unified_exec_emits_begin_event_for_write_stdin_requests() -> Result<()>
assert_eq!(
begin_events.len(),
2,
"expected begin events for the startup command and the write_stdin call"
1,
"expected begin events for the startup command"
);
let open_event = begin_events
.iter()
.find(|ev| ev.call_id == open_call_id)
.expect("missing exec_command begin");
assert_eq!(
end_events.len(),
1,
"expected end event for the write_stdin call"
);
assert_command(&open_event.command, "-lc", "bash -i");
let open_event = &begin_events[0];
assert_command(&open_event.command, "-lc", "sleep 0.1");
assert!(
open_event.interaction_input.is_none(),
@@ -958,18 +1175,8 @@ async fn unified_exec_emits_begin_event_for_write_stdin_requests() -> Result<()>
);
assert_eq!(open_event.source, ExecCommandSource::UnifiedExecStartup);
let poll_event = begin_events
.iter()
.find(|ev| ev.call_id == poll_call_id)
.expect("missing write_stdin begin");
assert_command(&poll_event.command, "-lc", "bash -i");
assert!(
poll_event.interaction_input.is_none(),
"poll begin events should omit interaction input"
);
assert_eq!(poll_event.source, ExecCommandSource::UnifiedExecInteraction);
let end_event = &end_events[0];
assert_eq!(end_event.call_id, open_call_id);
Ok(())
}
@@ -978,6 +1185,7 @@ async fn unified_exec_emits_begin_event_for_write_stdin_requests() -> Result<()>
async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -1033,10 +1241,7 @@ async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let bodies = get_responses_request_bodies(&server).await;
let outputs = collect_tool_outputs(&bodies)?;
let metadata = outputs
@@ -1085,6 +1290,7 @@ async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {
async fn unified_exec_respects_early_exit_notifications() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -1139,10 +1345,7 @@ async fn unified_exec_respects_early_exit_notifications() -> Result<()> {
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let bodies = get_responses_request_bodies(&server).await;
let outputs = collect_tool_outputs(&bodies)?;
let output = outputs
@@ -1177,6 +1380,7 @@ async fn unified_exec_respects_early_exit_notifications() -> Result<()> {
async fn write_stdin_returns_exit_metadata_and_clears_session() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -1266,10 +1470,7 @@ async fn write_stdin_returns_exit_metadata_and_clears_session() -> Result<()> {
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let bodies = get_responses_request_bodies(&server).await;
let outputs = collect_tool_outputs(&bodies)?;
@@ -1338,6 +1539,7 @@ async fn write_stdin_returns_exit_metadata_and_clears_session() -> Result<()> {
async fn unified_exec_emits_end_event_when_session_dies_via_stdin() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -1442,6 +1644,7 @@ async fn unified_exec_emits_end_event_when_session_dies_via_stdin() -> Result<()
async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -1516,10 +1719,7 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let bodies = get_responses_request_bodies(&server).await;
let outputs = collect_tool_outputs(&bodies)?;
@@ -1553,6 +1753,7 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
async fn unified_exec_streams_after_lagged_output() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -1652,10 +1853,7 @@ PY
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let bodies = get_responses_request_bodies(&server).await;
let outputs = collect_tool_outputs(&bodies)?;
@@ -1684,6 +1882,7 @@ PY
async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -1763,10 +1962,7 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let bodies = get_responses_request_bodies(&server).await;
let outputs = collect_tool_outputs(&bodies)?;
@@ -1790,6 +1986,7 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
async fn unified_exec_formats_large_output_summary() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -1851,10 +2048,7 @@ PY
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let bodies = get_responses_request_bodies(&server).await;
let outputs = collect_tool_outputs(&bodies)?;
let large_output = outputs.get(call_id).expect("missing large output summary");
@@ -1875,6 +2069,7 @@ PY
async fn unified_exec_runs_under_sandbox() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;
@@ -1930,10 +2125,7 @@ async fn unified_exec_runs_under_sandbox() -> Result<()> {
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let bodies = get_responses_request_bodies(&server).await;
let outputs = collect_tool_outputs(&bodies)?;
let output = outputs.get(call_id).expect("missing output");
@@ -1943,11 +2135,201 @@ async fn unified_exec_runs_under_sandbox() -> Result<()> {
Ok(())
}
#[cfg(target_os = "macos")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
// macOS-only (see the #[cfg] above): starts an interactive Python under the
// seatbelt (read-only) sandbox, checks the REPL prompt appears — proving the
// session runs in a TTY — then exits it cleanly via write_stdin.
async fn unified_exec_python_prompt_under_seatbelt() -> Result<()> {
skip_if_no_network!(Ok(()));
// Best-effort Python discovery; skip (not fail) when no interpreter exists.
let python = match which::which("python").or_else(|_| which::which("python3")) {
Ok(path) => path,
Err(_) => {
eprintln!("python not found in PATH, skipping test.");
return Ok(());
}
};
let server = start_mock_server().await;
// Enable the unified exec tool via both the legacy flag and the feature gate.
let mut builder = test_codex().with_config(|config| {
config.use_experimental_unified_exec_tool = true;
config.features.enable(Feature::UnifiedExec);
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
// First tool call: launch `python -i` and yield after 1.5s so the session
// stays alive for a follow-up write_stdin.
let startup_call_id = "uexec-python-seatbelt";
let startup_args = serde_json::json!({
"cmd": format!("{} -i", python.display()),
"yield_time_ms": 1_500,
});
// Second tool call: send `exit()` to the live session (session_id 1000).
let exit_call_id = "uexec-python-exit";
let exit_args = serde_json::json!({
"chars": "exit()\n",
"session_id": 1000,
"yield_time_ms": 1_500,
});
// Three SSE turns: start python, write exit(), then finish the task.
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(
startup_call_id,
"exec_command",
&serde_json::to_string(&startup_args)?,
),
ev_completed("resp-1"),
]),
sse(vec![
ev_response_created("resp-2"),
ev_function_call(
exit_call_id,
"write_stdin",
&serde_json::to_string(&exit_args)?,
),
ev_completed("resp-2"),
]),
sse(vec![
ev_response_created("resp-3"),
ev_assistant_message("msg-1", "done"),
ev_completed("resp-3"),
]),
];
mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "start python under seatbelt".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
// ReadOnly policy is what routes the command through the seatbelt sandbox.
sandbox_policy: SandboxPolicy::ReadOnly,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
// Inspect the tool outputs the client POSTed back to the mock server.
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = get_responses_request_bodies(&server).await;
let outputs = collect_tool_outputs(&bodies)?;
let startup_output = outputs
.get(startup_call_id)
.expect("missing python startup output");
// Normalize PTY CRLF line endings before matching.
let output_text = startup_output.output.replace("\r\n", "\n");
// This assert that we are in a TTY.
assert!(
output_text.contains(">>>"),
"python prompt missing from seatbelt output: {output_text:?}"
);
assert_eq!(
startup_output.process_id.as_deref(),
Some("1000"),
"python session should stay alive for follow-up input"
);
let exit_output = outputs
.get(exit_call_id)
.expect("missing python exit output");
assert_eq!(
exit_output.exit_code,
Some(0),
"python should exit cleanly after exit()"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
// Cross-platform smoke test: runs a simple echo through exec_command with no
// skip_if_windows! guard, and matches the output loosely via regex because
// Windows PTYs interleave control characters into the transcript.
async fn unified_exec_runs_on_all_platforms() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = start_mock_server().await;
// Only the feature gate is enabled here (no legacy flag), unlike the other
// tests in this file — presumably the gate alone suffices; verify if this
// test's tool wiring ever diverges from the rest.
let mut builder = test_codex().with_config(|config| {
config.features.enable(Feature::UnifiedExec);
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let call_id = "uexec";
// No yield_time_ms: rely on the tool's default timing behavior.
let args = serde_json::json!({
"cmd": "echo 'hello crossplat'",
});
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "summarize large output".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
// Pull the tool output from the request bodies recorded by the mock server.
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = get_responses_request_bodies(&server).await;
let outputs = collect_tool_outputs(&bodies)?;
let output = outputs.get(call_id).expect("missing output");
// TODO: Weaker match because windows produces control characters
assert_regex_match(".*hello crossplat.*", &output.output);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[ignore]
async fn unified_exec_prunes_exited_sessions_first() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
skip_if_windows!(Ok(()));
let server = start_mock_server().await;

View File

@@ -42,8 +42,10 @@ async fn user_shell_cmd_ls_and_cat_in_temp_dir() {
let mut config = load_default_config_for_test(&codex_home);
config.cwd = cwd.path().to_path_buf();
let conversation_manager =
ConversationManager::with_auth(codex_core::CodexAuth::from_api_key("dummy"));
let conversation_manager = ConversationManager::with_models_provider(
codex_core::CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
);
let NewConversation {
conversation: codex,
..
@@ -99,8 +101,10 @@ async fn user_shell_cmd_can_be_interrupted() {
// Set up isolated config and conversation.
let codex_home = TempDir::new().unwrap();
let config = load_default_config_for_test(&codex_home);
let conversation_manager =
ConversationManager::with_auth(codex_core::CodexAuth::from_api_key("dummy"));
let conversation_manager = ConversationManager::with_models_provider(
codex_core::CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
);
let NewConversation {
conversation: codex,
..