Assemble sandbox/approval/network prompts dynamically (#8961)

- Add a single builder for developer permissions messaging that accepts
SandboxPolicy and approval policy. This builder now drives the developer
“permissions” message that’s injected at session start and any time
sandbox/approval settings change.
- Trim EnvironmentContext to only include cwd, writable roots, and
shell; removed sandbox/approval/network duplication and adjusted XML
serialization and tests accordingly.

Follow-up: adding a config value to replace the developer permissions
message for custom sandboxes.
This commit is contained in:
Ahmed Ibrahim
2026-01-12 15:12:59 -08:00
committed by GitHub
parent 3a6a43ff5c
commit 87f7226cca
30 changed files with 1089 additions and 655 deletions

View File

@@ -284,7 +284,7 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
let expected_initial_json = json!([]);
assert_eq!(initial_json, expected_initial_json);
// 2) Submit new input; the request body must include the prior item followed by the new user input.
// 2) Submit new input; the request body must include the prior items, then initial context, then new user input.
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
@@ -298,24 +298,55 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
let request = resp_mock.single_request();
let request_body = request.body_json();
let expected_input = json!([
{
"type": "message",
"role": "user",
"content": [{ "type": "input_text", "text": "resumed user message" }]
},
{
"type": "message",
"role": "assistant",
"content": [{ "type": "output_text", "text": "resumed assistant message" }]
},
{
"type": "message",
"role": "user",
"content": [{ "type": "input_text", "text": "hello" }]
}
]);
assert_eq!(request_body["input"], expected_input);
let input = request_body["input"].as_array().expect("input array");
let messages: Vec<(String, String)> = input
.iter()
.filter_map(|item| {
let role = item.get("role")?.as_str()?;
let text = item
.get("content")?
.as_array()?
.first()?
.get("text")?
.as_str()?;
Some((role.to_string(), text.to_string()))
})
.collect();
let pos_prior_user = messages
.iter()
.position(|(role, text)| role == "user" && text == "resumed user message")
.expect("prior user message");
let pos_prior_assistant = messages
.iter()
.position(|(role, text)| role == "assistant" && text == "resumed assistant message")
.expect("prior assistant message");
let pos_permissions = messages
.iter()
.position(|(role, text)| role == "developer" && text.contains("`approval_policy`"))
.expect("permissions message");
let pos_user_instructions = messages
.iter()
.position(|(role, text)| {
role == "user"
&& text.contains("be nice")
&& (text.starts_with("# AGENTS.md instructions for ")
|| text.starts_with("<user_instructions>"))
})
.expect("user instructions");
let pos_environment = messages
.iter()
.position(|(role, text)| role == "user" && text.contains("<environment_context>"))
.expect("environment context");
let pos_new_user = messages
.iter()
.position(|(role, text)| role == "user" && text == "hello")
.expect("new user message");
assert!(pos_prior_user < pos_prior_assistant);
assert!(pos_prior_assistant < pos_permissions);
assert!(pos_permissions < pos_user_instructions);
assert!(pos_user_instructions < pos_environment);
assert!(pos_environment < pos_new_user);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -615,17 +646,26 @@ async fn includes_user_instructions_message_in_request() {
.unwrap()
.contains("be nice")
);
assert_message_role(&request_body["input"][0], "user");
assert_message_starts_with(&request_body["input"][0], "# AGENTS.md instructions for ");
assert_message_ends_with(&request_body["input"][0], "</INSTRUCTIONS>");
let ui_text = request_body["input"][0]["content"][0]["text"]
assert_message_role(&request_body["input"][0], "developer");
let permissions_text = request_body["input"][0]["content"][0]["text"]
.as_str()
.expect("invalid permissions message content");
assert!(
permissions_text.contains("`sandbox_mode`"),
"expected permissions message to mention sandbox_mode, got {permissions_text:?}"
);
assert_message_role(&request_body["input"][1], "user");
assert_message_starts_with(&request_body["input"][1], "# AGENTS.md instructions for ");
assert_message_ends_with(&request_body["input"][1], "</INSTRUCTIONS>");
let ui_text = request_body["input"][1]["content"][0]["text"]
.as_str()
.expect("invalid message content");
assert!(ui_text.contains("<INSTRUCTIONS>"));
assert!(ui_text.contains("be nice"));
assert_message_role(&request_body["input"][1], "user");
assert_message_starts_with(&request_body["input"][1], "<environment_context>");
assert_message_ends_with(&request_body["input"][1], "</environment_context>");
assert_message_role(&request_body["input"][2], "user");
assert_message_starts_with(&request_body["input"][2], "<environment_context>");
assert_message_ends_with(&request_body["input"][2], "</environment_context>");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -679,8 +719,10 @@ async fn skills_append_to_instructions() {
let request = resp_mock.single_request();
let request_body = request.body_json();
assert_message_role(&request_body["input"][0], "user");
let instructions_text = request_body["input"][0]["content"][0]["text"]
assert_message_role(&request_body["input"][0], "developer");
assert_message_role(&request_body["input"][1], "user");
let instructions_text = request_body["input"][1]["content"][0]["text"]
.as_str()
.expect("instructions text");
assert!(
@@ -1046,6 +1088,10 @@ async fn includes_developer_instructions_message_in_request() {
let request = resp_mock.single_request();
let request_body = request.body_json();
let permissions_text = request_body["input"][0]["content"][0]["text"]
.as_str()
.expect("invalid permissions message content");
assert!(
!request_body["instructions"]
.as_str()
@@ -1053,18 +1099,24 @@ async fn includes_developer_instructions_message_in_request() {
.contains("be nice")
);
assert_message_role(&request_body["input"][0], "developer");
assert_message_equals(&request_body["input"][0], "be useful");
assert_message_role(&request_body["input"][1], "user");
assert_message_starts_with(&request_body["input"][1], "# AGENTS.md instructions for ");
assert_message_ends_with(&request_body["input"][1], "</INSTRUCTIONS>");
let ui_text = request_body["input"][1]["content"][0]["text"]
assert!(
permissions_text.contains("`sandbox_mode`"),
"expected permissions message to mention sandbox_mode, got {permissions_text:?}"
);
assert_message_role(&request_body["input"][1], "developer");
assert_message_equals(&request_body["input"][1], "be useful");
assert_message_role(&request_body["input"][2], "user");
assert_message_starts_with(&request_body["input"][2], "# AGENTS.md instructions for ");
assert_message_ends_with(&request_body["input"][2], "</INSTRUCTIONS>");
let ui_text = request_body["input"][2]["content"][0]["text"]
.as_str()
.expect("invalid message content");
assert!(ui_text.contains("<INSTRUCTIONS>"));
assert!(ui_text.contains("be nice"));
assert_message_role(&request_body["input"][2], "user");
assert_message_starts_with(&request_body["input"][2], "<environment_context>");
assert_message_ends_with(&request_body["input"][2], "</environment_context>");
assert_message_role(&request_body["input"][3], "user");
assert_message_starts_with(&request_body["input"][3], "<environment_context>");
assert_message_ends_with(&request_body["input"][3], "</environment_context>");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]

View File

@@ -604,8 +604,14 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
.and_then(|item| item.get("text"))
.and_then(|text| text.as_str());
// Ignore the cached UI prefix (project docs + skills) since it is not relevant to
// compaction behavior and can change as bundled skills evolve.
// Ignore cached prefix messages (project docs + permissions) since they are not
// relevant to compaction behavior and can change as bundled prompts evolve.
let role = value.get("role").and_then(|role| role.as_str());
if role == Some("developer")
&& text.is_some_and(|text| text.contains("`sandbox_mode`"))
{
return false;
}
!text.is_some_and(|text| text.starts_with("# AGENTS.md instructions for "))
})
.cloned()
@@ -1726,9 +1732,11 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
.into_iter()
.collect::<VecDeque<_>>();
// System prompt
// Permissions developer message
final_output.pop_front();
// Developer instructions
// User instructions (project docs/skills)
final_output.pop_front();
// Environment context
final_output.pop_front();
let _ = final_output

View File

@@ -216,11 +216,12 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
.as_str()
.unwrap_or_default()
.to_string();
let user_instructions = requests[0]["input"][0]["content"][0]["text"]
let permissions_message = requests[0]["input"][0].clone();
let user_instructions = requests[0]["input"][1]["content"][0]["text"]
.as_str()
.unwrap_or_default()
.to_string();
let environment_context = requests[0]["input"][1]["content"][0]["text"]
let environment_context = requests[0]["input"][2]["content"][0]["text"]
.as_str()
.unwrap_or_default()
.to_string();
@@ -241,6 +242,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
"model": expected_model,
"instructions": prompt,
"input": [
permissions_message,
{
"type": "message",
"role": "user",
@@ -290,6 +292,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
"model": expected_model,
"instructions": prompt,
"input": [
permissions_message,
{
"type": "message",
"role": "user",
@@ -359,6 +362,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
"model": expected_model,
"instructions": prompt,
"input": [
permissions_message,
{
"type": "message",
"role": "user",
@@ -419,6 +423,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
"model": expected_model,
"instructions": prompt,
"input": [
permissions_message,
{
"type": "message",
"role": "user",
@@ -470,6 +475,27 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
}
]
},
permissions_message,
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": user_instructions
}
]
},
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": environment_context
}
]
},
{
"type": "message",
"role": "user",
@@ -499,6 +525,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
"model": expected_model,
"instructions": prompt,
"input": [
permissions_message,
{
"type": "message",
"role": "user",
@@ -550,6 +577,48 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
}
]
},
permissions_message,
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": user_instructions
}
]
},
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": environment_context
}
]
},
permissions_message,
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": user_instructions
}
]
},
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": environment_context
}
]
},
{
"type": "message",
"role": "user",
@@ -664,11 +733,12 @@ async fn compact_resume_after_second_compaction_preserves_history() {
.as_str()
.unwrap_or_default()
.to_string();
let user_instructions = requests[0]["input"][0]["content"][0]["text"]
let permissions_message = requests[0]["input"][0].clone();
let user_instructions = requests[0]["input"][1]["content"][0]["text"]
.as_str()
.unwrap_or_default()
.to_string();
let environment_instructions = requests[0]["input"][1]["content"][0]["text"]
let environment_instructions = requests[0]["input"][2]["content"][0]["text"]
.as_str()
.unwrap_or_default()
.to_string();
@@ -682,6 +752,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
{
"instructions": prompt,
"input": [
permissions_message,
{
"type": "message",
"role": "user",
@@ -723,6 +794,27 @@ async fn compact_resume_after_second_compaction_preserves_history() {
}
]
},
permissions_message,
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": user_instructions
}
]
},
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": environment_instructions
}
]
},
{
"type": "message",
"role": "user",

View File

@@ -138,8 +138,9 @@ async fn fork_thread_twice_drops_to_first_message() {
// GetHistory on fork1 flushed; the file is ready.
let fork1_items = read_items(&fork1_path);
assert!(fork1_items.len() > expected_after_first.len());
pretty_assertions::assert_eq!(
serde_json::to_value(&fork1_items).unwrap(),
serde_json::to_value(&fork1_items[..expected_after_first.len()]).unwrap(),
serde_json::to_value(&expected_after_first).unwrap()
);
@@ -162,8 +163,9 @@ async fn fork_thread_twice_drops_to_first_message() {
.unwrap_or(0);
let expected_after_second: Vec<RolloutItem> = fork1_items[..cut_last_on_fork1].to_vec();
let fork2_items = read_items(&fork2_path);
assert!(fork2_items.len() > expected_after_second.len());
pretty_assertions::assert_eq!(
serde_json::to_value(&fork2_items).unwrap(),
serde_json::to_value(&fork2_items[..expected_after_second.len()]).unwrap(),
serde_json::to_value(&expected_after_second).unwrap()
);
}

View File

@@ -42,6 +42,7 @@ mod model_overrides;
mod model_tools;
mod models_etag_responses;
mod otel;
mod permissions_messages;
mod prompt_caching;
mod quota_exceeded;
mod read_file;

View File

@@ -0,0 +1,448 @@
use anyhow::Result;
use codex_core::config::Constrained;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::user_input::UserInput;
use codex_utils_absolute_path::AbsolutePathBuf;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use pretty_assertions::assert_eq;
use std::collections::HashSet;
use tempfile::TempDir;
/// Collects the text of every permissions message found in `input`.
///
/// An item counts as a permissions message when its `role` is `"developer"`
/// and the `text` of its first `content` entry mentions `` `approval_policy` ``.
/// Items that lack any of those fields, or whose fields have an unexpected
/// JSON type, are ignored. Matches are returned in their original order.
fn permissions_texts(input: &[serde_json::Value]) -> Vec<String> {
    let mut found = Vec::new();
    for entry in input {
        // Only developer-role items can carry the permissions prompt.
        let is_developer = entry.get("role").and_then(|r| r.as_str()) == Some("developer");
        if !is_developer {
            continue;
        }
        // Only the first content part is inspected.
        let first_text = entry
            .get("content")
            .and_then(|content| content.as_array())
            .and_then(|parts| parts.first())
            .and_then(|part| part.get("text"))
            .and_then(|text| text.as_str());
        match first_text {
            Some(body) if body.contains("`approval_policy`") => found.push(body.to_string()),
            _ => {}
        }
    }
    found
}
/// Builds the string returned by `sse` for a two-event stream: a
/// response-created event followed by a completed event, both carrying `id`.
fn sse_completed(id: &str) -> String {
    let events = vec![ev_response_created(id), ev_completed(id)];
    sse(events)
}
/// Checks that the request produced by the first user turn contains exactly
/// one permissions message (as recognised by `permissions_texts`).
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn permissions_message_sent_once_on_start() -> Result<()> {
// NOTE(review): presumably returns `Ok(())` early when the network is
// unavailable — confirm against the macro definition.
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
// One mocked response is enough: the test performs a single turn.
let req = mount_sse_once(&server, sse_completed("resp-1")).await;
// Start the session with the approval policy set to `OnRequest`.
let mut builder = test_codex().with_config(move |config| {
config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
});
let test = builder.build(&server).await?;
test.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello".into(),
}],
final_output_json_schema: None,
})
.await?;
// Wait for the turn to finish before inspecting the captured request.
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let request = req.single_request();
let body = request.body_json();
let input = body["input"].as_array().expect("input array");
let permissions = permissions_texts(input);
assert_eq!(permissions.len(), 1);
Ok(())
}
/// Checks that overriding the approval policy between two turns adds a second,
/// different permissions message to the next request.
///
/// The first request must contain one permissions message; the second must
/// contain two, and their texts must differ.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn permissions_message_added_on_override_change() -> Result<()> {
// NOTE(review): presumably returns `Ok(())` early when the network is
// unavailable — confirm against the macro definition.
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
// One mocked response per user turn; `req1`/`req2` capture the matching requests.
let req1 = mount_sse_once(&server, sse_completed("resp-1")).await;
let req2 = mount_sse_once(&server, sse_completed("resp-2")).await;
let mut builder = test_codex().with_config(move |config| {
config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
});
let test = builder.build(&server).await?;
// Turn 1: runs under the initial `OnRequest` approval policy.
test.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello 1".into(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
// Change only the approval policy; every other override field is left as `None`.
test.codex
.submit(Op::OverrideTurnContext {
cwd: None,
approval_policy: Some(AskForApproval::Never),
sandbox_policy: None,
model: None,
effort: None,
summary: None,
})
.await?;
// Turn 2: submitted after the override.
test.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello 2".into(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let body1 = req1.single_request().body_json();
let body2 = req2.single_request().body_json();
let input1 = body1["input"].as_array().expect("input array");
let input2 = body2["input"].as_array().expect("input array");
let permissions_1 = permissions_texts(input1);
let permissions_2 = permissions_texts(input2);
assert_eq!(permissions_1.len(), 1);
assert_eq!(permissions_2.len(), 2);
// De-duplicate through a set to prove the two messages are not identical.
let unique = permissions_2.into_iter().collect::<HashSet<String>>();
assert_eq!(unique.len(), 2);
Ok(())
}
/// Checks that a second turn with no settings override does not add another
/// permissions message.
///
/// Both requests must contain exactly one permissions message, and the
/// extracted texts must be equal.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn permissions_message_not_added_when_no_change() -> Result<()> {
// NOTE(review): presumably returns `Ok(())` early when the network is
// unavailable — confirm against the macro definition.
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
// One mocked response per user turn; `req1`/`req2` capture the matching requests.
let req1 = mount_sse_once(&server, sse_completed("resp-1")).await;
let req2 = mount_sse_once(&server, sse_completed("resp-2")).await;
let mut builder = test_codex().with_config(move |config| {
config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
});
let test = builder.build(&server).await?;
// Turn 1.
test.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello 1".into(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
// Turn 2: no `OverrideTurnContext` is sent in between.
test.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello 2".into(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let body1 = req1.single_request().body_json();
let body2 = req2.single_request().body_json();
let input1 = body1["input"].as_array().expect("input array");
let input2 = body2["input"].as_array().expect("input array");
let permissions_1 = permissions_texts(input1);
let permissions_2 = permissions_texts(input2);
assert_eq!(permissions_1.len(), 1);
assert_eq!(permissions_2.len(), 1);
assert_eq!(permissions_1, permissions_2);
Ok(())
}
/// Checks the permissions messages present in the first request sent after
/// resuming a session from its rollout file.
///
/// The original session runs two turns with an approval-policy override in
/// between. The first request after resuming must contain three permissions
/// messages with exactly two distinct texts.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn resume_replays_permissions_messages() -> Result<()> {
// NOTE(review): presumably returns `Ok(())` early when the network is
// unavailable — confirm against the macro definition.
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
// Three turns happen in total; only the third request (after resume) is inspected.
let _req1 = mount_sse_once(&server, sse_completed("resp-1")).await;
let _req2 = mount_sse_once(&server, sse_completed("resp-2")).await;
let req3 = mount_sse_once(&server, sse_completed("resp-3")).await;
let mut builder = test_codex().with_config(|config| {
config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
});
let initial = builder.build(&server).await?;
// Keep the rollout path and home so the session can be resumed later.
let rollout_path = initial.session_configured.rollout_path.clone();
let home = initial.home.clone();
// Turn 1 under the initial `OnRequest` policy.
initial
.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello 1".into(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
// Switch the approval policy to `Never` before the second turn.
initial
.codex
.submit(Op::OverrideTurnContext {
cwd: None,
approval_policy: Some(AskForApproval::Never),
sandbox_policy: None,
model: None,
effort: None,
summary: None,
})
.await?;
// Turn 2 under the overridden policy.
initial
.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello 2".into(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
// Resume with the same builder, so the configured policy is `OnRequest` again.
let resumed = builder.resume(&server, home, rollout_path).await?;
resumed
.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "after resume".into(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&resumed.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let body3 = req3.single_request().body_json();
let input = body3["input"].as_array().expect("input array");
let permissions = permissions_texts(input);
// NOTE(review): presumably two messages are replayed from the rollout and a
// third is emitted on resume, repeating the `OnRequest` text — confirm
// against the session start-up code.
assert_eq!(permissions.len(), 3);
let unique = permissions.into_iter().collect::<HashSet<String>>();
assert_eq!(unique.len(), 2);
Ok(())
}
/// Checks that resuming and forking a session with a different approval
/// policy appends new permissions messages after the ones already recorded.
///
/// The base session produces two permissions messages (initial policy plus
/// one override). Resuming under `UnlessTrusted` must yield the base messages
/// followed by one new message. Forking under `UnlessTrusted` must yield the
/// base messages followed by two identical new messages.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn resume_and_fork_append_permissions_messages() -> Result<()> {
// NOTE(review): presumably returns `Ok(())` early when the network is
// unavailable — confirm against the macro definition.
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
// Four turns in total: two in the base session, one after resume, one after fork.
let _req1 = mount_sse_once(&server, sse_completed("resp-1")).await;
let req2 = mount_sse_once(&server, sse_completed("resp-2")).await;
let req3 = mount_sse_once(&server, sse_completed("resp-3")).await;
let req4 = mount_sse_once(&server, sse_completed("resp-4")).await;
let mut builder = test_codex().with_config(|config| {
config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
});
let initial = builder.build(&server).await?;
// Keep the rollout path and home for the resume and fork steps below.
let rollout_path = initial.session_configured.rollout_path.clone();
let home = initial.home.clone();
// Base turn 1 under `OnRequest`.
initial
.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello 1".into(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
// Override the approval policy to `Never` before base turn 2.
initial
.codex
.submit(Op::OverrideTurnContext {
cwd: None,
approval_policy: Some(AskForApproval::Never),
sandbox_policy: None,
model: None,
effort: None,
summary: None,
})
.await?;
// Base turn 2 under `Never`.
initial
.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello 2".into(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
// The second request is the baseline: it must hold both base permissions messages.
let body2 = req2.single_request().body_json();
let input2 = body2["input"].as_array().expect("input array");
let permissions_base = permissions_texts(input2);
assert_eq!(permissions_base.len(), 2);
// Resume with a policy (`UnlessTrusted`) not used in the base session.
builder = builder.with_config(|config| {
config.approval_policy = Constrained::allow_any(AskForApproval::UnlessTrusted);
});
let resumed = builder.resume(&server, home, rollout_path.clone()).await?;
resumed
.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "after resume".into(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&resumed.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let body3 = req3.single_request().body_json();
let input3 = body3["input"].as_array().expect("input array");
let permissions_resume = permissions_texts(input3);
// After resume: the base messages are an unchanged prefix, followed by one new message.
assert_eq!(permissions_resume.len(), permissions_base.len() + 1);
assert_eq!(
&permissions_resume[..permissions_base.len()],
permissions_base.as_slice()
);
assert!(!permissions_base.contains(permissions_resume.last().expect("new permissions")));
// Fork from the same rollout file, again under `UnlessTrusted`.
let mut fork_config = initial.config.clone();
fork_config.approval_policy = Constrained::allow_any(AskForApproval::UnlessTrusted);
let forked = initial
.thread_manager
.fork_thread(usize::MAX, fork_config, rollout_path)
.await?;
forked
.thread
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "after fork".into(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&forked.thread, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let body4 = req4.single_request().body_json();
let input4 = body4["input"].as_array().expect("input array");
let permissions_fork = permissions_texts(input4);
// NOTE(review): two new messages are expected here, presumably because the
// rollout already holds the message appended by the resumed session and the
// fork adds its own — confirm against the fork/rollout implementation.
assert_eq!(permissions_fork.len(), permissions_base.len() + 2);
assert_eq!(
&permissions_fork[..permissions_base.len()],
permissions_base.as_slice()
);
let new_permissions = &permissions_fork[permissions_base.len()..];
assert_eq!(new_permissions.len(), 2);
assert_eq!(new_permissions[0], new_permissions[1]);
assert!(!permissions_base.contains(&new_permissions[0]));
Ok(())
}
/// Checks the full text of the permissions message for a `WorkspaceWrite`
/// sandbox with an extra writable root and the `OnRequest` approval policy.
///
/// The expected message is rebuilt locally from the sandbox text, the
/// approval text and the list of writable roots. It is then compared
/// verbatim (after line-ending normalization) with the single permissions
/// message found in the request.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn permissions_message_includes_writable_roots() -> Result<()> {
// NOTE(review): presumably returns `Ok(())` early when the network is
// unavailable — confirm against the macro definition.
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let req = mount_sse_once(&server, sse_completed("resp-1")).await;
// A temporary directory serves as the explicitly configured writable root.
let writable = TempDir::new()?;
let writable_root = AbsolutePathBuf::try_from(writable.path())?;
// Neither tmp exclusion flag is set, which is why `/tmp` and `TMPDIR` are
// added to the expected roots below.
let sandbox_policy = SandboxPolicy::WorkspaceWrite {
writable_roots: vec![writable_root],
network_access: false,
exclude_tmpdir_env_var: false,
exclude_slash_tmp: false,
};
let mut builder = test_codex().with_config(move |config| {
config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
config.sandbox_policy = Constrained::allow_any(sandbox_policy);
});
let test = builder.build(&server).await?;
test.codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello".into(),
}],
final_output_json_schema: None,
})
.await?;
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let body = req.single_request().body_json();
let input = body["input"].as_array().expect("input array");
let permissions = permissions_texts(input);
// The two literals below are compared byte-for-byte with the emitted message
// by the final `assert_eq!`, so they must track the prompt text exactly.
let sandbox_text = "Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `workspace-write`: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. Network access is restricted.";
let approval_text = " Approvals are your mechanism to get user consent to run shell commands without the sandbox. `approval_policy` is `on-request`: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task.\n\nHere are scenarios where you'll need to request approval:\n- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)\n- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.\n- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)\n- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command.\n- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for.\n\nWhen requesting approval to execute a command that will require escalated privileges:\n - Provide the `sandbox_permissions` parameter with the value `\"require_escalated\"`\n - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter";
// Normalize paths by removing trailing slashes to match AbsolutePathBuf behavior
let normalize_path =
|p: &std::path::Path| -> String { p.to_string_lossy().trim_end_matches('/').to_string() };
// Expected roots, in order: the configured writable root, then the session cwd.
let mut roots = vec![
normalize_path(writable.path()),
normalize_path(test.config.cwd.as_path()),
];
// `/tmp` is expected only on unix hosts where that directory exists.
if cfg!(unix) && std::path::Path::new("/tmp").is_dir() {
roots.push("/tmp".to_string());
}
// `TMPDIR` is expected only when it is set to a non-empty absolute path.
if let Some(tmpdir) = std::env::var_os("TMPDIR") {
let tmpdir_path = std::path::PathBuf::from(&tmpdir);
if tmpdir_path.is_absolute() && !tmpdir.is_empty() {
roots.push(normalize_path(&tmpdir_path));
}
}
// The sentence is singular for one root and plural (comma-separated) otherwise.
let roots_text = if roots.len() == 1 {
format!(" The writable root is `{}`.", roots[0])
} else {
format!(
" The writable roots are {}.",
roots
.iter()
.map(|root| format!("`{root}`"))
.collect::<Vec<_>>()
.join(", ")
)
};
let expected = format!(
"<permissions instructions>{sandbox_text}{approval_text}{roots_text}</permissions instructions>"
);
// Normalize line endings to handle Windows vs Unix differences
let normalize_line_endings = |s: &str| s.replace("\r\n", "\n");
let expected_normalized = normalize_line_endings(&expected);
let actual_normalized: Vec<String> = permissions
.iter()
.map(|s| normalize_line_endings(s))
.collect();
// Exactly one permissions message is expected, equal to the rebuilt text.
assert_eq!(actual_normalized, vec![expected_normalized]);
Ok(())
}

View File

@@ -36,9 +36,6 @@ fn default_env_context_str(cwd: &str, shell: &Shell) -> String {
format!(
r#"<environment_context>
<cwd>{cwd}</cwd>
<approval_policy>on-request</approval_policy>
<sandbox_mode>read-only</sandbox_mode>
<network_access>restricted</network_access>
<shell>{shell_name}</shell>
</environment_context>"#
)
@@ -252,9 +249,13 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
let body1 = req1.single_request().body_json();
let input1 = body1["input"].as_array().expect("input array");
assert_eq!(input1.len(), 3, "expected cached prefix + env + user msg");
assert_eq!(
input1.len(),
4,
"expected permissions + cached prefix + env + user msg"
);
let ui_text = input1[0]["content"][0]["text"]
let ui_text = input1[1]["content"][0]["text"]
.as_str()
.expect("ui message text");
assert!(
@@ -266,11 +267,11 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
let cwd_str = config.cwd.to_string_lossy();
let expected_env_text = default_env_context_str(&cwd_str, &shell);
assert_eq!(
input1[1],
input1[2],
text_user_input(expected_env_text),
"expected environment context after UI message"
);
assert_eq!(input1[2], text_user_input("hello 1".to_string()));
assert_eq!(input1[3], text_user_input("hello 1".to_string()));
let body2 = req2.single_request().body_json();
let input2 = body2["input"].as_array().expect("input array");
@@ -312,16 +313,17 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() -> an
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let writable = TempDir::new().unwrap();
let new_policy = SandboxPolicy::WorkspaceWrite {
writable_roots: vec![writable.path().try_into().unwrap()],
network_access: true,
exclude_tmpdir_env_var: true,
exclude_slash_tmp: true,
};
codex
.submit(Op::OverrideTurnContext {
cwd: None,
approval_policy: Some(AskForApproval::Never),
sandbox_policy: Some(SandboxPolicy::WorkspaceWrite {
writable_roots: vec![writable.path().try_into().unwrap()],
network_access: true,
exclude_tmpdir_env_var: true,
exclude_slash_tmp: true,
}),
sandbox_policy: Some(new_policy.clone()),
model: Some("o3".to_string()),
effort: Some(Some(ReasoningEffort::High)),
summary: Some(ReasoningSummary::Detailed),
@@ -354,36 +356,18 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() -> an
"role": "user",
"content": [ { "type": "input_text", "text": "hello 2" } ]
});
// After overriding the turn context, the environment context should be emitted again
// reflecting the new approval policy and sandbox settings. Omit cwd because it did
// not change.
let shell = default_user_shell();
let expected_env_text_2 = format!(
r#"<environment_context>
<approval_policy>never</approval_policy>
<sandbox_mode>workspace-write</sandbox_mode>
<network_access>enabled</network_access>
<writable_roots>
<root>{}</root>
</writable_roots>
<shell>{}</shell>
</environment_context>"#,
writable.path().display(),
shell.name()
let expected_permissions_msg = body1["input"][0].clone();
// After overriding the turn context, emit a new permissions message.
let body1_input = body1["input"].as_array().expect("input array");
let expected_permissions_msg_2 = body2["input"][body1_input.len()].clone();
assert_ne!(
expected_permissions_msg_2, expected_permissions_msg,
"expected updated permissions message after override"
);
let expected_env_msg_2 = serde_json::json!({
"type": "message",
"role": "user",
"content": [ { "type": "input_text", "text": expected_env_text_2 } ]
});
let expected_body2 = serde_json::json!(
[
body1["input"].as_array().unwrap().as_slice(),
[expected_env_msg_2, expected_user_message_2].as_slice(),
]
.concat()
);
assert_eq!(body2["input"], expected_body2);
let mut expected_body2 = body1["input"].as_array().expect("input array").to_vec();
expected_body2.push(expected_permissions_msg_2);
expected_body2.push(expected_user_message_2);
assert_eq!(body2["input"], serde_json::Value::Array(expected_body2));
Ok(())
}
@@ -439,10 +423,8 @@ async fn override_before_first_turn_emits_environment_context() -> anyhow::Resul
.filter(|text| text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG))
.collect();
assert!(
env_texts
.iter()
.any(|text| text.contains("<approval_policy>never</approval_policy>")),
"environment context should reflect overridden approval policy: {env_texts:?}"
!env_texts.is_empty(),
"expected environment context to be emitted: {env_texts:?}"
);
let env_count = input
@@ -462,9 +444,29 @@ async fn override_before_first_turn_emits_environment_context() -> anyhow::Resul
.is_some()
})
.count();
assert_eq!(
env_count, 2,
"environment context should appear exactly twice, found {env_count}"
assert!(
env_count >= 1,
"environment context should appear at least once, found {env_count}"
);
let permissions_texts: Vec<&str> = input
.iter()
.filter_map(|msg| {
let role = msg["role"].as_str()?;
if role != "developer" {
return None;
}
msg["content"]
.as_array()
.and_then(|content| content.first())
.and_then(|item| item["text"].as_str())
})
.collect();
assert!(
permissions_texts
.iter()
.any(|text| text.contains("`approval_policy` is `never`")),
"permissions message should reflect overridden approval policy: {permissions_texts:?}"
);
let user_texts: Vec<&str> = input
@@ -514,6 +516,12 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() -> anyhow::Res
// Second turn using per-turn overrides via UserTurn
let new_cwd = TempDir::new().unwrap();
let writable = TempDir::new().unwrap();
let new_policy = SandboxPolicy::WorkspaceWrite {
writable_roots: vec![AbsolutePathBuf::try_from(writable.path()).unwrap()],
network_access: true,
exclude_tmpdir_env_var: true,
exclude_slash_tmp: true,
};
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
@@ -521,12 +529,7 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() -> anyhow::Res
}],
cwd: new_cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::WorkspaceWrite {
writable_roots: vec![AbsolutePathBuf::try_from(writable.path()).unwrap()],
network_access: true,
exclude_tmpdir_env_var: true,
exclude_slash_tmp: true,
},
sandbox_policy: new_policy.clone(),
model: "o3".to_string(),
effort: Some(ReasoningEffort::High),
summary: ReasoningSummary::Detailed,
@@ -556,31 +559,28 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() -> anyhow::Res
let expected_env_text_2 = format!(
r#"<environment_context>
<cwd>{}</cwd>
<approval_policy>never</approval_policy>
<sandbox_mode>workspace-write</sandbox_mode>
<network_access>enabled</network_access>
<writable_roots>
<root>{}</root>
</writable_roots>
<shell>{}</shell>
</environment_context>"#,
new_cwd.path().display(),
writable.path().display(),
shell.name(),
shell.name()
);
let expected_env_msg_2 = serde_json::json!({
"type": "message",
"role": "user",
"content": [ { "type": "input_text", "text": expected_env_text_2 } ]
});
let expected_body2 = serde_json::json!(
[
body1["input"].as_array().unwrap().as_slice(),
[expected_env_msg_2, expected_user_message_2].as_slice(),
]
.concat()
let expected_permissions_msg = body1["input"][0].clone();
let body1_input = body1["input"].as_array().expect("input array");
let expected_permissions_msg_2 = body2["input"][body1_input.len() + 1].clone();
assert_ne!(
expected_permissions_msg_2, expected_permissions_msg,
"expected updated permissions message after per-turn override"
);
assert_eq!(body2["input"], expected_body2);
let mut expected_body2 = body1_input.to_vec();
expected_body2.push(expected_env_msg_2);
expected_body2.push(expected_permissions_msg_2);
expected_body2.push(expected_user_message_2);
assert_eq!(body2["input"], serde_json::Value::Array(expected_body2));
Ok(())
}
@@ -648,7 +648,8 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a
let body1 = req1.single_request().body_json();
let body2 = req2.single_request().body_json();
let expected_ui_msg = body1["input"][0].clone();
let expected_permissions_msg = body1["input"][0].clone();
let expected_ui_msg = body1["input"][1].clone();
let shell = default_user_shell();
let default_cwd_lossy = default_cwd.to_string_lossy();
@@ -657,6 +658,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a
let expected_user_message_1 = text_user_input("hello 1".to_string());
let expected_input_1 = serde_json::Value::Array(vec![
expected_permissions_msg.clone(),
expected_ui_msg.clone(),
expected_env_msg_1.clone(),
expected_user_message_1.clone(),
@@ -665,6 +667,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a
let expected_user_message_2 = text_user_input("hello 2".to_string());
let expected_input_2 = serde_json::Value::Array(vec![
expected_permissions_msg,
expected_ui_msg,
expected_env_msg_1,
expected_user_message_1,
@@ -738,34 +741,34 @@ async fn send_user_turn_with_changes_sends_environment_context() -> anyhow::Resu
let body1 = req1.single_request().body_json();
let body2 = req2.single_request().body_json();
let expected_ui_msg = body1["input"][0].clone();
let expected_permissions_msg = body1["input"][0].clone();
let expected_ui_msg = body1["input"][1].clone();
let shell = default_user_shell();
let expected_env_text_1 = default_env_context_str(&default_cwd.to_string_lossy(), &shell);
let expected_env_msg_1 = text_user_input(expected_env_text_1);
let expected_user_message_1 = text_user_input("hello 1".to_string());
let expected_input_1 = serde_json::Value::Array(vec![
expected_permissions_msg.clone(),
expected_ui_msg.clone(),
expected_env_msg_1.clone(),
expected_user_message_1.clone(),
]);
assert_eq!(body1["input"], expected_input_1);
let shell_name = shell.name();
let expected_env_msg_2 = text_user_input(format!(
r#"<environment_context>
<approval_policy>never</approval_policy>
<sandbox_mode>danger-full-access</sandbox_mode>
<network_access>enabled</network_access>
<shell>{shell_name}</shell>
</environment_context>"#
));
let body1_input = body1["input"].as_array().expect("input array");
let expected_permissions_msg_2 = body2["input"][body1_input.len()].clone();
assert_ne!(
expected_permissions_msg_2, expected_permissions_msg,
"expected updated permissions message after policy change"
);
let expected_user_message_2 = text_user_input("hello 2".to_string());
let expected_input_2 = serde_json::Value::Array(vec![
expected_permissions_msg,
expected_ui_msg,
expected_env_msg_1,
expected_user_message_1,
expected_env_msg_2,
expected_permissions_msg_2,
expected_user_message_2,
]);
assert_eq!(body2["input"], expected_input_2);