add tests

This commit is contained in:
Ahmed Ibrahim
2025-09-12 11:29:25 -04:00
parent 9a7266a33f
commit cecf3a82a6
2 changed files with 346 additions and 33 deletions

View File

@@ -2834,8 +2834,7 @@ async fn submit_user_input(
Some(cwd),
Some(approval_policy),
Some(sandbox_policy),
// Shell is not configurable from turn to turn
None,
Some(sess.user_shell.clone()),
))])
.await;
if let Err(items) = sess.inject_input(items) {

View File

@@ -270,8 +270,13 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
assert_eq!(requests.len(), 2, "expected two POST requests");
let shell = default_user_shell().await;
let shell_line = match shell.name() {
Some(name) => format!(" <shell>{name}</shell>\n"),
None => String::new(),
};
let expected_env_text_init = format!(
// Per-turn environment context includes the shell tag.
let expected_env_text_turn = format!(
r#"<environment_context>
<cwd>{}</cwd>
<approval_policy>on-request</approval_policy>
@@ -279,29 +284,11 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
<network_access>restricted</network_access>
{}</environment_context>"#,
cwd.path().to_string_lossy(),
match shell.name() {
Some(name) => format!(" <shell>{name}</shell>\n"),
None => String::new(),
}
);
// Per-turn environment context omits the shell tag.
let expected_env_text_turn = format!(
r#"<environment_context>
<cwd>{}</cwd>
<approval_policy>on-request</approval_policy>
<sandbox_mode>read-only</sandbox_mode>
<network_access>restricted</network_access>
</environment_context>"#,
cwd.path().to_string_lossy(),
shell_line.as_str(),
);
let expected_ui_text =
"<user_instructions>\n\nbe consistent and helpful\n\n</user_instructions>";
let expected_env_msg_init = serde_json::json!({
"type": "message",
"role": "user",
"content": [ { "type": "input_text", "text": expected_env_text_init } ]
});
let expected_env_msg_turn = serde_json::json!({
"type": "message",
"role": "user",
@@ -319,15 +306,28 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
"content": [ { "type": "input_text", "text": "hello 1" } ]
});
let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
let body1_input = body1["input"].as_array().unwrap();
assert_eq!(
body1["input"],
serde_json::json!([
expected_ui_msg,
expected_env_msg_init,
expected_env_msg_turn,
expected_user_message_1
])
body1_input.len(),
3,
"expected UI, env item, and the user message"
);
assert_eq!(&body1_input[0], &expected_ui_msg);
assert_eq!(&body1_input[1], &expected_env_msg_turn);
assert_eq!(body1_input.last().unwrap(), &expected_user_message_1);
let env_texts: Vec<&str> = body1_input
.iter()
.filter_map(|msg| {
msg.get("content")
.and_then(|content| content.as_array())
.and_then(|content| content.first())
.and_then(|item| item.get("text"))
.and_then(|text| text.as_str())
})
.filter(|text| text.starts_with("<environment_context>"))
.collect();
assert_eq!(env_texts, vec![expected_env_text_turn.as_str()]);
let expected_user_message_2 = serde_json::json!({
"type": "message",
@@ -443,6 +443,12 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
"role": "user",
"content": [ { "type": "input_text", "text": "hello 2" } ]
});
let shell = default_user_shell().await;
let shell_line = match shell.name() {
Some(name) => format!(" <shell>{name}</shell>\n"),
None => String::new(),
};
// After overriding the turn context, the environment context should be emitted again
// reflecting the new approval policy and sandbox settings. Omit cwd because it did
// not change.
@@ -455,9 +461,10 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
<writable_roots>
<root>{}</root>
</writable_roots>
</environment_context>"#,
{}</environment_context>"#,
cwd.path().to_string_lossy(),
writable.path().to_string_lossy()
writable.path().to_string_lossy(),
shell_line.as_str()
);
let expected_env_msg_2 = serde_json::json!({
"type": "message",
@@ -568,6 +575,11 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
"role": "user",
"content": [ { "type": "input_text", "text": "hello 2" } ]
});
let shell = default_user_shell().await;
let shell_line = match shell.name() {
Some(name) => format!(" <shell>{name}</shell>\n"),
None => String::new(),
};
let expected_env_text_2 = format!(
r#"<environment_context>
<cwd>{}</cwd>
@@ -577,9 +589,10 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
<writable_roots>
<root>{}</root>
</writable_roots>
</environment_context>"#,
{}</environment_context>"#,
new_cwd.path().to_string_lossy(),
writable.path().to_string_lossy()
writable.path().to_string_lossy(),
shell_line.as_str()
);
let expected_env_msg_2 = serde_json::json!({
"type": "message",
@@ -595,3 +608,304 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
);
assert_eq!(body2["input"], expected_body2);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn changing_approval_policy_mid_session_preserves_shell_tool_description_and_json() {
use pretty_assertions::assert_eq;
let server = MockServer::start().await;
let sse = sse_completed("resp");
let template = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse, "text/event-stream");
// Expect two POSTs to /v1/responses
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(template)
.expect(2)
.mount(&server)
.await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};
let cwd = TempDir::new().unwrap();
let codex_home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&codex_home);
config.cwd = cwd.path().to_path_buf();
config.model_provider = model_provider;
config.user_instructions = Some("be consistent and helpful".to_string());
// Keep toolset minimal/deterministic for this assertion
config.include_apply_patch_tool = false;
config.include_plan_tool = false;
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let codex = conversation_manager
.new_conversation(config)
.await
.expect("create new conversation")
.conversation;
// First turn with default approval policy (on-request)
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: "hello 1".into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// Change approval policy mid-session to Never
codex
.submit(Op::OverrideTurnContext {
cwd: None,
approval_policy: Some(AskForApproval::Never),
sandbox_policy: None,
model: None,
effort: None,
summary: None,
})
.await
.unwrap();
// Second turn after overrides
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: "hello 2".into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let requests = server.received_requests().await.unwrap();
assert_eq!(requests.len(), 2, "expected two POST requests");
let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
let body2 = requests[1].body_json::<serde_json::Value>().unwrap();
// Extract the shell tool from the tools array in each request
let find_shell = |body: &serde_json::Value| -> serde_json::Value {
body["tools"]
.as_array()
.unwrap()
.iter()
.find(|t| t["type"] == "function" && t["name"] == "shell")
.cloned()
.expect("shell tool present")
};
let shell1 = find_shell(&body1);
let shell2 = find_shell(&body2);
// Description must be stable across approval policy changes
assert_eq!(
shell1["description"],
serde_json::json!("Runs a shell command and returns its output")
);
assert_eq!(shell1["description"], shell2["description"]);
// Raw JSON expectation for shell tool under on-request (ShellWithRequest)
let expected_shell_on_request = serde_json::json!({
"type": "function",
"name": "shell",
"description": "Runs a shell command and returns its output",
"strict": false,
"parameters": {
"type": "object",
"properties": {
"command": {
"type": "array",
"items": { "type": "string" },
"description": "The command to execute"
},
"workdir": {
"type": "string",
"description": "Working directory to execute the command in."
},
"timeout_ms": {
"type": "number",
"description": "Timeout for the command in milliseconds."
},
"with_escalated_permissions": {
"type": "boolean",
"description": "Request escalated permissions, only for when a command would otherwise be blocked by the sandbox."
},
"justification": {
"type": "string",
"description": "Required if and only if with_escalated_permissions == true. One sentence explaining why escalation is needed (e.g., write outside CWD, network fetch, git commit)."
}
},
"required": ["command"],
"additionalProperties": false
}
});
// Raw JSON expectation for shell tool under never (DefaultShell)
let expected_shell_never = serde_json::json!({
"type": "function",
"name": "shell",
"description": "Runs a shell command and returns its output",
"strict": false,
"parameters": {
"type": "object",
"properties": {
"command": {
"type": "array",
"items": { "type": "string" },
"description": "The command to execute"
},
"workdir": {
"type": "string",
"description": "The working directory to execute the command in"
},
"timeout_ms": {
"type": "number",
"description": "The timeout for the command in milliseconds"
}
},
"required": ["command"],
"additionalProperties": false
}
});
assert_eq!(shell1, expected_shell_on_request);
assert_eq!(shell2, expected_shell_never);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn tools_stable_across_all_approval_policy_transitions() {
use pretty_assertions::assert_eq;
let server = MockServer::start().await;
let sse = sse_completed("resp");
let template = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse, "text/event-stream");
// Build all transitions FROM each to each other (exclude self transitions)
let policies = vec![
AskForApproval::UnlessTrusted,
AskForApproval::OnFailure,
AskForApproval::OnRequest,
AskForApproval::Never,
];
let mut transitions: Vec<(AskForApproval, AskForApproval)> = Vec::new();
for &from in &policies {
for &to in &policies {
if from != to {
transitions.push((from, to));
}
}
}
// Expect 2 POSTs per transition
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(template)
.expect((transitions.len() * 2) as u64)
.mount(&server)
.await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};
let cwd = TempDir::new().unwrap();
let codex_home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&codex_home);
config.cwd = cwd.path().to_path_buf();
config.model_provider = model_provider;
config.user_instructions = Some("be consistent and helpful".to_string());
// Keep tools stable and minimal
config.include_plan_tool = false;
config.include_apply_patch_tool = false;
config.tools_web_search_request = false;
config.use_experimental_unified_exec_tool = true; // policy-independent tool
let conversation_manager =
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
let codex = conversation_manager
.new_conversation(config)
.await
.expect("create new conversation")
.conversation;
for (i, (from, to)) in transitions.iter().enumerate() {
// Ensure a known starting policy for this pair
codex
.submit(Op::OverrideTurnContext {
cwd: None,
approval_policy: Some(*from),
sandbox_policy: None,
model: None,
effort: None,
summary: None,
})
.await
.unwrap();
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: format!("turn {i}-a"),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// Override to the target policy and send next turn
codex
.submit(Op::OverrideTurnContext {
cwd: None,
approval_policy: Some(*to),
sandbox_policy: None,
model: None,
effort: None,
summary: None,
})
.await
.unwrap();
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: format!("turn {i}-b"),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
}
// Verify tool arrays are identical across each pair of requests
let requests = server.received_requests().await.unwrap();
assert_eq!(
requests.len(),
transitions.len() * 2,
"expected 2 requests per transition"
);
for i in 0..transitions.len() {
let body_a = requests[2 * i].body_json::<serde_json::Value>().unwrap();
let body_b = requests[2 * i + 1]
.body_json::<serde_json::Value>()
.unwrap();
assert_eq!(
body_a["tools"], body_b["tools"],
"tools changed between requests for transition #{i}: {:?}",
transitions[i]
);
}
}