mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
add tests
This commit is contained in:
@@ -2834,8 +2834,7 @@ async fn submit_user_input(
|
||||
Some(cwd),
|
||||
Some(approval_policy),
|
||||
Some(sandbox_policy),
|
||||
// Shell is not configurable from turn to turn
|
||||
None,
|
||||
Some(sess.user_shell.clone()),
|
||||
))])
|
||||
.await;
|
||||
if let Err(items) = sess.inject_input(items) {
|
||||
|
||||
@@ -270,8 +270,13 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
|
||||
assert_eq!(requests.len(), 2, "expected two POST requests");
|
||||
|
||||
let shell = default_user_shell().await;
|
||||
let shell_line = match shell.name() {
|
||||
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||
None => String::new(),
|
||||
};
|
||||
|
||||
let expected_env_text_init = format!(
|
||||
// Per-turn environment context includes the shell tag.
|
||||
let expected_env_text_turn = format!(
|
||||
r#"<environment_context>
|
||||
<cwd>{}</cwd>
|
||||
<approval_policy>on-request</approval_policy>
|
||||
@@ -279,29 +284,11 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
|
||||
<network_access>restricted</network_access>
|
||||
{}</environment_context>"#,
|
||||
cwd.path().to_string_lossy(),
|
||||
match shell.name() {
|
||||
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||
None => String::new(),
|
||||
}
|
||||
);
|
||||
// Per-turn environment context omits the shell tag.
|
||||
let expected_env_text_turn = format!(
|
||||
r#"<environment_context>
|
||||
<cwd>{}</cwd>
|
||||
<approval_policy>on-request</approval_policy>
|
||||
<sandbox_mode>read-only</sandbox_mode>
|
||||
<network_access>restricted</network_access>
|
||||
</environment_context>"#,
|
||||
cwd.path().to_string_lossy(),
|
||||
shell_line.as_str(),
|
||||
);
|
||||
let expected_ui_text =
|
||||
"<user_instructions>\n\nbe consistent and helpful\n\n</user_instructions>";
|
||||
|
||||
let expected_env_msg_init = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": expected_env_text_init } ]
|
||||
});
|
||||
let expected_env_msg_turn = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
@@ -319,15 +306,28 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
|
||||
"content": [ { "type": "input_text", "text": "hello 1" } ]
|
||||
});
|
||||
let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
let body1_input = body1["input"].as_array().unwrap();
|
||||
assert_eq!(
|
||||
body1["input"],
|
||||
serde_json::json!([
|
||||
expected_ui_msg,
|
||||
expected_env_msg_init,
|
||||
expected_env_msg_turn,
|
||||
expected_user_message_1
|
||||
])
|
||||
body1_input.len(),
|
||||
3,
|
||||
"expected UI, env item, and the user message"
|
||||
);
|
||||
assert_eq!(&body1_input[0], &expected_ui_msg);
|
||||
assert_eq!(&body1_input[1], &expected_env_msg_turn);
|
||||
assert_eq!(body1_input.last().unwrap(), &expected_user_message_1);
|
||||
|
||||
let env_texts: Vec<&str> = body1_input
|
||||
.iter()
|
||||
.filter_map(|msg| {
|
||||
msg.get("content")
|
||||
.and_then(|content| content.as_array())
|
||||
.and_then(|content| content.first())
|
||||
.and_then(|item| item.get("text"))
|
||||
.and_then(|text| text.as_str())
|
||||
})
|
||||
.filter(|text| text.starts_with("<environment_context>"))
|
||||
.collect();
|
||||
assert_eq!(env_texts, vec![expected_env_text_turn.as_str()]);
|
||||
|
||||
let expected_user_message_2 = serde_json::json!({
|
||||
"type": "message",
|
||||
@@ -443,6 +443,12 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": "hello 2" } ]
|
||||
});
|
||||
let shell = default_user_shell().await;
|
||||
let shell_line = match shell.name() {
|
||||
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||
None => String::new(),
|
||||
};
|
||||
|
||||
// After overriding the turn context, the environment context should be emitted again
|
||||
// reflecting the new approval policy and sandbox settings. Omit cwd because it did
|
||||
// not change.
|
||||
@@ -455,9 +461,10 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
|
||||
<writable_roots>
|
||||
<root>{}</root>
|
||||
</writable_roots>
|
||||
</environment_context>"#,
|
||||
{}</environment_context>"#,
|
||||
cwd.path().to_string_lossy(),
|
||||
writable.path().to_string_lossy()
|
||||
writable.path().to_string_lossy(),
|
||||
shell_line.as_str()
|
||||
);
|
||||
let expected_env_msg_2 = serde_json::json!({
|
||||
"type": "message",
|
||||
@@ -568,6 +575,11 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": "hello 2" } ]
|
||||
});
|
||||
let shell = default_user_shell().await;
|
||||
let shell_line = match shell.name() {
|
||||
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||
None => String::new(),
|
||||
};
|
||||
let expected_env_text_2 = format!(
|
||||
r#"<environment_context>
|
||||
<cwd>{}</cwd>
|
||||
@@ -577,9 +589,10 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
|
||||
<writable_roots>
|
||||
<root>{}</root>
|
||||
</writable_roots>
|
||||
</environment_context>"#,
|
||||
{}</environment_context>"#,
|
||||
new_cwd.path().to_string_lossy(),
|
||||
writable.path().to_string_lossy()
|
||||
writable.path().to_string_lossy(),
|
||||
shell_line.as_str()
|
||||
);
|
||||
let expected_env_msg_2 = serde_json::json!({
|
||||
"type": "message",
|
||||
@@ -595,3 +608,304 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
|
||||
);
|
||||
assert_eq!(body2["input"], expected_body2);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn changing_approval_policy_mid_session_preserves_shell_tool_description_and_json() {
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let sse = sse_completed("resp");
|
||||
let template = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse, "text/event-stream");
|
||||
|
||||
// Expect two POSTs to /v1/responses
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(template)
|
||||
.expect(2)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let cwd = TempDir::new().unwrap();
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
config.model_provider = model_provider;
|
||||
config.user_instructions = Some("be consistent and helpful".to_string());
|
||||
// Keep toolset minimal/deterministic for this assertion
|
||||
config.include_apply_patch_tool = false;
|
||||
config.include_plan_tool = false;
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create new conversation")
|
||||
.conversation;
|
||||
|
||||
// First turn with default approval policy (on-request)
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
text: "hello 1".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Change approval policy mid-session to Never
|
||||
codex
|
||||
.submit(Op::OverrideTurnContext {
|
||||
cwd: None,
|
||||
approval_policy: Some(AskForApproval::Never),
|
||||
sandbox_policy: None,
|
||||
model: None,
|
||||
effort: None,
|
||||
summary: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Second turn after overrides
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
text: "hello 2".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(requests.len(), 2, "expected two POST requests");
|
||||
|
||||
let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
let body2 = requests[1].body_json::<serde_json::Value>().unwrap();
|
||||
|
||||
// Extract the shell tool from the tools array in each request
|
||||
let find_shell = |body: &serde_json::Value| -> serde_json::Value {
|
||||
body["tools"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.find(|t| t["type"] == "function" && t["name"] == "shell")
|
||||
.cloned()
|
||||
.expect("shell tool present")
|
||||
};
|
||||
|
||||
let shell1 = find_shell(&body1);
|
||||
let shell2 = find_shell(&body2);
|
||||
|
||||
// Description must be stable across approval policy changes
|
||||
assert_eq!(
|
||||
shell1["description"],
|
||||
serde_json::json!("Runs a shell command and returns its output")
|
||||
);
|
||||
assert_eq!(shell1["description"], shell2["description"]);
|
||||
|
||||
// Raw JSON expectation for shell tool under on-request (ShellWithRequest)
|
||||
let expected_shell_on_request = serde_json::json!({
|
||||
"type": "function",
|
||||
"name": "shell",
|
||||
"description": "Runs a shell command and returns its output",
|
||||
"strict": false,
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"command": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" },
|
||||
"description": "The command to execute"
|
||||
},
|
||||
"workdir": {
|
||||
"type": "string",
|
||||
"description": "Working directory to execute the command in."
|
||||
},
|
||||
"timeout_ms": {
|
||||
"type": "number",
|
||||
"description": "Timeout for the command in milliseconds."
|
||||
},
|
||||
"with_escalated_permissions": {
|
||||
"type": "boolean",
|
||||
"description": "Request escalated permissions, only for when a command would otherwise be blocked by the sandbox."
|
||||
},
|
||||
"justification": {
|
||||
"type": "string",
|
||||
"description": "Required if and only if with_escalated_permissions == true. One sentence explaining why escalation is needed (e.g., write outside CWD, network fetch, git commit)."
|
||||
}
|
||||
},
|
||||
"required": ["command"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
});
|
||||
|
||||
// Raw JSON expectation for shell tool under never (DefaultShell)
|
||||
let expected_shell_never = serde_json::json!({
|
||||
"type": "function",
|
||||
"name": "shell",
|
||||
"description": "Runs a shell command and returns its output",
|
||||
"strict": false,
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"command": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" },
|
||||
"description": "The command to execute"
|
||||
},
|
||||
"workdir": {
|
||||
"type": "string",
|
||||
"description": "The working directory to execute the command in"
|
||||
},
|
||||
"timeout_ms": {
|
||||
"type": "number",
|
||||
"description": "The timeout for the command in milliseconds"
|
||||
}
|
||||
},
|
||||
"required": ["command"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
});
|
||||
|
||||
assert_eq!(shell1, expected_shell_on_request);
|
||||
assert_eq!(shell2, expected_shell_never);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn tools_stable_across_all_approval_policy_transitions() {
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let sse = sse_completed("resp");
|
||||
let template = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse, "text/event-stream");
|
||||
|
||||
// Build all transitions FROM each to each other (exclude self transitions)
|
||||
let policies = vec![
|
||||
AskForApproval::UnlessTrusted,
|
||||
AskForApproval::OnFailure,
|
||||
AskForApproval::OnRequest,
|
||||
AskForApproval::Never,
|
||||
];
|
||||
let mut transitions: Vec<(AskForApproval, AskForApproval)> = Vec::new();
|
||||
for &from in &policies {
|
||||
for &to in &policies {
|
||||
if from != to {
|
||||
transitions.push((from, to));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Expect 2 POSTs per transition
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(template)
|
||||
.expect((transitions.len() * 2) as u64)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let cwd = TempDir::new().unwrap();
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
config.model_provider = model_provider;
|
||||
config.user_instructions = Some("be consistent and helpful".to_string());
|
||||
// Keep tools stable and minimal
|
||||
config.include_plan_tool = false;
|
||||
config.include_apply_patch_tool = false;
|
||||
config.tools_web_search_request = false;
|
||||
config.use_experimental_unified_exec_tool = true; // policy-independent tool
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create new conversation")
|
||||
.conversation;
|
||||
|
||||
for (i, (from, to)) in transitions.iter().enumerate() {
|
||||
// Ensure a known starting policy for this pair
|
||||
codex
|
||||
.submit(Op::OverrideTurnContext {
|
||||
cwd: None,
|
||||
approval_policy: Some(*from),
|
||||
sandbox_policy: None,
|
||||
model: None,
|
||||
effort: None,
|
||||
summary: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
text: format!("turn {i}-a"),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Override to the target policy and send next turn
|
||||
codex
|
||||
.submit(Op::OverrideTurnContext {
|
||||
cwd: None,
|
||||
approval_policy: Some(*to),
|
||||
sandbox_policy: None,
|
||||
model: None,
|
||||
effort: None,
|
||||
summary: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
text: format!("turn {i}-b"),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
}
|
||||
|
||||
// Verify tool arrays are identical across each pair of requests
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
transitions.len() * 2,
|
||||
"expected 2 requests per transition"
|
||||
);
|
||||
|
||||
for i in 0..transitions.len() {
|
||||
let body_a = requests[2 * i].body_json::<serde_json::Value>().unwrap();
|
||||
let body_b = requests[2 * i + 1]
|
||||
.body_json::<serde_json::Value>()
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
body_a["tools"], body_b["tools"],
|
||||
"tools changed between requests for transition #{i}: {:?}",
|
||||
transitions[i]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user