add tests

2026-04-24 14:45:27 +00:00 · 2025-09-12 11:29:25 -04:00
parent 9a7266a33f
commit cecf3a82a6
2 changed files with 346 additions and 33 deletions
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -2834,8 +2834,7 @@ async fn submit_user_input(
        Some(cwd),
        Some(approval_policy),
        Some(sandbox_policy),
-        // Shell is not configurable from turn to turn
-        None,
+        Some(sess.user_shell.clone()),
    ))])
    .await;
    if let Err(items) = sess.inject_input(items) {
--- a/codex-rs/core/tests/suite/prompt_caching.rs
+++ b/codex-rs/core/tests/suite/prompt_caching.rs
@@ -270,8 +270,13 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
    assert_eq!(requests.len(), 2, "expected two POST requests");

    let shell = default_user_shell().await;
+    let shell_line = match shell.name() {
+        Some(name) => format!("  <shell>{name}</shell>\n"),
+        None => String::new(),
+    };

-    let expected_env_text_init = format!(
+    // Per-turn environment context includes the shell tag.
+    let expected_env_text_turn = format!(
        r#"<environment_context>
  <cwd>{}</cwd>
  <approval_policy>on-request</approval_policy>
@@ -279,29 +284,11 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
  <network_access>restricted</network_access>
 {}</environment_context>"#,
        cwd.path().to_string_lossy(),
-        match shell.name() {
-            Some(name) => format!("  <shell>{name}</shell>\n"),
-            None => String::new(),
-        }
-    );
-    // Per-turn environment context omits the shell tag.
-    let expected_env_text_turn = format!(
-        r#"<environment_context>
-  <cwd>{}</cwd>
-  <approval_policy>on-request</approval_policy>
-  <sandbox_mode>read-only</sandbox_mode>
-  <network_access>restricted</network_access>
-</environment_context>"#,
-        cwd.path().to_string_lossy(),
+        shell_line.as_str(),
    );
    let expected_ui_text =
        "<user_instructions>\n\nbe consistent and helpful\n\n</user_instructions>";

-    let expected_env_msg_init = serde_json::json!({
-        "type": "message",
-        "role": "user",
-        "content": [ { "type": "input_text", "text": expected_env_text_init } ]
-    });
    let expected_env_msg_turn = serde_json::json!({
        "type": "message",
        "role": "user",
@@ -319,15 +306,28 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
        "content": [ { "type": "input_text", "text": "hello 1" } ]
    });
    let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
+    let body1_input = body1["input"].as_array().unwrap();
    assert_eq!(
-        body1["input"],
-        serde_json::json!([
-            expected_ui_msg,
-            expected_env_msg_init,
-            expected_env_msg_turn,
-            expected_user_message_1
-        ])
+        body1_input.len(),
+        3,
+        "expected UI, env item, and the user message"
    );
+    assert_eq!(&body1_input[0], &expected_ui_msg);
+    assert_eq!(&body1_input[1], &expected_env_msg_turn);
+    assert_eq!(body1_input.last().unwrap(), &expected_user_message_1);
+
+    let env_texts: Vec<&str> = body1_input
+        .iter()
+        .filter_map(|msg| {
+            msg.get("content")
+                .and_then(|content| content.as_array())
+                .and_then(|content| content.first())
+                .and_then(|item| item.get("text"))
+                .and_then(|text| text.as_str())
+        })
+        .filter(|text| text.starts_with("<environment_context>"))
+        .collect();
+    assert_eq!(env_texts, vec![expected_env_text_turn.as_str()]);

    let expected_user_message_2 = serde_json::json!({
        "type": "message",
@@ -443,6 +443,12 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
        "role": "user",
        "content": [ { "type": "input_text", "text": "hello 2" } ]
    });
+    let shell = default_user_shell().await;
+    let shell_line = match shell.name() {
+        Some(name) => format!("  <shell>{name}</shell>\n"),
+        None => String::new(),
+    };
+
    // After overriding the turn context, the environment context should be emitted again
    // reflecting the new approval policy and sandbox settings. Omit cwd because it did
    // not change.
@@ -455,9 +461,10 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
  <writable_roots>
    <root>{}</root>
  </writable_roots>
-</environment_context>"#,
+{}</environment_context>"#,
        cwd.path().to_string_lossy(),
-        writable.path().to_string_lossy()
+        writable.path().to_string_lossy(),
+        shell_line.as_str()
    );
    let expected_env_msg_2 = serde_json::json!({
        "type": "message",
@@ -568,6 +575,11 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
        "role": "user",
        "content": [ { "type": "input_text", "text": "hello 2" } ]
    });
+    let shell = default_user_shell().await;
+    let shell_line = match shell.name() {
+        Some(name) => format!("  <shell>{name}</shell>\n"),
+        None => String::new(),
+    };
    let expected_env_text_2 = format!(
        r#"<environment_context>
  <cwd>{}</cwd>
@@ -577,9 +589,10 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
  <writable_roots>
    <root>{}</root>
  </writable_roots>
-</environment_context>"#,
+{}</environment_context>"#,
        new_cwd.path().to_string_lossy(),
-        writable.path().to_string_lossy()
+        writable.path().to_string_lossy(),
+        shell_line.as_str()
    );
    let expected_env_msg_2 = serde_json::json!({
        "type": "message",
@@ -595,3 +608,304 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
    );
    assert_eq!(body2["input"], expected_body2);
 }
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn changing_approval_policy_mid_session_preserves_shell_tool_description_and_json() {
+    use pretty_assertions::assert_eq;
+
+    let server = MockServer::start().await;
+
+    let sse = sse_completed("resp");
+    let template = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse, "text/event-stream");
+
+    // Expect two POSTs to /v1/responses
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(template)
+        .expect(2)
+        .mount(&server)
+        .await;
+
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    let cwd = TempDir::new().unwrap();
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.cwd = cwd.path().to_path_buf();
+    config.model_provider = model_provider;
+    config.user_instructions = Some("be consistent and helpful".to_string());
+    // Keep toolset minimal/deterministic for this assertion
+    config.include_apply_patch_tool = false;
+    config.include_plan_tool = false;
+
+    let conversation_manager =
+        ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+    let codex = conversation_manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    // First turn with default approval policy (on-request)
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello 1".into(),
+            }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Change approval policy mid-session to Never
+    codex
+        .submit(Op::OverrideTurnContext {
+            cwd: None,
+            approval_policy: Some(AskForApproval::Never),
+            sandbox_policy: None,
+            model: None,
+            effort: None,
+            summary: None,
+        })
+        .await
+        .unwrap();
+
+    // Second turn after overrides
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello 2".into(),
+            }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let requests = server.received_requests().await.unwrap();
+    assert_eq!(requests.len(), 2, "expected two POST requests");
+
+    let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
+    let body2 = requests[1].body_json::<serde_json::Value>().unwrap();
+
+    // Extract the shell tool from the tools array in each request
+    let find_shell = |body: &serde_json::Value| -> serde_json::Value {
+        body["tools"]
+            .as_array()
+            .unwrap()
+            .iter()
+            .find(|t| t["type"] == "function" && t["name"] == "shell")
+            .cloned()
+            .expect("shell tool present")
+    };
+
+    let shell1 = find_shell(&body1);
+    let shell2 = find_shell(&body2);
+
+    // Description must be stable across approval policy changes
+    assert_eq!(
+        shell1["description"],
+        serde_json::json!("Runs a shell command and returns its output")
+    );
+    assert_eq!(shell1["description"], shell2["description"]);
+
+    // Raw JSON expectation for shell tool under on-request (ShellWithRequest)
+    let expected_shell_on_request = serde_json::json!({
+        "type": "function",
+        "name": "shell",
+        "description": "Runs a shell command and returns its output",
+        "strict": false,
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "command": {
+                    "type": "array",
+                    "items": { "type": "string" },
+                    "description": "The command to execute"
+                },
+                "workdir": {
+                    "type": "string",
+                    "description": "Working directory to execute the command in."
+                },
+                "timeout_ms": {
+                    "type": "number",
+                    "description": "Timeout for the command in milliseconds."
+                },
+                "with_escalated_permissions": {
+                    "type": "boolean",
+                    "description": "Request escalated permissions, only for when a command would otherwise be blocked by the sandbox."
+                },
+                "justification": {
+                    "type": "string",
+                    "description": "Required if and only if with_escalated_permissions == true. One sentence explaining why escalation is needed (e.g., write outside CWD, network fetch, git commit)."
+                }
+            },
+            "required": ["command"],
+            "additionalProperties": false
+        }
+    });
+
+    // Raw JSON expectation for shell tool under never (DefaultShell)
+    let expected_shell_never = serde_json::json!({
+        "type": "function",
+        "name": "shell",
+        "description": "Runs a shell command and returns its output",
+        "strict": false,
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "command": {
+                    "type": "array",
+                    "items": { "type": "string" },
+                    "description": "The command to execute"
+                },
+                "workdir": {
+                    "type": "string",
+                    "description": "The working directory to execute the command in"
+                },
+                "timeout_ms": {
+                    "type": "number",
+                    "description": "The timeout for the command in milliseconds"
+                }
+            },
+            "required": ["command"],
+            "additionalProperties": false
+        }
+    });
+
+    assert_eq!(shell1, expected_shell_on_request);
+    assert_eq!(shell2, expected_shell_never);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn tools_stable_across_all_approval_policy_transitions() {
+    use pretty_assertions::assert_eq;
+
+    let server = MockServer::start().await;
+
+    let sse = sse_completed("resp");
+    let template = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse, "text/event-stream");
+
+    // Build all transitions FROM each to each other (exclude self transitions)
+    let policies = vec![
+        AskForApproval::UnlessTrusted,
+        AskForApproval::OnFailure,
+        AskForApproval::OnRequest,
+        AskForApproval::Never,
+    ];
+    let mut transitions: Vec<(AskForApproval, AskForApproval)> = Vec::new();
+    for &from in &policies {
+        for &to in &policies {
+            if from != to {
+                transitions.push((from, to));
+            }
+        }
+    }
+
+    // Expect 2 POSTs per transition
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(template)
+        .expect((transitions.len() * 2) as u64)
+        .mount(&server)
+        .await;
+
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    let cwd = TempDir::new().unwrap();
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.cwd = cwd.path().to_path_buf();
+    config.model_provider = model_provider;
+    config.user_instructions = Some("be consistent and helpful".to_string());
+    // Keep tools stable and minimal
+    config.include_plan_tool = false;
+    config.include_apply_patch_tool = false;
+    config.tools_web_search_request = false;
+    config.use_experimental_unified_exec_tool = true; // policy-independent tool
+
+    let conversation_manager =
+        ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+    let codex = conversation_manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    for (i, (from, to)) in transitions.iter().enumerate() {
+        // Ensure a known starting policy for this pair
+        codex
+            .submit(Op::OverrideTurnContext {
+                cwd: None,
+                approval_policy: Some(*from),
+                sandbox_policy: None,
+                model: None,
+                effort: None,
+                summary: None,
+            })
+            .await
+            .unwrap();
+
+        codex
+            .submit(Op::UserInput {
+                items: vec![InputItem::Text {
+                    text: format!("turn {i}-a"),
+                }],
+            })
+            .await
+            .unwrap();
+        wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+        // Override to the target policy and send next turn
+        codex
+            .submit(Op::OverrideTurnContext {
+                cwd: None,
+                approval_policy: Some(*to),
+                sandbox_policy: None,
+                model: None,
+                effort: None,
+                summary: None,
+            })
+            .await
+            .unwrap();
+
+        codex
+            .submit(Op::UserInput {
+                items: vec![InputItem::Text {
+                    text: format!("turn {i}-b"),
+                }],
+            })
+            .await
+            .unwrap();
+        wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+    }
+
+    // Verify tool arrays are identical across each pair of requests
+    let requests = server.received_requests().await.unwrap();
+    assert_eq!(
+        requests.len(),
+        transitions.len() * 2,
+        "expected 2 requests per transition"
+    );
+
+    for i in 0..transitions.len() {
+        let body_a = requests[2 * i].body_json::<serde_json::Value>().unwrap();
+        let body_b = requests[2 * i + 1]
+            .body_json::<serde_json::Value>()
+            .unwrap();
+        assert_eq!(
+            body_a["tools"], body_b["tools"],
+            "tools changed between requests for transition #{i}: {:?}",
+            transitions[i]
+        );
+    }
+}